29278 lines
716 KiB
JSON
29278 lines
716 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 20862,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0001438021282714984,
|
|
"grad_norm": 0.8895956262211892,
|
|
"learning_rate": 1.9999999886614413e-05,
|
|
"loss": 0.7503,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0007190106413574921,
|
|
"grad_norm": 0.7538850969485658,
|
|
"learning_rate": 1.9999997165360364e-05,
|
|
"loss": 0.7445,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0014380212827149843,
|
|
"grad_norm": 0.3052316332485418,
|
|
"learning_rate": 1.9999988661443057e-05,
|
|
"loss": 0.6786,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0021570319240724763,
|
|
"grad_norm": 0.2899600972103938,
|
|
"learning_rate": 1.9999974488252902e-05,
|
|
"loss": 0.6686,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.0028760425654299686,
|
|
"grad_norm": 0.26674080063712186,
|
|
"learning_rate": 1.9999954645797935e-05,
|
|
"loss": 0.6502,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.0035950532067874604,
|
|
"grad_norm": 0.2571378355868916,
|
|
"learning_rate": 1.9999929134089406e-05,
|
|
"loss": 0.6425,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.004314063848144953,
|
|
"grad_norm": 0.2509279861113437,
|
|
"learning_rate": 1.9999897953141777e-05,
|
|
"loss": 0.6354,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.005033074489502445,
|
|
"grad_norm": 0.23412598127898154,
|
|
"learning_rate": 1.9999861102972723e-05,
|
|
"loss": 0.6232,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.005752085130859937,
|
|
"grad_norm": 0.23324654117368399,
|
|
"learning_rate": 1.999981858360314e-05,
|
|
"loss": 0.6105,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0064710957722174285,
|
|
"grad_norm": 0.24845353197583306,
|
|
"learning_rate": 1.999977039505713e-05,
|
|
"loss": 0.6146,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.007190106413574921,
|
|
"grad_norm": 0.24546704448200354,
|
|
"learning_rate": 1.9999716537362013e-05,
|
|
"loss": 0.6156,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.007909117054932413,
|
|
"grad_norm": 0.2418689562299013,
|
|
"learning_rate": 1.9999657010548325e-05,
|
|
"loss": 0.616,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.008628127696289905,
|
|
"grad_norm": 0.23965859222142355,
|
|
"learning_rate": 1.999959181464981e-05,
|
|
"loss": 0.5937,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.009347138337647398,
|
|
"grad_norm": 0.23669570942953966,
|
|
"learning_rate": 1.9999520949703432e-05,
|
|
"loss": 0.6027,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.01006614897900489,
|
|
"grad_norm": 0.24176708430886543,
|
|
"learning_rate": 1.9999444415749365e-05,
|
|
"loss": 0.5905,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.010785159620362382,
|
|
"grad_norm": 0.24723478742464217,
|
|
"learning_rate": 1.9999362212831e-05,
|
|
"loss": 0.6016,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.011504170261719874,
|
|
"grad_norm": 0.2428143716928379,
|
|
"learning_rate": 1.9999274340994935e-05,
|
|
"loss": 0.5978,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.012223180903077365,
|
|
"grad_norm": 0.2350929588489868,
|
|
"learning_rate": 1.999918080029099e-05,
|
|
"loss": 0.5669,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.012942191544434857,
|
|
"grad_norm": 0.2280484038192313,
|
|
"learning_rate": 1.99990815907722e-05,
|
|
"loss": 0.6127,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.01366120218579235,
|
|
"grad_norm": 0.23131386557697947,
|
|
"learning_rate": 1.9998976712494805e-05,
|
|
"loss": 0.6004,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.014380212827149842,
|
|
"grad_norm": 0.2547551645700165,
|
|
"learning_rate": 1.9998866165518264e-05,
|
|
"loss": 0.5946,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.015099223468507334,
|
|
"grad_norm": 0.23194950565240102,
|
|
"learning_rate": 1.999874994990525e-05,
|
|
"loss": 0.5896,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.015818234109864826,
|
|
"grad_norm": 0.2675421206218923,
|
|
"learning_rate": 1.9998628065721647e-05,
|
|
"loss": 0.6009,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.016537244751222317,
|
|
"grad_norm": 0.2564973354360148,
|
|
"learning_rate": 1.999850051303656e-05,
|
|
"loss": 0.6146,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.01725625539257981,
|
|
"grad_norm": 0.2702995046995952,
|
|
"learning_rate": 1.9998367291922293e-05,
|
|
"loss": 0.592,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.0179752660339373,
|
|
"grad_norm": 0.23744671504568096,
|
|
"learning_rate": 1.9998228402454384e-05,
|
|
"loss": 0.5881,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.018694276675294795,
|
|
"grad_norm": 0.25400484853443966,
|
|
"learning_rate": 1.9998083844711563e-05,
|
|
"loss": 0.5995,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.019413287316652286,
|
|
"grad_norm": 0.2289330994293863,
|
|
"learning_rate": 1.9997933618775787e-05,
|
|
"loss": 0.5831,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.02013229795800978,
|
|
"grad_norm": 0.23314947417175733,
|
|
"learning_rate": 1.999777772473223e-05,
|
|
"loss": 0.588,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.02085130859936727,
|
|
"grad_norm": 0.24609630390061227,
|
|
"learning_rate": 1.999761616266926e-05,
|
|
"loss": 0.6057,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.021570319240724764,
|
|
"grad_norm": 0.25060806808962244,
|
|
"learning_rate": 1.9997448932678482e-05,
|
|
"loss": 0.6062,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.022289329882082255,
|
|
"grad_norm": 0.23305588376025257,
|
|
"learning_rate": 1.9997276034854698e-05,
|
|
"loss": 0.5625,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.02300834052343975,
|
|
"grad_norm": 0.2425323796334993,
|
|
"learning_rate": 1.999709746929593e-05,
|
|
"loss": 0.5981,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.02372735116479724,
|
|
"grad_norm": 0.2393332584757854,
|
|
"learning_rate": 1.9996913236103418e-05,
|
|
"loss": 0.5676,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.02444636180615473,
|
|
"grad_norm": 0.25948576809270496,
|
|
"learning_rate": 1.9996723335381595e-05,
|
|
"loss": 0.5843,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.025165372447512224,
|
|
"grad_norm": 0.24575999197763174,
|
|
"learning_rate": 1.9996527767238132e-05,
|
|
"loss": 0.5873,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.025884383088869714,
|
|
"grad_norm": 0.25781746644112463,
|
|
"learning_rate": 1.9996326531783898e-05,
|
|
"loss": 0.6042,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.026603393730227208,
|
|
"grad_norm": 0.23786701467089164,
|
|
"learning_rate": 1.999611962913298e-05,
|
|
"loss": 0.5777,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.0273224043715847,
|
|
"grad_norm": 0.26102003785829764,
|
|
"learning_rate": 1.999590705940268e-05,
|
|
"loss": 0.5968,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.028041415012942193,
|
|
"grad_norm": 0.24249586062086007,
|
|
"learning_rate": 1.9995688822713503e-05,
|
|
"loss": 0.6061,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.028760425654299683,
|
|
"grad_norm": 0.23976276635942745,
|
|
"learning_rate": 1.9995464919189177e-05,
|
|
"loss": 0.5998,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.029479436295657177,
|
|
"grad_norm": 0.252632644612971,
|
|
"learning_rate": 1.9995235348956643e-05,
|
|
"loss": 0.5811,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.030198446937014668,
|
|
"grad_norm": 0.2278841743646487,
|
|
"learning_rate": 1.9995000112146045e-05,
|
|
"loss": 0.5829,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.03091745757837216,
|
|
"grad_norm": 0.2357301218659176,
|
|
"learning_rate": 1.9994759208890744e-05,
|
|
"loss": 0.5936,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.03163646821972965,
|
|
"grad_norm": 0.2504255718005453,
|
|
"learning_rate": 1.999451263932732e-05,
|
|
"loss": 0.6065,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.032355478861087146,
|
|
"grad_norm": 0.2642191490569926,
|
|
"learning_rate": 1.999426040359556e-05,
|
|
"loss": 0.5857,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.03307448950244463,
|
|
"grad_norm": 0.24247603256650757,
|
|
"learning_rate": 1.999400250183846e-05,
|
|
"loss": 0.5933,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.03379350014380213,
|
|
"grad_norm": 0.31604085503458695,
|
|
"learning_rate": 1.9993738934202235e-05,
|
|
"loss": 0.567,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.03451251078515962,
|
|
"grad_norm": 0.23670027365405064,
|
|
"learning_rate": 1.9993469700836307e-05,
|
|
"loss": 0.5642,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.035231521426517115,
|
|
"grad_norm": 0.23669625898006572,
|
|
"learning_rate": 1.999319480189331e-05,
|
|
"loss": 0.5789,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.0359505320678746,
|
|
"grad_norm": 0.24845579465645523,
|
|
"learning_rate": 1.9992914237529094e-05,
|
|
"loss": 0.5847,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.036669542709232096,
|
|
"grad_norm": 0.23450615631614805,
|
|
"learning_rate": 1.9992628007902718e-05,
|
|
"loss": 0.5849,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.03738855335058959,
|
|
"grad_norm": 0.30173595777351025,
|
|
"learning_rate": 1.999233611317646e-05,
|
|
"loss": 0.5802,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.038107563991947084,
|
|
"grad_norm": 0.2414617541078757,
|
|
"learning_rate": 1.9992038553515792e-05,
|
|
"loss": 0.5791,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.03882657463330457,
|
|
"grad_norm": 0.24955545431976164,
|
|
"learning_rate": 1.9991735329089416e-05,
|
|
"loss": 0.5781,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.039545585274662065,
|
|
"grad_norm": 0.2681517501448067,
|
|
"learning_rate": 1.999142644006924e-05,
|
|
"loss": 0.5738,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.04026459591601956,
|
|
"grad_norm": 0.24569712912592853,
|
|
"learning_rate": 1.9991111886630375e-05,
|
|
"loss": 0.5719,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.040983606557377046,
|
|
"grad_norm": 0.25324852483277277,
|
|
"learning_rate": 1.9990791668951155e-05,
|
|
"loss": 0.5783,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.04170261719873454,
|
|
"grad_norm": 0.2353776930936098,
|
|
"learning_rate": 1.9990465787213118e-05,
|
|
"loss": 0.5749,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.042421627840092034,
|
|
"grad_norm": 0.2672442244356293,
|
|
"learning_rate": 1.999013424160102e-05,
|
|
"loss": 0.5844,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.04314063848144953,
|
|
"grad_norm": 0.2547192640704089,
|
|
"learning_rate": 1.998979703230282e-05,
|
|
"loss": 0.5901,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.043859649122807015,
|
|
"grad_norm": 0.24689548800613587,
|
|
"learning_rate": 1.998945415950969e-05,
|
|
"loss": 0.5637,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.04457865976416451,
|
|
"grad_norm": 0.24720149594694088,
|
|
"learning_rate": 1.9989105623416014e-05,
|
|
"loss": 0.5692,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.045297670405522,
|
|
"grad_norm": 0.24942201632003652,
|
|
"learning_rate": 1.998875142421939e-05,
|
|
"loss": 0.5877,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.0460166810468795,
|
|
"grad_norm": 0.24826746598996405,
|
|
"learning_rate": 1.998839156212062e-05,
|
|
"loss": 0.567,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.046735691688236984,
|
|
"grad_norm": 0.23321079253402682,
|
|
"learning_rate": 1.9988026037323728e-05,
|
|
"loss": 0.5837,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.04745470232959448,
|
|
"grad_norm": 0.23600090147065284,
|
|
"learning_rate": 1.9987654850035926e-05,
|
|
"loss": 0.5706,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.04817371297095197,
|
|
"grad_norm": 0.23188518123042903,
|
|
"learning_rate": 1.9987278000467665e-05,
|
|
"loss": 0.5693,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.04889272361230946,
|
|
"grad_norm": 0.2299076677891295,
|
|
"learning_rate": 1.998689548883258e-05,
|
|
"loss": 0.5649,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.04961173425366695,
|
|
"grad_norm": 0.24272821845012257,
|
|
"learning_rate": 1.9986507315347535e-05,
|
|
"loss": 0.5731,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.05033074489502445,
|
|
"grad_norm": 0.24883260991609574,
|
|
"learning_rate": 1.9986113480232598e-05,
|
|
"loss": 0.5684,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.05104975553638194,
|
|
"grad_norm": 0.24361728388598086,
|
|
"learning_rate": 1.9985713983711034e-05,
|
|
"loss": 0.5703,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.05176876617773943,
|
|
"grad_norm": 0.24082816776797009,
|
|
"learning_rate": 1.998530882600934e-05,
|
|
"loss": 0.5698,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.05248777681909692,
|
|
"grad_norm": 0.28034003180068806,
|
|
"learning_rate": 1.9984898007357203e-05,
|
|
"loss": 0.5792,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.053206787460454416,
|
|
"grad_norm": 0.2389126953535011,
|
|
"learning_rate": 1.9984481527987535e-05,
|
|
"loss": 0.585,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.05392579810181191,
|
|
"grad_norm": 0.2604223693815005,
|
|
"learning_rate": 1.9984059388136448e-05,
|
|
"loss": 0.5841,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.0546448087431694,
|
|
"grad_norm": 0.2241690714850129,
|
|
"learning_rate": 1.998363158804326e-05,
|
|
"loss": 0.5703,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.05536381938452689,
|
|
"grad_norm": 0.2337180098567318,
|
|
"learning_rate": 1.9983198127950507e-05,
|
|
"loss": 0.5629,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.056082830025884385,
|
|
"grad_norm": 0.2571830946809361,
|
|
"learning_rate": 1.9982759008103926e-05,
|
|
"loss": 0.5528,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.05680184066724187,
|
|
"grad_norm": 0.23443323460830098,
|
|
"learning_rate": 1.9982314228752474e-05,
|
|
"loss": 0.5518,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.057520851308599366,
|
|
"grad_norm": 0.2373171025532724,
|
|
"learning_rate": 1.9981863790148303e-05,
|
|
"loss": 0.5646,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.05823986194995686,
|
|
"grad_norm": 0.23501490363911096,
|
|
"learning_rate": 1.9981407692546776e-05,
|
|
"loss": 0.5798,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.058958872591314354,
|
|
"grad_norm": 0.2277225203925688,
|
|
"learning_rate": 1.9980945936206475e-05,
|
|
"loss": 0.5549,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.05967788323267184,
|
|
"grad_norm": 0.228725824660234,
|
|
"learning_rate": 1.998047852138918e-05,
|
|
"loss": 0.5702,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.060396893874029335,
|
|
"grad_norm": 0.24468329140247444,
|
|
"learning_rate": 1.9980005448359878e-05,
|
|
"loss": 0.5802,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.06111590451538683,
|
|
"grad_norm": 0.24887287731815272,
|
|
"learning_rate": 1.997952671738677e-05,
|
|
"loss": 0.5541,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.06183491515674432,
|
|
"grad_norm": 0.2345470777925773,
|
|
"learning_rate": 1.9979042328741264e-05,
|
|
"loss": 0.5751,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.06255392579810182,
|
|
"grad_norm": 0.22675626648422811,
|
|
"learning_rate": 1.997855228269797e-05,
|
|
"loss": 0.5645,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.0632729364394593,
|
|
"grad_norm": 0.22969452073158558,
|
|
"learning_rate": 1.997805657953471e-05,
|
|
"loss": 0.5576,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.06399194708081679,
|
|
"grad_norm": 0.23819343640060633,
|
|
"learning_rate": 1.9977555219532512e-05,
|
|
"loss": 0.5614,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.06471095772217429,
|
|
"grad_norm": 0.22449524684154257,
|
|
"learning_rate": 1.997704820297561e-05,
|
|
"loss": 0.5632,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.06542996836353178,
|
|
"grad_norm": 0.2281428133225678,
|
|
"learning_rate": 1.9976535530151447e-05,
|
|
"loss": 0.5668,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.06614897900488927,
|
|
"grad_norm": 0.22733576739594663,
|
|
"learning_rate": 1.997601720135067e-05,
|
|
"loss": 0.5559,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.06686798964624677,
|
|
"grad_norm": 0.25848007315376675,
|
|
"learning_rate": 1.9975493216867143e-05,
|
|
"loss": 0.561,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.06758700028760425,
|
|
"grad_norm": 0.2589607216199232,
|
|
"learning_rate": 1.9974963576997912e-05,
|
|
"loss": 0.556,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.06830601092896176,
|
|
"grad_norm": 0.25234683129298624,
|
|
"learning_rate": 1.9974428282043255e-05,
|
|
"loss": 0.5596,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.06902502157031924,
|
|
"grad_norm": 0.23087115529102087,
|
|
"learning_rate": 1.9973887332306648e-05,
|
|
"loss": 0.5668,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.06974403221167673,
|
|
"grad_norm": 0.23159311014909575,
|
|
"learning_rate": 1.997334072809476e-05,
|
|
"loss": 0.5483,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.07046304285303423,
|
|
"grad_norm": 0.2380679120871314,
|
|
"learning_rate": 1.9972788469717483e-05,
|
|
"loss": 0.5506,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.07118205349439172,
|
|
"grad_norm": 0.23474547391570408,
|
|
"learning_rate": 1.9972230557487908e-05,
|
|
"loss": 0.5647,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.0719010641357492,
|
|
"grad_norm": 0.2369243370447207,
|
|
"learning_rate": 1.997166699172233e-05,
|
|
"loss": 0.5837,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.0726200747771067,
|
|
"grad_norm": 0.2314228917745866,
|
|
"learning_rate": 1.9971097772740248e-05,
|
|
"loss": 0.5685,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.07333908541846419,
|
|
"grad_norm": 0.22604831031414893,
|
|
"learning_rate": 1.997052290086437e-05,
|
|
"loss": 0.553,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.07405809605982168,
|
|
"grad_norm": 0.23479155122370488,
|
|
"learning_rate": 1.9969942376420606e-05,
|
|
"loss": 0.5693,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.07477710670117918,
|
|
"grad_norm": 0.24154952916904426,
|
|
"learning_rate": 1.9969356199738076e-05,
|
|
"loss": 0.5559,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.07549611734253667,
|
|
"grad_norm": 0.24234210645824733,
|
|
"learning_rate": 1.9968764371149098e-05,
|
|
"loss": 0.5763,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.07621512798389417,
|
|
"grad_norm": 0.23512860396843185,
|
|
"learning_rate": 1.996816689098919e-05,
|
|
"loss": 0.5623,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.07693413862525166,
|
|
"grad_norm": 0.23778492447875255,
|
|
"learning_rate": 1.9967563759597084e-05,
|
|
"loss": 0.5546,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.07765314926660914,
|
|
"grad_norm": 0.2306104882528985,
|
|
"learning_rate": 1.9966954977314717e-05,
|
|
"loss": 0.5613,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.07837215990796664,
|
|
"grad_norm": 0.25470531407410457,
|
|
"learning_rate": 1.9966340544487214e-05,
|
|
"loss": 0.5678,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.07909117054932413,
|
|
"grad_norm": 0.2549311232751504,
|
|
"learning_rate": 1.996572046146293e-05,
|
|
"loss": 0.5641,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.07981018119068162,
|
|
"grad_norm": 0.23736262691577187,
|
|
"learning_rate": 1.996509472859339e-05,
|
|
"loss": 0.5708,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.08052919183203912,
|
|
"grad_norm": 0.23789179184218126,
|
|
"learning_rate": 1.996446334623335e-05,
|
|
"loss": 0.5747,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.0812482024733966,
|
|
"grad_norm": 0.24658441392917815,
|
|
"learning_rate": 1.9963826314740755e-05,
|
|
"loss": 0.5715,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.08196721311475409,
|
|
"grad_norm": 0.23122288100315114,
|
|
"learning_rate": 1.9963183634476757e-05,
|
|
"loss": 0.5596,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.0826862237561116,
|
|
"grad_norm": 0.24086550214425853,
|
|
"learning_rate": 1.996253530580571e-05,
|
|
"loss": 0.5711,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.08340523439746908,
|
|
"grad_norm": 0.24735019428776434,
|
|
"learning_rate": 1.9961881329095167e-05,
|
|
"loss": 0.5787,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.08412424503882658,
|
|
"grad_norm": 0.24048575173417583,
|
|
"learning_rate": 1.9961221704715886e-05,
|
|
"loss": 0.569,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.08484325568018407,
|
|
"grad_norm": 0.23036818476348792,
|
|
"learning_rate": 1.996055643304183e-05,
|
|
"loss": 0.5725,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.08556226632154156,
|
|
"grad_norm": 0.23658089750158737,
|
|
"learning_rate": 1.995988551445016e-05,
|
|
"loss": 0.5526,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.08628127696289906,
|
|
"grad_norm": 0.24266345921678414,
|
|
"learning_rate": 1.9959208949321234e-05,
|
|
"loss": 0.5695,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.08700028760425654,
|
|
"grad_norm": 0.22811106526417912,
|
|
"learning_rate": 1.9958526738038618e-05,
|
|
"loss": 0.5651,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.08771929824561403,
|
|
"grad_norm": 0.2421343124759253,
|
|
"learning_rate": 1.9957838880989076e-05,
|
|
"loss": 0.5651,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.08843830888697153,
|
|
"grad_norm": 0.24224078684978484,
|
|
"learning_rate": 1.9957145378562574e-05,
|
|
"loss": 0.5565,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.08915731952832902,
|
|
"grad_norm": 0.23449828939088413,
|
|
"learning_rate": 1.995644623115228e-05,
|
|
"loss": 0.557,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.0898763301696865,
|
|
"grad_norm": 0.2354090594428972,
|
|
"learning_rate": 1.9955741439154557e-05,
|
|
"loss": 0.5601,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.090595340811044,
|
|
"grad_norm": 0.23429370403590513,
|
|
"learning_rate": 1.9955031002968972e-05,
|
|
"loss": 0.5436,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.0913143514524015,
|
|
"grad_norm": 0.23909034721910113,
|
|
"learning_rate": 1.995431492299829e-05,
|
|
"loss": 0.5438,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.092033362093759,
|
|
"grad_norm": 0.2546003626324708,
|
|
"learning_rate": 1.9953593199648484e-05,
|
|
"loss": 0.552,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.09275237273511648,
|
|
"grad_norm": 0.24340505218858643,
|
|
"learning_rate": 1.9952865833328707e-05,
|
|
"loss": 0.545,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.09347138337647397,
|
|
"grad_norm": 0.235160724181661,
|
|
"learning_rate": 1.9952132824451333e-05,
|
|
"loss": 0.5443,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.09419039401783147,
|
|
"grad_norm": 0.2304357738930148,
|
|
"learning_rate": 1.995139417343192e-05,
|
|
"loss": 0.5588,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.09490940465918896,
|
|
"grad_norm": 0.24348777956804377,
|
|
"learning_rate": 1.995064988068923e-05,
|
|
"loss": 0.5734,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.09562841530054644,
|
|
"grad_norm": 0.25544009282187286,
|
|
"learning_rate": 1.994989994664523e-05,
|
|
"loss": 0.5562,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.09634742594190394,
|
|
"grad_norm": 0.23348945321713513,
|
|
"learning_rate": 1.994914437172507e-05,
|
|
"loss": 0.5546,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.09706643658326143,
|
|
"grad_norm": 0.23369100591763928,
|
|
"learning_rate": 1.9948383156357112e-05,
|
|
"loss": 0.5609,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.09778544722461892,
|
|
"grad_norm": 0.2506151417862584,
|
|
"learning_rate": 1.9947616300972906e-05,
|
|
"loss": 0.5782,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.09850445786597642,
|
|
"grad_norm": 0.22590797440006433,
|
|
"learning_rate": 1.994684380600721e-05,
|
|
"loss": 0.5466,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.0992234685073339,
|
|
"grad_norm": 0.23440225124281835,
|
|
"learning_rate": 1.9946065671897965e-05,
|
|
"loss": 0.546,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.09994247914869141,
|
|
"grad_norm": 0.2322606378315528,
|
|
"learning_rate": 1.9945281899086325e-05,
|
|
"loss": 0.5614,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.1006614897900489,
|
|
"grad_norm": 0.22932823503652058,
|
|
"learning_rate": 1.9944492488016623e-05,
|
|
"loss": 0.5709,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.10138050043140638,
|
|
"grad_norm": 0.22956801014845277,
|
|
"learning_rate": 1.994369743913641e-05,
|
|
"loss": 0.5546,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.10209951107276388,
|
|
"grad_norm": 0.2452051181302563,
|
|
"learning_rate": 1.9942896752896413e-05,
|
|
"loss": 0.5503,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.10281852171412137,
|
|
"grad_norm": 0.2431334061890164,
|
|
"learning_rate": 1.9942090429750564e-05,
|
|
"loss": 0.5677,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.10353753235547886,
|
|
"grad_norm": 0.23416786364574083,
|
|
"learning_rate": 1.9941278470155993e-05,
|
|
"loss": 0.5493,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.10425654299683636,
|
|
"grad_norm": 0.2338646746203413,
|
|
"learning_rate": 1.9940460874573025e-05,
|
|
"loss": 0.5537,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.10497555363819384,
|
|
"grad_norm": 0.24660941049936622,
|
|
"learning_rate": 1.993963764346517e-05,
|
|
"loss": 0.5545,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.10569456427955133,
|
|
"grad_norm": 0.224186981201213,
|
|
"learning_rate": 1.9938808777299145e-05,
|
|
"loss": 0.546,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.10641357492090883,
|
|
"grad_norm": 0.2416482602683721,
|
|
"learning_rate": 1.993797427654486e-05,
|
|
"loss": 0.5444,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.10713258556226632,
|
|
"grad_norm": 0.23866160212304743,
|
|
"learning_rate": 1.993713414167541e-05,
|
|
"loss": 0.5566,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.10785159620362382,
|
|
"grad_norm": 0.23920491517081066,
|
|
"learning_rate": 1.9936288373167095e-05,
|
|
"loss": 0.5541,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.10857060684498131,
|
|
"grad_norm": 0.23148987601367907,
|
|
"learning_rate": 1.9935436971499408e-05,
|
|
"loss": 0.5532,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.1092896174863388,
|
|
"grad_norm": 0.2709530628970777,
|
|
"learning_rate": 1.993457993715503e-05,
|
|
"loss": 0.5511,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.1100086281276963,
|
|
"grad_norm": 0.23234198844872012,
|
|
"learning_rate": 1.9933717270619835e-05,
|
|
"loss": 0.5515,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.11072763876905378,
|
|
"grad_norm": 0.23109806096717964,
|
|
"learning_rate": 1.9932848972382895e-05,
|
|
"loss": 0.5386,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.11144664941041127,
|
|
"grad_norm": 0.2449388533996965,
|
|
"learning_rate": 1.9931975042936473e-05,
|
|
"loss": 0.5444,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.11216566005176877,
|
|
"grad_norm": 0.22958192290078638,
|
|
"learning_rate": 1.993109548277602e-05,
|
|
"loss": 0.5538,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.11288467069312626,
|
|
"grad_norm": 0.2358876835459476,
|
|
"learning_rate": 1.9930210292400186e-05,
|
|
"loss": 0.559,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.11360368133448374,
|
|
"grad_norm": 0.2403799019112171,
|
|
"learning_rate": 1.9929319472310814e-05,
|
|
"loss": 0.5472,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.11432269197584125,
|
|
"grad_norm": 0.23364592975387316,
|
|
"learning_rate": 1.992842302301293e-05,
|
|
"loss": 0.5514,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.11504170261719873,
|
|
"grad_norm": 0.23389566476752166,
|
|
"learning_rate": 1.9927520945014757e-05,
|
|
"loss": 0.5539,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.11576071325855623,
|
|
"grad_norm": 0.40418569681544375,
|
|
"learning_rate": 1.992661323882771e-05,
|
|
"loss": 0.5548,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.11647972389991372,
|
|
"grad_norm": 0.24462456666343171,
|
|
"learning_rate": 1.992569990496639e-05,
|
|
"loss": 0.5468,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.11719873454127121,
|
|
"grad_norm": 0.23888748035425905,
|
|
"learning_rate": 1.9924780943948595e-05,
|
|
"loss": 0.5727,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.11791774518262871,
|
|
"grad_norm": 0.231850528615198,
|
|
"learning_rate": 1.9923856356295306e-05,
|
|
"loss": 0.5473,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.1186367558239862,
|
|
"grad_norm": 0.2396817797554619,
|
|
"learning_rate": 1.9922926142530698e-05,
|
|
"loss": 0.5605,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.11935576646534368,
|
|
"grad_norm": 0.22363201399811253,
|
|
"learning_rate": 1.9921990303182138e-05,
|
|
"loss": 0.5558,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.12007477710670118,
|
|
"grad_norm": 0.23021616719333593,
|
|
"learning_rate": 1.992104883878018e-05,
|
|
"loss": 0.5767,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.12079378774805867,
|
|
"grad_norm": 0.23380857983669595,
|
|
"learning_rate": 1.992010174985856e-05,
|
|
"loss": 0.5521,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.12151279838941616,
|
|
"grad_norm": 0.22729003181704024,
|
|
"learning_rate": 1.9919149036954216e-05,
|
|
"loss": 0.5472,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.12223180903077366,
|
|
"grad_norm": 0.23937887856660198,
|
|
"learning_rate": 1.9918190700607267e-05,
|
|
"loss": 0.5495,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.12295081967213115,
|
|
"grad_norm": 0.23791753701672147,
|
|
"learning_rate": 1.9917226741361014e-05,
|
|
"loss": 0.5538,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.12366983031348865,
|
|
"grad_norm": 0.23694673340103434,
|
|
"learning_rate": 1.9916257159761964e-05,
|
|
"loss": 0.5468,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.12438884095484613,
|
|
"grad_norm": 0.2333089213103549,
|
|
"learning_rate": 1.9915281956359788e-05,
|
|
"loss": 0.5447,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.12510785159620363,
|
|
"grad_norm": 0.227723221504762,
|
|
"learning_rate": 1.991430113170736e-05,
|
|
"loss": 0.5579,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.12582686223756112,
|
|
"grad_norm": 0.23086865156496933,
|
|
"learning_rate": 1.9913314686360744e-05,
|
|
"loss": 0.5625,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.1265458728789186,
|
|
"grad_norm": 0.25490421224303816,
|
|
"learning_rate": 1.991232262087917e-05,
|
|
"loss": 0.5498,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.1272648835202761,
|
|
"grad_norm": 0.25322058897047683,
|
|
"learning_rate": 1.9911324935825083e-05,
|
|
"loss": 0.5467,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.12798389416163358,
|
|
"grad_norm": 0.24209252391434005,
|
|
"learning_rate": 1.9910321631764083e-05,
|
|
"loss": 0.5554,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.1287029048029911,
|
|
"grad_norm": 0.23150880614620745,
|
|
"learning_rate": 1.9909312709264982e-05,
|
|
"loss": 0.5522,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.12942191544434858,
|
|
"grad_norm": 0.22637748606237124,
|
|
"learning_rate": 1.9908298168899764e-05,
|
|
"loss": 0.5605,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.13014092608570607,
|
|
"grad_norm": 0.23657431868503107,
|
|
"learning_rate": 1.9907278011243598e-05,
|
|
"loss": 0.5469,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.13085993672706356,
|
|
"grad_norm": 0.23323393650672994,
|
|
"learning_rate": 1.9906252236874842e-05,
|
|
"loss": 0.5574,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.13157894736842105,
|
|
"grad_norm": 0.2242261540344663,
|
|
"learning_rate": 1.990522084637503e-05,
|
|
"loss": 0.5435,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.13229795800977853,
|
|
"grad_norm": 0.2399896547903275,
|
|
"learning_rate": 1.99041838403289e-05,
|
|
"loss": 0.5497,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.13301696865113605,
|
|
"grad_norm": 0.22799645707237384,
|
|
"learning_rate": 1.9903141219324346e-05,
|
|
"loss": 0.5344,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.13373597929249353,
|
|
"grad_norm": 0.24695130258598189,
|
|
"learning_rate": 1.9902092983952464e-05,
|
|
"loss": 0.5608,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.13445498993385102,
|
|
"grad_norm": 0.2340756439929401,
|
|
"learning_rate": 1.9901039134807528e-05,
|
|
"loss": 0.5381,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.1351740005752085,
|
|
"grad_norm": 0.2312762255986081,
|
|
"learning_rate": 1.9899979672486997e-05,
|
|
"loss": 0.556,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.135893011216566,
|
|
"grad_norm": 0.2310236580618276,
|
|
"learning_rate": 1.9898914597591504e-05,
|
|
"loss": 0.5327,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.1366120218579235,
|
|
"grad_norm": 0.24017163230048633,
|
|
"learning_rate": 1.9897843910724877e-05,
|
|
"loss": 0.5608,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.137331032499281,
|
|
"grad_norm": 0.23318572231200663,
|
|
"learning_rate": 1.989676761249411e-05,
|
|
"loss": 0.541,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.13805004314063848,
|
|
"grad_norm": 0.21727740276615842,
|
|
"learning_rate": 1.9895685703509393e-05,
|
|
"loss": 0.542,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.13876905378199597,
|
|
"grad_norm": 0.24073882046706868,
|
|
"learning_rate": 1.989459818438409e-05,
|
|
"loss": 0.5704,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.13948806442335346,
|
|
"grad_norm": 0.2331323920025414,
|
|
"learning_rate": 1.989350505573474e-05,
|
|
"loss": 0.5622,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.14020707506471095,
|
|
"grad_norm": 0.2339887752455901,
|
|
"learning_rate": 1.9892406318181075e-05,
|
|
"loss": 0.5253,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.14092608570606846,
|
|
"grad_norm": 0.22730026395412942,
|
|
"learning_rate": 1.9891301972345993e-05,
|
|
"loss": 0.5663,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.14164509634742595,
|
|
"grad_norm": 0.24588319237645848,
|
|
"learning_rate": 1.9890192018855587e-05,
|
|
"loss": 0.563,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.14236410698878343,
|
|
"grad_norm": 0.23378060948352938,
|
|
"learning_rate": 1.9889076458339116e-05,
|
|
"loss": 0.5579,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.14308311763014092,
|
|
"grad_norm": 0.23722178398720728,
|
|
"learning_rate": 1.988795529142902e-05,
|
|
"loss": 0.5408,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.1438021282714984,
|
|
"grad_norm": 0.23988344715061594,
|
|
"learning_rate": 1.9886828518760925e-05,
|
|
"loss": 0.5265,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.14452113891285592,
|
|
"grad_norm": 0.24275883896253647,
|
|
"learning_rate": 1.9885696140973625e-05,
|
|
"loss": 0.5414,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.1452401495542134,
|
|
"grad_norm": 0.2346568620768657,
|
|
"learning_rate": 1.9884558158709103e-05,
|
|
"loss": 0.5407,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.1459591601955709,
|
|
"grad_norm": 0.2263203624484671,
|
|
"learning_rate": 1.9883414572612506e-05,
|
|
"loss": 0.5391,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.14667817083692838,
|
|
"grad_norm": 0.23593016127036032,
|
|
"learning_rate": 1.988226538333217e-05,
|
|
"loss": 0.5333,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.14739718147828587,
|
|
"grad_norm": 0.23969237664732787,
|
|
"learning_rate": 1.98811105915196e-05,
|
|
"loss": 0.5421,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.14811619211964336,
|
|
"grad_norm": 0.22835038314625386,
|
|
"learning_rate": 1.9879950197829477e-05,
|
|
"loss": 0.5538,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.14883520276100087,
|
|
"grad_norm": 0.24277098351604232,
|
|
"learning_rate": 1.9878784202919668e-05,
|
|
"loss": 0.5496,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.14955421340235836,
|
|
"grad_norm": 0.24541139573862844,
|
|
"learning_rate": 1.9877612607451203e-05,
|
|
"loss": 0.5493,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.15027322404371585,
|
|
"grad_norm": 0.23602266976589184,
|
|
"learning_rate": 1.9876435412088292e-05,
|
|
"loss": 0.5392,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.15099223468507333,
|
|
"grad_norm": 0.23870568698917677,
|
|
"learning_rate": 1.987525261749832e-05,
|
|
"loss": 0.5433,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.15171124532643082,
|
|
"grad_norm": 0.23740753927088906,
|
|
"learning_rate": 1.9874064224351846e-05,
|
|
"loss": 0.5467,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.15243025596778834,
|
|
"grad_norm": 0.2374693346992944,
|
|
"learning_rate": 1.987287023332261e-05,
|
|
"loss": 0.5541,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.15314926660914582,
|
|
"grad_norm": 0.23345027765310092,
|
|
"learning_rate": 1.987167064508751e-05,
|
|
"loss": 0.5535,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.1538682772505033,
|
|
"grad_norm": 0.22219741109666344,
|
|
"learning_rate": 1.9870465460326628e-05,
|
|
"loss": 0.5441,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.1545872878918608,
|
|
"grad_norm": 0.2359136000366983,
|
|
"learning_rate": 1.9869254679723222e-05,
|
|
"loss": 0.5513,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.15530629853321828,
|
|
"grad_norm": 0.23894326584217548,
|
|
"learning_rate": 1.986803830396371e-05,
|
|
"loss": 0.5478,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.15602530917457577,
|
|
"grad_norm": 0.23669074673568327,
|
|
"learning_rate": 1.9866816333737694e-05,
|
|
"loss": 0.5463,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.1567443198159333,
|
|
"grad_norm": 0.2294896030560247,
|
|
"learning_rate": 1.9865588769737944e-05,
|
|
"loss": 0.548,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.15746333045729077,
|
|
"grad_norm": 0.22698285930341772,
|
|
"learning_rate": 1.9864355612660397e-05,
|
|
"loss": 0.5567,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.15818234109864826,
|
|
"grad_norm": 0.2195602706946871,
|
|
"learning_rate": 1.9863116863204165e-05,
|
|
"loss": 0.5371,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.15890135174000575,
|
|
"grad_norm": 0.23219751677955974,
|
|
"learning_rate": 1.9861872522071532e-05,
|
|
"loss": 0.5308,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.15962036238136323,
|
|
"grad_norm": 0.22608054174949835,
|
|
"learning_rate": 1.9860622589967946e-05,
|
|
"loss": 0.5327,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.16033937302272075,
|
|
"grad_norm": 0.2295650963912051,
|
|
"learning_rate": 1.985936706760203e-05,
|
|
"loss": 0.5443,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.16105838366407824,
|
|
"grad_norm": 0.2291500016959589,
|
|
"learning_rate": 1.985810595568558e-05,
|
|
"loss": 0.5317,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.16177739430543572,
|
|
"grad_norm": 0.22975864734060938,
|
|
"learning_rate": 1.9856839254933545e-05,
|
|
"loss": 0.5206,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.1624964049467932,
|
|
"grad_norm": 0.22403915883013656,
|
|
"learning_rate": 1.9855566966064062e-05,
|
|
"loss": 0.5432,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.1632154155881507,
|
|
"grad_norm": 0.23054931238613238,
|
|
"learning_rate": 1.9854289089798422e-05,
|
|
"loss": 0.5497,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.16393442622950818,
|
|
"grad_norm": 0.23378976237458074,
|
|
"learning_rate": 1.985300562686109e-05,
|
|
"loss": 0.5382,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.1646534368708657,
|
|
"grad_norm": 0.2453434720853162,
|
|
"learning_rate": 1.98517165779797e-05,
|
|
"loss": 0.5522,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.1653724475122232,
|
|
"grad_norm": 0.23126619905706874,
|
|
"learning_rate": 1.9850421943885045e-05,
|
|
"loss": 0.5256,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.16609145815358067,
|
|
"grad_norm": 0.23933710626023538,
|
|
"learning_rate": 1.9849121725311094e-05,
|
|
"loss": 0.5363,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.16681046879493816,
|
|
"grad_norm": 0.23609953921386437,
|
|
"learning_rate": 1.984781592299497e-05,
|
|
"loss": 0.5338,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.16752947943629565,
|
|
"grad_norm": 0.22988073789336907,
|
|
"learning_rate": 1.984650453767698e-05,
|
|
"loss": 0.5213,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.16824849007765316,
|
|
"grad_norm": 0.22991077323241457,
|
|
"learning_rate": 1.9845187570100576e-05,
|
|
"loss": 0.5415,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.16896750071901065,
|
|
"grad_norm": 0.22750061988091566,
|
|
"learning_rate": 1.9843865021012386e-05,
|
|
"loss": 0.5498,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.16968651136036814,
|
|
"grad_norm": 0.237248966845271,
|
|
"learning_rate": 1.9842536891162202e-05,
|
|
"loss": 0.5599,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.17040552200172562,
|
|
"grad_norm": 0.23234132221437664,
|
|
"learning_rate": 1.984120318130297e-05,
|
|
"loss": 0.5475,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.1711245326430831,
|
|
"grad_norm": 0.2419059605223682,
|
|
"learning_rate": 1.983986389219082e-05,
|
|
"loss": 0.5428,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.1718435432844406,
|
|
"grad_norm": 0.22907754436174932,
|
|
"learning_rate": 1.9838519024585025e-05,
|
|
"loss": 0.552,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.1725625539257981,
|
|
"grad_norm": 0.21402770173233282,
|
|
"learning_rate": 1.9837168579248027e-05,
|
|
"loss": 0.5276,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.1732815645671556,
|
|
"grad_norm": 0.227098258804778,
|
|
"learning_rate": 1.983581255694543e-05,
|
|
"loss": 0.5415,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.1740005752085131,
|
|
"grad_norm": 0.2491045684374358,
|
|
"learning_rate": 1.983445095844601e-05,
|
|
"loss": 0.5439,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.17471958584987057,
|
|
"grad_norm": 0.22501977101506793,
|
|
"learning_rate": 1.9833083784521687e-05,
|
|
"loss": 0.5392,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.17543859649122806,
|
|
"grad_norm": 0.22291022506238115,
|
|
"learning_rate": 1.9831711035947552e-05,
|
|
"loss": 0.5256,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.17615760713258558,
|
|
"grad_norm": 0.22811394434642848,
|
|
"learning_rate": 1.9830332713501855e-05,
|
|
"loss": 0.5374,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.17687661777394306,
|
|
"grad_norm": 0.2353804592934281,
|
|
"learning_rate": 1.9828948817966006e-05,
|
|
"loss": 0.5486,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.17759562841530055,
|
|
"grad_norm": 0.23615171001248797,
|
|
"learning_rate": 1.9827559350124573e-05,
|
|
"loss": 0.5414,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.17831463905665804,
|
|
"grad_norm": 0.23404800644559273,
|
|
"learning_rate": 1.9826164310765284e-05,
|
|
"loss": 0.5478,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.17903364969801552,
|
|
"grad_norm": 0.23147222875411733,
|
|
"learning_rate": 1.9824763700679026e-05,
|
|
"loss": 0.5643,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.179752660339373,
|
|
"grad_norm": 0.22886466551947476,
|
|
"learning_rate": 1.9823357520659843e-05,
|
|
"loss": 0.5534,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.18047167098073053,
|
|
"grad_norm": 0.23299717065916334,
|
|
"learning_rate": 1.982194577150494e-05,
|
|
"loss": 0.5497,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.181190681622088,
|
|
"grad_norm": 0.21174447690771017,
|
|
"learning_rate": 1.982052845401468e-05,
|
|
"loss": 0.5229,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.1819096922634455,
|
|
"grad_norm": 0.2288204806983009,
|
|
"learning_rate": 1.981910556899257e-05,
|
|
"loss": 0.5507,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.182628702904803,
|
|
"grad_norm": 0.22209899205461645,
|
|
"learning_rate": 1.9817677117245293e-05,
|
|
"loss": 0.5541,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.18334771354616047,
|
|
"grad_norm": 0.22373001284221763,
|
|
"learning_rate": 1.981624309958267e-05,
|
|
"loss": 0.5362,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.184066724187518,
|
|
"grad_norm": 0.26037079338235714,
|
|
"learning_rate": 1.9814803516817695e-05,
|
|
"loss": 0.5305,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.18478573482887548,
|
|
"grad_norm": 0.23351357170898626,
|
|
"learning_rate": 1.98133583697665e-05,
|
|
"loss": 0.5241,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.18550474547023296,
|
|
"grad_norm": 0.22572536057908968,
|
|
"learning_rate": 1.981190765924838e-05,
|
|
"loss": 0.5414,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.18622375611159045,
|
|
"grad_norm": 0.24302606733378837,
|
|
"learning_rate": 1.9810451386085788e-05,
|
|
"loss": 0.5206,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.18694276675294794,
|
|
"grad_norm": 0.22806542556817114,
|
|
"learning_rate": 1.9808989551104324e-05,
|
|
"loss": 0.5478,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.18766177739430542,
|
|
"grad_norm": 0.23299659727388808,
|
|
"learning_rate": 1.980752215513274e-05,
|
|
"loss": 0.5214,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.18838078803566294,
|
|
"grad_norm": 0.2388535868127206,
|
|
"learning_rate": 1.9806049199002944e-05,
|
|
"loss": 0.5404,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.18909979867702043,
|
|
"grad_norm": 0.2469828609274157,
|
|
"learning_rate": 1.980457068355e-05,
|
|
"loss": 0.547,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.1898188093183779,
|
|
"grad_norm": 0.22257039601510287,
|
|
"learning_rate": 1.9803086609612118e-05,
|
|
"loss": 0.5374,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.1905378199597354,
|
|
"grad_norm": 0.2413030333997793,
|
|
"learning_rate": 1.980159697803066e-05,
|
|
"loss": 0.5271,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.1912568306010929,
|
|
"grad_norm": 0.23034141755393386,
|
|
"learning_rate": 1.980010178965014e-05,
|
|
"loss": 0.5401,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.1919758412424504,
|
|
"grad_norm": 0.2344180021730793,
|
|
"learning_rate": 1.9798601045318224e-05,
|
|
"loss": 0.5143,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.1926948518838079,
|
|
"grad_norm": 0.2300905493052747,
|
|
"learning_rate": 1.979709474588572e-05,
|
|
"loss": 0.5347,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.19341386252516538,
|
|
"grad_norm": 0.24252955790420352,
|
|
"learning_rate": 1.9795582892206598e-05,
|
|
"loss": 0.5587,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.19413287316652286,
|
|
"grad_norm": 0.22426323090474662,
|
|
"learning_rate": 1.9794065485137973e-05,
|
|
"loss": 0.5442,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.19485188380788035,
|
|
"grad_norm": 0.22660906301904027,
|
|
"learning_rate": 1.9792542525540093e-05,
|
|
"loss": 0.5578,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.19557089444923784,
|
|
"grad_norm": 0.22840321357737117,
|
|
"learning_rate": 1.9791014014276377e-05,
|
|
"loss": 0.5298,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.19628990509059535,
|
|
"grad_norm": 0.22583338327840055,
|
|
"learning_rate": 1.9789479952213372e-05,
|
|
"loss": 0.5156,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.19700891573195284,
|
|
"grad_norm": 0.2283477520153283,
|
|
"learning_rate": 1.978794034022079e-05,
|
|
"loss": 0.5349,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.19772792637331033,
|
|
"grad_norm": 0.22295892877749496,
|
|
"learning_rate": 1.9786395179171474e-05,
|
|
"loss": 0.5446,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.1984469370146678,
|
|
"grad_norm": 0.23247464793333245,
|
|
"learning_rate": 1.978484446994142e-05,
|
|
"loss": 0.5578,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.1991659476560253,
|
|
"grad_norm": 0.25651838953898104,
|
|
"learning_rate": 1.978328821340977e-05,
|
|
"loss": 0.534,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.19988495829738281,
|
|
"grad_norm": 0.24940461896639685,
|
|
"learning_rate": 1.978172641045881e-05,
|
|
"loss": 0.5368,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.2006039689387403,
|
|
"grad_norm": 0.23819459349466615,
|
|
"learning_rate": 1.9780159061973964e-05,
|
|
"loss": 0.5488,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.2013229795800978,
|
|
"grad_norm": 0.24600076890112443,
|
|
"learning_rate": 1.977858616884381e-05,
|
|
"loss": 0.5451,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.20204199022145528,
|
|
"grad_norm": 0.22966108206971866,
|
|
"learning_rate": 1.977700773196007e-05,
|
|
"loss": 0.5245,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.20276100086281276,
|
|
"grad_norm": 0.22368217165519352,
|
|
"learning_rate": 1.9775423752217594e-05,
|
|
"loss": 0.5399,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.20348001150417025,
|
|
"grad_norm": 0.2439537542208693,
|
|
"learning_rate": 1.9773834230514386e-05,
|
|
"loss": 0.5245,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.20419902214552776,
|
|
"grad_norm": 0.23360094996444933,
|
|
"learning_rate": 1.97722391677516e-05,
|
|
"loss": 0.5287,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.20491803278688525,
|
|
"grad_norm": 0.24149617489789105,
|
|
"learning_rate": 1.977063856483351e-05,
|
|
"loss": 0.5501,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.20563704342824274,
|
|
"grad_norm": 0.23400826927180796,
|
|
"learning_rate": 1.9769032422667548e-05,
|
|
"loss": 0.5381,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.20635605406960023,
|
|
"grad_norm": 0.2313057655360691,
|
|
"learning_rate": 1.976742074216428e-05,
|
|
"loss": 0.5181,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.2070750647109577,
|
|
"grad_norm": 0.245508577754147,
|
|
"learning_rate": 1.9765803524237417e-05,
|
|
"loss": 0.5362,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.20779407535231523,
|
|
"grad_norm": 0.22858703382328208,
|
|
"learning_rate": 1.9764180769803795e-05,
|
|
"loss": 0.5339,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.20851308599367271,
|
|
"grad_norm": 0.23956741295965744,
|
|
"learning_rate": 1.9762552479783407e-05,
|
|
"loss": 0.5522,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.2092320966350302,
|
|
"grad_norm": 0.2316826087844408,
|
|
"learning_rate": 1.9760918655099376e-05,
|
|
"loss": 0.5484,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.2099511072763877,
|
|
"grad_norm": 0.23046848800785244,
|
|
"learning_rate": 1.9759279296677957e-05,
|
|
"loss": 0.5528,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.21067011791774518,
|
|
"grad_norm": 0.2313691713412135,
|
|
"learning_rate": 1.9757634405448554e-05,
|
|
"loss": 0.5378,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.21138912855910266,
|
|
"grad_norm": 0.23265144139370875,
|
|
"learning_rate": 1.9755983982343698e-05,
|
|
"loss": 0.5287,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.21210813920046018,
|
|
"grad_norm": 0.24235250103699738,
|
|
"learning_rate": 1.9754328028299064e-05,
|
|
"loss": 0.5568,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.21282714984181766,
|
|
"grad_norm": 0.25300425841033486,
|
|
"learning_rate": 1.9752666544253453e-05,
|
|
"loss": 0.528,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.21354616048317515,
|
|
"grad_norm": 0.25582352827044114,
|
|
"learning_rate": 1.975099953114881e-05,
|
|
"loss": 0.5372,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.21426517112453264,
|
|
"grad_norm": 0.2393144672938059,
|
|
"learning_rate": 1.9749326989930213e-05,
|
|
"loss": 0.5557,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.21498418176589013,
|
|
"grad_norm": 0.2338384244722033,
|
|
"learning_rate": 1.974764892154587e-05,
|
|
"loss": 0.5278,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.21570319240724764,
|
|
"grad_norm": 0.22718114309745505,
|
|
"learning_rate": 1.9745965326947126e-05,
|
|
"loss": 0.5292,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.21642220304860513,
|
|
"grad_norm": 0.23738809850294332,
|
|
"learning_rate": 1.9744276207088454e-05,
|
|
"loss": 0.5381,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.21714121368996261,
|
|
"grad_norm": 0.2350559821128524,
|
|
"learning_rate": 1.974258156292747e-05,
|
|
"loss": 0.5271,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.2178602243313201,
|
|
"grad_norm": 0.22301098485626983,
|
|
"learning_rate": 1.9740881395424904e-05,
|
|
"loss": 0.523,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.2185792349726776,
|
|
"grad_norm": 0.22973786300474133,
|
|
"learning_rate": 1.973917570554464e-05,
|
|
"loss": 0.5225,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.21929824561403508,
|
|
"grad_norm": 0.22820444656957772,
|
|
"learning_rate": 1.973746449425368e-05,
|
|
"loss": 0.5199,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.2200172562553926,
|
|
"grad_norm": 0.24294662043501544,
|
|
"learning_rate": 1.973574776252215e-05,
|
|
"loss": 0.5336,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.22073626689675008,
|
|
"grad_norm": 0.2253611699849513,
|
|
"learning_rate": 1.9734025511323317e-05,
|
|
"loss": 0.5079,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.22145527753810756,
|
|
"grad_norm": 0.22870243521742797,
|
|
"learning_rate": 1.9732297741633577e-05,
|
|
"loss": 0.5228,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.22217428817946505,
|
|
"grad_norm": 0.23223776960809672,
|
|
"learning_rate": 1.973056445443245e-05,
|
|
"loss": 0.5333,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.22289329882082254,
|
|
"grad_norm": 0.22960419047645408,
|
|
"learning_rate": 1.9728825650702577e-05,
|
|
"loss": 0.5314,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.22361230946218005,
|
|
"grad_norm": 0.22057428146654934,
|
|
"learning_rate": 1.972708133142974e-05,
|
|
"loss": 0.5352,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.22433132010353754,
|
|
"grad_norm": 0.2260456985994222,
|
|
"learning_rate": 1.9725331497602848e-05,
|
|
"loss": 0.5338,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.22505033074489503,
|
|
"grad_norm": 0.23375258498113377,
|
|
"learning_rate": 1.972357615021392e-05,
|
|
"loss": 0.5282,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.22576934138625251,
|
|
"grad_norm": 0.24090124346949987,
|
|
"learning_rate": 1.972181529025812e-05,
|
|
"loss": 0.5192,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.22648835202761,
|
|
"grad_norm": 0.23821272932675513,
|
|
"learning_rate": 1.9720048918733723e-05,
|
|
"loss": 0.5203,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.2272073626689675,
|
|
"grad_norm": 0.22696455116323191,
|
|
"learning_rate": 1.9718277036642135e-05,
|
|
"loss": 0.5478,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.227926373310325,
|
|
"grad_norm": 0.22304866738956428,
|
|
"learning_rate": 1.971649964498789e-05,
|
|
"loss": 0.5137,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.2286453839516825,
|
|
"grad_norm": 0.22051335274667153,
|
|
"learning_rate": 1.971471674477864e-05,
|
|
"loss": 0.5222,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.22936439459303998,
|
|
"grad_norm": 0.23818798516644749,
|
|
"learning_rate": 1.9712928337025152e-05,
|
|
"loss": 0.5297,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.23008340523439746,
|
|
"grad_norm": 0.23680065043886803,
|
|
"learning_rate": 1.9711134422741335e-05,
|
|
"loss": 0.5384,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.23080241587575495,
|
|
"grad_norm": 0.22339621581722777,
|
|
"learning_rate": 1.9709335002944205e-05,
|
|
"loss": 0.5047,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.23152142651711247,
|
|
"grad_norm": 0.23067104286841825,
|
|
"learning_rate": 1.9707530078653903e-05,
|
|
"loss": 0.529,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.23224043715846995,
|
|
"grad_norm": 0.2205878433893513,
|
|
"learning_rate": 1.9705719650893692e-05,
|
|
"loss": 0.5245,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.23295944779982744,
|
|
"grad_norm": 0.23060983274023172,
|
|
"learning_rate": 1.9703903720689954e-05,
|
|
"loss": 0.5321,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.23367845844118493,
|
|
"grad_norm": 0.2322122812506246,
|
|
"learning_rate": 1.9702082289072192e-05,
|
|
"loss": 0.5389,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.23439746908254241,
|
|
"grad_norm": 0.22567259557106886,
|
|
"learning_rate": 1.9700255357073023e-05,
|
|
"loss": 0.5273,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.2351164797238999,
|
|
"grad_norm": 0.22286904843242045,
|
|
"learning_rate": 1.9698422925728184e-05,
|
|
"loss": 0.5247,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.23583549036525742,
|
|
"grad_norm": 0.24293209876127186,
|
|
"learning_rate": 1.969658499607654e-05,
|
|
"loss": 0.5055,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.2365545010066149,
|
|
"grad_norm": 0.23539146522743076,
|
|
"learning_rate": 1.9694741569160057e-05,
|
|
"loss": 0.5403,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.2372735116479724,
|
|
"grad_norm": 0.23425506379258582,
|
|
"learning_rate": 1.969289264602383e-05,
|
|
"loss": 0.5494,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.23799252228932988,
|
|
"grad_norm": 0.23818614892816958,
|
|
"learning_rate": 1.9691038227716062e-05,
|
|
"loss": 0.5355,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.23871153293068736,
|
|
"grad_norm": 0.22586868983651667,
|
|
"learning_rate": 1.9689178315288073e-05,
|
|
"loss": 0.5285,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.23943054357204488,
|
|
"grad_norm": 0.24169875014394682,
|
|
"learning_rate": 1.9687312909794304e-05,
|
|
"loss": 0.5434,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.24014955421340237,
|
|
"grad_norm": 0.2619831502991906,
|
|
"learning_rate": 1.9685442012292303e-05,
|
|
"loss": 0.5262,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.24086856485475985,
|
|
"grad_norm": 0.2457796865562363,
|
|
"learning_rate": 1.9683565623842734e-05,
|
|
"loss": 0.5305,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.24158757549611734,
|
|
"grad_norm": 0.24695986543118498,
|
|
"learning_rate": 1.9681683745509376e-05,
|
|
"loss": 0.5431,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.24230658613747483,
|
|
"grad_norm": 0.2262775958024951,
|
|
"learning_rate": 1.9679796378359114e-05,
|
|
"loss": 0.5288,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.24302559677883231,
|
|
"grad_norm": 0.2167246493798092,
|
|
"learning_rate": 1.967790352346195e-05,
|
|
"loss": 0.5347,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.24374460742018983,
|
|
"grad_norm": 0.22096827906452082,
|
|
"learning_rate": 1.9676005181891e-05,
|
|
"loss": 0.5202,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.24446361806154732,
|
|
"grad_norm": 0.23135900618997918,
|
|
"learning_rate": 1.967410135472249e-05,
|
|
"loss": 0.5259,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.2451826287029048,
|
|
"grad_norm": 0.23070941887649535,
|
|
"learning_rate": 1.9672192043035744e-05,
|
|
"loss": 0.5194,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.2459016393442623,
|
|
"grad_norm": 0.23834592003390928,
|
|
"learning_rate": 1.9670277247913205e-05,
|
|
"loss": 0.5476,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.24662064998561978,
|
|
"grad_norm": 0.23438083518633582,
|
|
"learning_rate": 1.966835697044043e-05,
|
|
"loss": 0.5208,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.2473396606269773,
|
|
"grad_norm": 0.23226909980735055,
|
|
"learning_rate": 1.9666431211706073e-05,
|
|
"loss": 0.5221,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.24805867126833478,
|
|
"grad_norm": 0.2466818291192998,
|
|
"learning_rate": 1.9664499972801902e-05,
|
|
"loss": 0.5382,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.24877768190969227,
|
|
"grad_norm": 0.22066837565634534,
|
|
"learning_rate": 1.966256325482279e-05,
|
|
"loss": 0.5127,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.24949669255104975,
|
|
"grad_norm": 0.23376402195778545,
|
|
"learning_rate": 1.966062105886672e-05,
|
|
"loss": 0.5252,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.25021570319240727,
|
|
"grad_norm": 0.22849185736104233,
|
|
"learning_rate": 1.9658673386034773e-05,
|
|
"loss": 0.5453,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.25093471383376476,
|
|
"grad_norm": 0.24879707626081934,
|
|
"learning_rate": 1.965672023743114e-05,
|
|
"loss": 0.5344,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.25165372447512224,
|
|
"grad_norm": 0.2253533058308538,
|
|
"learning_rate": 1.9654761614163112e-05,
|
|
"loss": 0.5202,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.25237273511647973,
|
|
"grad_norm": 0.24628121730237923,
|
|
"learning_rate": 1.9652797517341095e-05,
|
|
"loss": 0.519,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.2530917457578372,
|
|
"grad_norm": 0.21993950116541036,
|
|
"learning_rate": 1.9650827948078586e-05,
|
|
"loss": 0.5181,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.2538107563991947,
|
|
"grad_norm": 0.2216964533979465,
|
|
"learning_rate": 1.9648852907492187e-05,
|
|
"loss": 0.535,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.2545297670405522,
|
|
"grad_norm": 0.22733449106986603,
|
|
"learning_rate": 1.9646872396701603e-05,
|
|
"loss": 0.5341,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.2552487776819097,
|
|
"grad_norm": 0.23300459174017882,
|
|
"learning_rate": 1.964488641682965e-05,
|
|
"loss": 0.5457,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.25596778832326716,
|
|
"grad_norm": 0.2260882801630965,
|
|
"learning_rate": 1.9642894969002224e-05,
|
|
"loss": 0.5302,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.25668679896462465,
|
|
"grad_norm": 0.21935871604787338,
|
|
"learning_rate": 1.964089805434834e-05,
|
|
"loss": 0.5213,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.2574058096059822,
|
|
"grad_norm": 0.22969789518771314,
|
|
"learning_rate": 1.96388956740001e-05,
|
|
"loss": 0.5127,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.2581248202473397,
|
|
"grad_norm": 0.2217372653926415,
|
|
"learning_rate": 1.963688782909271e-05,
|
|
"loss": 0.5504,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.25884383088869717,
|
|
"grad_norm": 0.22767228659738542,
|
|
"learning_rate": 1.9634874520764478e-05,
|
|
"loss": 0.5119,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.25956284153005466,
|
|
"grad_norm": 0.2254639647009183,
|
|
"learning_rate": 1.96328557501568e-05,
|
|
"loss": 0.5207,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.26028185217141214,
|
|
"grad_norm": 0.23627855092627786,
|
|
"learning_rate": 1.9630831518414176e-05,
|
|
"loss": 0.5335,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.26100086281276963,
|
|
"grad_norm": 0.2281833795806712,
|
|
"learning_rate": 1.9628801826684197e-05,
|
|
"loss": 0.5279,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.2617198734541271,
|
|
"grad_norm": 0.22218430509158774,
|
|
"learning_rate": 1.9626766676117555e-05,
|
|
"loss": 0.5228,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.2624388840954846,
|
|
"grad_norm": 0.21712174411589044,
|
|
"learning_rate": 1.962472606786803e-05,
|
|
"loss": 0.525,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.2631578947368421,
|
|
"grad_norm": 0.23417579156196858,
|
|
"learning_rate": 1.9622680003092503e-05,
|
|
"loss": 0.5211,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.2638769053781996,
|
|
"grad_norm": 0.2240231565672544,
|
|
"learning_rate": 1.962062848295095e-05,
|
|
"loss": 0.5371,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.26459591601955706,
|
|
"grad_norm": 0.2222473795124956,
|
|
"learning_rate": 1.961857150860642e-05,
|
|
"loss": 0.5434,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.2653149266609146,
|
|
"grad_norm": 0.23076834678829988,
|
|
"learning_rate": 1.961650908122508e-05,
|
|
"loss": 0.522,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.2660339373022721,
|
|
"grad_norm": 0.23132830942202995,
|
|
"learning_rate": 1.961444120197618e-05,
|
|
"loss": 0.5141,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.2667529479436296,
|
|
"grad_norm": 0.2262698238973961,
|
|
"learning_rate": 1.961236787203205e-05,
|
|
"loss": 0.5175,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.26747195858498707,
|
|
"grad_norm": 0.2501956106882812,
|
|
"learning_rate": 1.9610289092568125e-05,
|
|
"loss": 0.5211,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.26819096922634456,
|
|
"grad_norm": 0.23101958311553186,
|
|
"learning_rate": 1.9608204864762923e-05,
|
|
"loss": 0.5388,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.26890997986770204,
|
|
"grad_norm": 0.22903904556030297,
|
|
"learning_rate": 1.9606115189798047e-05,
|
|
"loss": 0.513,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.26962899050905953,
|
|
"grad_norm": 0.2360316924178287,
|
|
"learning_rate": 1.9604020068858197e-05,
|
|
"loss": 0.5215,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.270348001150417,
|
|
"grad_norm": 0.2323414670928527,
|
|
"learning_rate": 1.960191950313115e-05,
|
|
"loss": 0.5197,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.2710670117917745,
|
|
"grad_norm": 0.23097225391963927,
|
|
"learning_rate": 1.9599813493807778e-05,
|
|
"loss": 0.5132,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.271786022433132,
|
|
"grad_norm": 0.22586506109921145,
|
|
"learning_rate": 1.959770204208204e-05,
|
|
"loss": 0.5217,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.2725050330744895,
|
|
"grad_norm": 0.2362053442728093,
|
|
"learning_rate": 1.959558514915097e-05,
|
|
"loss": 0.5328,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.273224043715847,
|
|
"grad_norm": 0.2326560206023545,
|
|
"learning_rate": 1.9593462816214698e-05,
|
|
"loss": 0.543,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.2739430543572045,
|
|
"grad_norm": 0.23463535723170362,
|
|
"learning_rate": 1.959133504447644e-05,
|
|
"loss": 0.5328,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.274662064998562,
|
|
"grad_norm": 0.2304443090931523,
|
|
"learning_rate": 1.9589201835142476e-05,
|
|
"loss": 0.5095,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.2753810756399195,
|
|
"grad_norm": 0.21985857057475458,
|
|
"learning_rate": 1.9587063189422188e-05,
|
|
"loss": 0.5194,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.27610008628127697,
|
|
"grad_norm": 0.2321461057505647,
|
|
"learning_rate": 1.9584919108528036e-05,
|
|
"loss": 0.5232,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.27681909692263446,
|
|
"grad_norm": 0.23450598290151903,
|
|
"learning_rate": 1.9582769593675557e-05,
|
|
"loss": 0.5148,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.27753810756399194,
|
|
"grad_norm": 0.23719067147262055,
|
|
"learning_rate": 1.958061464608337e-05,
|
|
"loss": 0.5241,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.27825711820534943,
|
|
"grad_norm": 0.22914212741212578,
|
|
"learning_rate": 1.9578454266973184e-05,
|
|
"loss": 0.5292,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.2789761288467069,
|
|
"grad_norm": 0.2428143124876925,
|
|
"learning_rate": 1.9576288457569764e-05,
|
|
"loss": 0.5394,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.2796951394880644,
|
|
"grad_norm": 0.24019828487603107,
|
|
"learning_rate": 1.9574117219100975e-05,
|
|
"loss": 0.5314,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.2804141501294219,
|
|
"grad_norm": 0.23020307675607476,
|
|
"learning_rate": 1.9571940552797758e-05,
|
|
"loss": 0.5514,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.28113316077077943,
|
|
"grad_norm": 0.23117631707107703,
|
|
"learning_rate": 1.9569758459894118e-05,
|
|
"loss": 0.5207,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.2818521714121369,
|
|
"grad_norm": 0.23188555518556003,
|
|
"learning_rate": 1.9567570941627144e-05,
|
|
"loss": 0.5106,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.2825711820534944,
|
|
"grad_norm": 0.22719305269705242,
|
|
"learning_rate": 1.9565377999237007e-05,
|
|
"loss": 0.5397,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.2832901926948519,
|
|
"grad_norm": 0.2425886923256403,
|
|
"learning_rate": 1.9563179633966944e-05,
|
|
"loss": 0.5389,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.2840092033362094,
|
|
"grad_norm": 0.2248447291879169,
|
|
"learning_rate": 1.9560975847063267e-05,
|
|
"loss": 0.5314,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.28472821397756687,
|
|
"grad_norm": 0.22291943170416983,
|
|
"learning_rate": 1.955876663977537e-05,
|
|
"loss": 0.5234,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.28544722461892436,
|
|
"grad_norm": 0.23294609267878633,
|
|
"learning_rate": 1.955655201335571e-05,
|
|
"loss": 0.5245,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.28616623526028184,
|
|
"grad_norm": 0.2467537340412599,
|
|
"learning_rate": 1.9554331969059825e-05,
|
|
"loss": 0.5185,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.28688524590163933,
|
|
"grad_norm": 0.24172071683786844,
|
|
"learning_rate": 1.955210650814632e-05,
|
|
"loss": 0.5443,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.2876042565429968,
|
|
"grad_norm": 0.22605868965849668,
|
|
"learning_rate": 1.9549875631876864e-05,
|
|
"loss": 0.5121,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.2883232671843543,
|
|
"grad_norm": 0.22942794996365853,
|
|
"learning_rate": 1.9547639341516206e-05,
|
|
"loss": 0.5095,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.28904227782571185,
|
|
"grad_norm": 0.2298099151783695,
|
|
"learning_rate": 1.9545397638332163e-05,
|
|
"loss": 0.5286,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.28976128846706933,
|
|
"grad_norm": 0.24778629700549126,
|
|
"learning_rate": 1.9543150523595625e-05,
|
|
"loss": 0.537,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.2904802991084268,
|
|
"grad_norm": 0.2322170927230343,
|
|
"learning_rate": 1.954089799858053e-05,
|
|
"loss": 0.5294,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.2911993097497843,
|
|
"grad_norm": 0.2238734266550472,
|
|
"learning_rate": 1.953864006456391e-05,
|
|
"loss": 0.5146,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.2919183203911418,
|
|
"grad_norm": 0.21981906060948142,
|
|
"learning_rate": 1.9536376722825844e-05,
|
|
"loss": 0.5077,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.2926373310324993,
|
|
"grad_norm": 0.22954693685234034,
|
|
"learning_rate": 1.953410797464949e-05,
|
|
"loss": 0.5335,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.29335634167385677,
|
|
"grad_norm": 0.23985169357884734,
|
|
"learning_rate": 1.9531833821321057e-05,
|
|
"loss": 0.5376,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.29407535231521426,
|
|
"grad_norm": 0.2207958326221015,
|
|
"learning_rate": 1.952955426412983e-05,
|
|
"loss": 0.52,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.29479436295657174,
|
|
"grad_norm": 0.23780493129732794,
|
|
"learning_rate": 1.9527269304368154e-05,
|
|
"loss": 0.4906,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.29551337359792923,
|
|
"grad_norm": 0.2330149879325887,
|
|
"learning_rate": 1.9524978943331435e-05,
|
|
"loss": 0.5194,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.2962323842392867,
|
|
"grad_norm": 0.2386438942499068,
|
|
"learning_rate": 1.9522683182318145e-05,
|
|
"loss": 0.5346,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.29695139488064426,
|
|
"grad_norm": 0.23293713570114105,
|
|
"learning_rate": 1.9520382022629814e-05,
|
|
"loss": 0.5459,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.29767040552200175,
|
|
"grad_norm": 0.21852751340739718,
|
|
"learning_rate": 1.951807546557103e-05,
|
|
"loss": 0.5164,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.29838941616335923,
|
|
"grad_norm": 0.23863393320891652,
|
|
"learning_rate": 1.951576351244945e-05,
|
|
"loss": 0.5379,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.2991084268047167,
|
|
"grad_norm": 0.22760314810091892,
|
|
"learning_rate": 1.9513446164575782e-05,
|
|
"loss": 0.5227,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.2998274374460742,
|
|
"grad_norm": 0.22785151169541662,
|
|
"learning_rate": 1.9511123423263797e-05,
|
|
"loss": 0.5279,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.3005464480874317,
|
|
"grad_norm": 0.22331529138091233,
|
|
"learning_rate": 1.950879528983032e-05,
|
|
"loss": 0.5168,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.3012654587287892,
|
|
"grad_norm": 0.2265341653861222,
|
|
"learning_rate": 1.9506461765595233e-05,
|
|
"loss": 0.5129,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.30198446937014667,
|
|
"grad_norm": 0.224803847238097,
|
|
"learning_rate": 1.950412285188148e-05,
|
|
"loss": 0.5113,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.30270348001150416,
|
|
"grad_norm": 0.22826292672125245,
|
|
"learning_rate": 1.9501778550015057e-05,
|
|
"loss": 0.5172,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.30342249065286164,
|
|
"grad_norm": 0.23941019012024453,
|
|
"learning_rate": 1.949942886132501e-05,
|
|
"loss": 0.5364,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.30414150129421913,
|
|
"grad_norm": 0.23098899764233535,
|
|
"learning_rate": 1.9497073787143445e-05,
|
|
"loss": 0.5198,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.3048605119355767,
|
|
"grad_norm": 0.2235225210957512,
|
|
"learning_rate": 1.9494713328805522e-05,
|
|
"loss": 0.5105,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.30557952257693416,
|
|
"grad_norm": 0.24219249616083108,
|
|
"learning_rate": 1.949234748764945e-05,
|
|
"loss": 0.5178,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.30629853321829165,
|
|
"grad_norm": 0.2294038434256185,
|
|
"learning_rate": 1.9489976265016483e-05,
|
|
"loss": 0.5236,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.30701754385964913,
|
|
"grad_norm": 0.22860251975225562,
|
|
"learning_rate": 1.9487599662250945e-05,
|
|
"loss": 0.5151,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.3077365545010066,
|
|
"grad_norm": 0.22768766298971088,
|
|
"learning_rate": 1.948521768070019e-05,
|
|
"loss": 0.5157,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.3084555651423641,
|
|
"grad_norm": 0.2262280341082414,
|
|
"learning_rate": 1.9482830321714634e-05,
|
|
"loss": 0.5179,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.3091745757837216,
|
|
"grad_norm": 0.21765278037284172,
|
|
"learning_rate": 1.9480437586647737e-05,
|
|
"loss": 0.5249,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.3098935864250791,
|
|
"grad_norm": 0.21991607271397515,
|
|
"learning_rate": 1.9478039476856004e-05,
|
|
"loss": 0.5151,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.31061259706643657,
|
|
"grad_norm": 0.22731893412220183,
|
|
"learning_rate": 1.9475635993698995e-05,
|
|
"loss": 0.5135,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.31133160770779406,
|
|
"grad_norm": 0.22518016603767735,
|
|
"learning_rate": 1.9473227138539305e-05,
|
|
"loss": 0.5062,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.31205061834915154,
|
|
"grad_norm": 0.22989447620543818,
|
|
"learning_rate": 1.9470812912742588e-05,
|
|
"loss": 0.5097,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.3127696289905091,
|
|
"grad_norm": 0.22642057674043994,
|
|
"learning_rate": 1.9468393317677537e-05,
|
|
"loss": 0.5136,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.3134886396318666,
|
|
"grad_norm": 0.2243558671870111,
|
|
"learning_rate": 1.9465968354715882e-05,
|
|
"loss": 0.5109,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.31420765027322406,
|
|
"grad_norm": 0.23756228609410254,
|
|
"learning_rate": 1.946353802523241e-05,
|
|
"loss": 0.5187,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.31492666091458155,
|
|
"grad_norm": 0.22829309083723986,
|
|
"learning_rate": 1.946110233060493e-05,
|
|
"loss": 0.5119,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.31564567155593903,
|
|
"grad_norm": 0.2258910659924151,
|
|
"learning_rate": 1.945866127221432e-05,
|
|
"loss": 0.5269,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.3163646821972965,
|
|
"grad_norm": 0.24851677104917747,
|
|
"learning_rate": 1.945621485144447e-05,
|
|
"loss": 0.5211,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.317083692838654,
|
|
"grad_norm": 0.2252194695451131,
|
|
"learning_rate": 1.9453763069682336e-05,
|
|
"loss": 0.5154,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.3178027034800115,
|
|
"grad_norm": 0.2567628615443648,
|
|
"learning_rate": 1.94513059283179e-05,
|
|
"loss": 0.5224,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.318521714121369,
|
|
"grad_norm": 0.22849096152898013,
|
|
"learning_rate": 1.9448843428744175e-05,
|
|
"loss": 0.4982,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.31924072476272647,
|
|
"grad_norm": 0.21917519968492866,
|
|
"learning_rate": 1.944637557235723e-05,
|
|
"loss": 0.5091,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.31995973540408396,
|
|
"grad_norm": 0.22313052392410496,
|
|
"learning_rate": 1.944390236055616e-05,
|
|
"loss": 0.536,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.3206787460454415,
|
|
"grad_norm": 0.25969930780411865,
|
|
"learning_rate": 1.9441423794743092e-05,
|
|
"loss": 0.5357,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.321397756686799,
|
|
"grad_norm": 0.2489657558014886,
|
|
"learning_rate": 1.9438939876323202e-05,
|
|
"loss": 0.5148,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.3221167673281565,
|
|
"grad_norm": 0.24222728781124633,
|
|
"learning_rate": 1.9436450606704688e-05,
|
|
"loss": 0.5291,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.32283577796951396,
|
|
"grad_norm": 0.2258550445944719,
|
|
"learning_rate": 1.943395598729879e-05,
|
|
"loss": 0.5101,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.32355478861087145,
|
|
"grad_norm": 0.22598842350418805,
|
|
"learning_rate": 1.9431456019519774e-05,
|
|
"loss": 0.5107,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.32427379925222893,
|
|
"grad_norm": 0.23349735743483102,
|
|
"learning_rate": 1.9428950704784944e-05,
|
|
"loss": 0.5078,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.3249928098935864,
|
|
"grad_norm": 0.2271548994925287,
|
|
"learning_rate": 1.942644004451463e-05,
|
|
"loss": 0.5317,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.3257118205349439,
|
|
"grad_norm": 0.2149592225481257,
|
|
"learning_rate": 1.94239240401322e-05,
|
|
"loss": 0.4978,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.3264308311763014,
|
|
"grad_norm": 0.2331575054290221,
|
|
"learning_rate": 1.9421402693064037e-05,
|
|
"loss": 0.5117,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.3271498418176589,
|
|
"grad_norm": 0.25608468064653417,
|
|
"learning_rate": 1.941887600473958e-05,
|
|
"loss": 0.5102,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.32786885245901637,
|
|
"grad_norm": 0.2298492196508499,
|
|
"learning_rate": 1.941634397659126e-05,
|
|
"loss": 0.5161,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.3285878631003739,
|
|
"grad_norm": 0.2316441559609167,
|
|
"learning_rate": 1.941380661005457e-05,
|
|
"loss": 0.527,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.3293068737417314,
|
|
"grad_norm": 0.2342418485461944,
|
|
"learning_rate": 1.9411263906568007e-05,
|
|
"loss": 0.5153,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.3300258843830889,
|
|
"grad_norm": 0.21500183625653949,
|
|
"learning_rate": 1.94087158675731e-05,
|
|
"loss": 0.5227,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.3307448950244464,
|
|
"grad_norm": 0.21606183098344467,
|
|
"learning_rate": 1.9406162494514406e-05,
|
|
"loss": 0.5151,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.33146390566580386,
|
|
"grad_norm": 0.21936796559465915,
|
|
"learning_rate": 1.9403603788839503e-05,
|
|
"loss": 0.5342,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.33218291630716135,
|
|
"grad_norm": 0.22373896227411189,
|
|
"learning_rate": 1.940103975199899e-05,
|
|
"loss": 0.5176,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.33290192694851883,
|
|
"grad_norm": 0.23290539221639253,
|
|
"learning_rate": 1.93984703854465e-05,
|
|
"loss": 0.5263,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.3336209375898763,
|
|
"grad_norm": 0.2144772210153438,
|
|
"learning_rate": 1.9395895690638662e-05,
|
|
"loss": 0.504,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.3343399482312338,
|
|
"grad_norm": 0.2238835015898237,
|
|
"learning_rate": 1.9393315669035157e-05,
|
|
"loss": 0.522,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.3350589588725913,
|
|
"grad_norm": 0.23661097593806635,
|
|
"learning_rate": 1.9390730322098667e-05,
|
|
"loss": 0.5149,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.3357779695139488,
|
|
"grad_norm": 0.2261318626286406,
|
|
"learning_rate": 1.9388139651294897e-05,
|
|
"loss": 0.5251,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.3364969801553063,
|
|
"grad_norm": 0.23000865916981403,
|
|
"learning_rate": 1.9385543658092572e-05,
|
|
"loss": 0.5302,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.3372159907966638,
|
|
"grad_norm": 0.22875455612471773,
|
|
"learning_rate": 1.938294234396343e-05,
|
|
"loss": 0.5211,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.3379350014380213,
|
|
"grad_norm": 0.2219866431765329,
|
|
"learning_rate": 1.938033571038223e-05,
|
|
"loss": 0.528,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.3386540120793788,
|
|
"grad_norm": 0.21715874352702125,
|
|
"learning_rate": 1.9377723758826746e-05,
|
|
"loss": 0.49,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.3393730227207363,
|
|
"grad_norm": 0.22382192120897512,
|
|
"learning_rate": 1.9375106490777768e-05,
|
|
"loss": 0.5129,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.34009203336209376,
|
|
"grad_norm": 0.2961426979518339,
|
|
"learning_rate": 1.9372483907719092e-05,
|
|
"loss": 0.4934,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.34081104400345125,
|
|
"grad_norm": 0.23631695058807112,
|
|
"learning_rate": 1.936985601113754e-05,
|
|
"loss": 0.5105,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.34153005464480873,
|
|
"grad_norm": 0.2294202832007626,
|
|
"learning_rate": 1.936722280252294e-05,
|
|
"loss": 0.5203,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.3422490652861662,
|
|
"grad_norm": 0.2184499380948413,
|
|
"learning_rate": 1.9364584283368127e-05,
|
|
"loss": 0.4972,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.3429680759275237,
|
|
"grad_norm": 0.3176172772787008,
|
|
"learning_rate": 1.9361940455168954e-05,
|
|
"loss": 0.5156,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.3436870865688812,
|
|
"grad_norm": 0.22609068671139035,
|
|
"learning_rate": 1.935929131942428e-05,
|
|
"loss": 0.5182,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.34440609721023874,
|
|
"grad_norm": 0.23496652718953012,
|
|
"learning_rate": 1.9356636877635975e-05,
|
|
"loss": 0.5247,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.3451251078515962,
|
|
"grad_norm": 0.2410937708363873,
|
|
"learning_rate": 1.935397713130892e-05,
|
|
"loss": 0.5155,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.3458441184929537,
|
|
"grad_norm": 0.24457505196862847,
|
|
"learning_rate": 1.935131208195099e-05,
|
|
"loss": 0.5234,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.3465631291343112,
|
|
"grad_norm": 0.2314728776928187,
|
|
"learning_rate": 1.9348641731073085e-05,
|
|
"loss": 0.5004,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.3472821397756687,
|
|
"grad_norm": 0.24582034077579354,
|
|
"learning_rate": 1.9345966080189095e-05,
|
|
"loss": 0.5425,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.3480011504170262,
|
|
"grad_norm": 0.22554765028784285,
|
|
"learning_rate": 1.934328513081592e-05,
|
|
"loss": 0.5265,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.34872016105838366,
|
|
"grad_norm": 0.2216814016503243,
|
|
"learning_rate": 1.9340598884473478e-05,
|
|
"loss": 0.5137,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.34943917169974115,
|
|
"grad_norm": 0.22611290789235558,
|
|
"learning_rate": 1.9337907342684664e-05,
|
|
"loss": 0.4992,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.35015818234109863,
|
|
"grad_norm": 0.24118982095464295,
|
|
"learning_rate": 1.933521050697539e-05,
|
|
"loss": 0.5046,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.3508771929824561,
|
|
"grad_norm": 0.23009247877401107,
|
|
"learning_rate": 1.933250837887457e-05,
|
|
"loss": 0.533,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.3515962036238136,
|
|
"grad_norm": 0.22314852251039957,
|
|
"learning_rate": 1.932980095991412e-05,
|
|
"loss": 0.5123,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.35231521426517115,
|
|
"grad_norm": 0.232637861133316,
|
|
"learning_rate": 1.9327088251628946e-05,
|
|
"loss": 0.5195,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.35303422490652864,
|
|
"grad_norm": 0.2250538273016955,
|
|
"learning_rate": 1.9324370255556957e-05,
|
|
"loss": 0.5237,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.3537532355478861,
|
|
"grad_norm": 0.2340025948600299,
|
|
"learning_rate": 1.932164697323906e-05,
|
|
"loss": 0.5081,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.3544722461892436,
|
|
"grad_norm": 0.23692479665974087,
|
|
"learning_rate": 1.9318918406219168e-05,
|
|
"loss": 0.5218,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.3551912568306011,
|
|
"grad_norm": 0.5252858739768315,
|
|
"learning_rate": 1.9316184556044176e-05,
|
|
"loss": 0.5291,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.3559102674719586,
|
|
"grad_norm": 0.22584081496056052,
|
|
"learning_rate": 1.931344542426398e-05,
|
|
"loss": 0.5115,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.3566292781133161,
|
|
"grad_norm": 0.22181076897628466,
|
|
"learning_rate": 1.931070101243147e-05,
|
|
"loss": 0.5236,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.35734828875467356,
|
|
"grad_norm": 0.2368592260778137,
|
|
"learning_rate": 1.930795132210253e-05,
|
|
"loss": 0.5196,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.35806729939603105,
|
|
"grad_norm": 0.25285566239619084,
|
|
"learning_rate": 1.930519635483604e-05,
|
|
"loss": 0.5348,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.35878631003738853,
|
|
"grad_norm": 0.2172012713603105,
|
|
"learning_rate": 1.9302436112193863e-05,
|
|
"loss": 0.5133,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.359505320678746,
|
|
"grad_norm": 0.22253220913655272,
|
|
"learning_rate": 1.929967059574086e-05,
|
|
"loss": 0.5195,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.36022433132010356,
|
|
"grad_norm": 0.2318823497847268,
|
|
"learning_rate": 1.9296899807044876e-05,
|
|
"loss": 0.5013,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.36094334196146105,
|
|
"grad_norm": 0.22474959745467496,
|
|
"learning_rate": 1.9294123747676757e-05,
|
|
"loss": 0.51,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.36166235260281854,
|
|
"grad_norm": 0.22499764413272827,
|
|
"learning_rate": 1.929134241921032e-05,
|
|
"loss": 0.5196,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.362381363244176,
|
|
"grad_norm": 0.22408515127502746,
|
|
"learning_rate": 1.928855582322238e-05,
|
|
"loss": 0.5061,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.3631003738855335,
|
|
"grad_norm": 0.2218720986538149,
|
|
"learning_rate": 1.9285763961292738e-05,
|
|
"loss": 0.4987,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.363819384526891,
|
|
"grad_norm": 0.23440751496432688,
|
|
"learning_rate": 1.9282966835004177e-05,
|
|
"loss": 0.4959,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.3645383951682485,
|
|
"grad_norm": 0.23791499533194543,
|
|
"learning_rate": 1.9280164445942467e-05,
|
|
"loss": 0.5045,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.365257405809606,
|
|
"grad_norm": 0.23397274808648272,
|
|
"learning_rate": 1.927735679569636e-05,
|
|
"loss": 0.51,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.36597641645096346,
|
|
"grad_norm": 0.22441000781632436,
|
|
"learning_rate": 1.9274543885857594e-05,
|
|
"loss": 0.5246,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.36669542709232095,
|
|
"grad_norm": 0.22439109575711147,
|
|
"learning_rate": 1.9271725718020877e-05,
|
|
"loss": 0.5163,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.36741443773367843,
|
|
"grad_norm": 0.23923944832721677,
|
|
"learning_rate": 1.9268902293783918e-05,
|
|
"loss": 0.4949,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.368133448375036,
|
|
"grad_norm": 0.22120021514337773,
|
|
"learning_rate": 1.926607361474739e-05,
|
|
"loss": 0.5122,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.36885245901639346,
|
|
"grad_norm": 0.2371975422289003,
|
|
"learning_rate": 1.9263239682514953e-05,
|
|
"loss": 0.5214,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.36957146965775095,
|
|
"grad_norm": 0.232241975255212,
|
|
"learning_rate": 1.9260400498693236e-05,
|
|
"loss": 0.5031,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.37029048029910844,
|
|
"grad_norm": 0.22581754155054365,
|
|
"learning_rate": 1.9257556064891858e-05,
|
|
"loss": 0.5011,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.3710094909404659,
|
|
"grad_norm": 0.24102315491721474,
|
|
"learning_rate": 1.9254706382723404e-05,
|
|
"loss": 0.518,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.3717285015818234,
|
|
"grad_norm": 0.2224274974729962,
|
|
"learning_rate": 1.925185145380344e-05,
|
|
"loss": 0.4986,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.3724475122231809,
|
|
"grad_norm": 0.2352443275901719,
|
|
"learning_rate": 1.9248991279750507e-05,
|
|
"loss": 0.5067,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.3731665228645384,
|
|
"grad_norm": 0.21567561516996372,
|
|
"learning_rate": 1.9246125862186116e-05,
|
|
"loss": 0.5139,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.3738855335058959,
|
|
"grad_norm": 0.222006321644596,
|
|
"learning_rate": 1.924325520273475e-05,
|
|
"loss": 0.5028,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.37460454414725336,
|
|
"grad_norm": 0.22928775820841665,
|
|
"learning_rate": 1.924037930302387e-05,
|
|
"loss": 0.5028,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.37532355478861085,
|
|
"grad_norm": 0.2316016899827689,
|
|
"learning_rate": 1.9237498164683898e-05,
|
|
"loss": 0.5161,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.3760425654299684,
|
|
"grad_norm": 0.22536272794248402,
|
|
"learning_rate": 1.9234611789348242e-05,
|
|
"loss": 0.5109,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.3767615760713259,
|
|
"grad_norm": 0.23014273480588587,
|
|
"learning_rate": 1.9231720178653254e-05,
|
|
"loss": 0.5029,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.37748058671268336,
|
|
"grad_norm": 0.22814428980830126,
|
|
"learning_rate": 1.9228823334238284e-05,
|
|
"loss": 0.5022,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.37819959735404085,
|
|
"grad_norm": 0.2167038042325131,
|
|
"learning_rate": 1.9225921257745623e-05,
|
|
"loss": 0.5108,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.37891860799539834,
|
|
"grad_norm": 0.23434021986166953,
|
|
"learning_rate": 1.9223013950820542e-05,
|
|
"loss": 0.5064,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.3796376186367558,
|
|
"grad_norm": 0.225965873014395,
|
|
"learning_rate": 1.922010141511128e-05,
|
|
"loss": 0.514,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.3803566292781133,
|
|
"grad_norm": 0.22919798332492977,
|
|
"learning_rate": 1.921718365226903e-05,
|
|
"loss": 0.4962,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.3810756399194708,
|
|
"grad_norm": 0.23126416170567896,
|
|
"learning_rate": 1.921426066394795e-05,
|
|
"loss": 0.521,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.3817946505608283,
|
|
"grad_norm": 0.25582988216177793,
|
|
"learning_rate": 1.9211332451805173e-05,
|
|
"loss": 0.5261,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.3825136612021858,
|
|
"grad_norm": 0.2281178044654486,
|
|
"learning_rate": 1.9208399017500773e-05,
|
|
"loss": 0.503,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.38323267184354326,
|
|
"grad_norm": 0.2631327587490055,
|
|
"learning_rate": 1.920546036269781e-05,
|
|
"loss": 0.5046,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.3839516824849008,
|
|
"grad_norm": 0.23088230748866914,
|
|
"learning_rate": 1.9202516489062273e-05,
|
|
"loss": 0.5008,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.3846706931262583,
|
|
"grad_norm": 0.23527118660089594,
|
|
"learning_rate": 1.9199567398263136e-05,
|
|
"loss": 0.5154,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.3853897037676158,
|
|
"grad_norm": 0.293795071835734,
|
|
"learning_rate": 1.919661309197232e-05,
|
|
"loss": 0.5095,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.38610871440897326,
|
|
"grad_norm": 0.239008766521943,
|
|
"learning_rate": 1.9193653571864706e-05,
|
|
"loss": 0.5361,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.38682772505033075,
|
|
"grad_norm": 0.2317785167460494,
|
|
"learning_rate": 1.9190688839618122e-05,
|
|
"loss": 0.5263,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.38754673569168824,
|
|
"grad_norm": 0.2247915886082916,
|
|
"learning_rate": 1.9187718896913364e-05,
|
|
"loss": 0.5206,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.3882657463330457,
|
|
"grad_norm": 0.2342002775002291,
|
|
"learning_rate": 1.918474374543417e-05,
|
|
"loss": 0.5148,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.3889847569744032,
|
|
"grad_norm": 0.24713293310816917,
|
|
"learning_rate": 1.918176338686724e-05,
|
|
"loss": 0.5291,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.3897037676157607,
|
|
"grad_norm": 0.2283808207676213,
|
|
"learning_rate": 1.9178777822902223e-05,
|
|
"loss": 0.5187,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.3904227782571182,
|
|
"grad_norm": 0.23504214119034128,
|
|
"learning_rate": 1.9175787055231713e-05,
|
|
"loss": 0.5146,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.3911417888984757,
|
|
"grad_norm": 0.2278359350821238,
|
|
"learning_rate": 1.917279108555127e-05,
|
|
"loss": 0.5052,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.3918607995398332,
|
|
"grad_norm": 0.2163025374806738,
|
|
"learning_rate": 1.9169789915559384e-05,
|
|
"loss": 0.508,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.3925798101811907,
|
|
"grad_norm": 0.22661359197059017,
|
|
"learning_rate": 1.91667835469575e-05,
|
|
"loss": 0.5054,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.3932988208225482,
|
|
"grad_norm": 0.2189191212011496,
|
|
"learning_rate": 1.916377198145002e-05,
|
|
"loss": 0.5049,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.3940178314639057,
|
|
"grad_norm": 0.22112339418211252,
|
|
"learning_rate": 1.9160755220744285e-05,
|
|
"loss": 0.507,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.39473684210526316,
|
|
"grad_norm": 0.24023378549204696,
|
|
"learning_rate": 1.9157733266550577e-05,
|
|
"loss": 0.5001,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.39545585274662065,
|
|
"grad_norm": 0.23085735744919814,
|
|
"learning_rate": 1.9154706120582124e-05,
|
|
"loss": 0.4964,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.39617486338797814,
|
|
"grad_norm": 0.22201978435599948,
|
|
"learning_rate": 1.9151673784555104e-05,
|
|
"loss": 0.5106,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.3968938740293356,
|
|
"grad_norm": 0.22146871558017686,
|
|
"learning_rate": 1.914863626018863e-05,
|
|
"loss": 0.521,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.3976128846706931,
|
|
"grad_norm": 0.261735920771169,
|
|
"learning_rate": 1.9145593549204765e-05,
|
|
"loss": 0.5158,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.3983318953120506,
|
|
"grad_norm": 0.23628237933283103,
|
|
"learning_rate": 1.9142545653328498e-05,
|
|
"loss": 0.5125,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.3990509059534081,
|
|
"grad_norm": 0.2244030842478789,
|
|
"learning_rate": 1.9139492574287773e-05,
|
|
"loss": 0.5065,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.39976991659476563,
|
|
"grad_norm": 0.22124683679006732,
|
|
"learning_rate": 1.9136434313813464e-05,
|
|
"loss": 0.5148,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.4004889272361231,
|
|
"grad_norm": 0.22194477521719427,
|
|
"learning_rate": 1.9133370873639384e-05,
|
|
"loss": 0.5187,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.4012079378774806,
|
|
"grad_norm": 0.2575630583603113,
|
|
"learning_rate": 1.913030225550228e-05,
|
|
"loss": 0.5218,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.4019269485188381,
|
|
"grad_norm": 0.22569098013577954,
|
|
"learning_rate": 1.9127228461141842e-05,
|
|
"loss": 0.4918,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.4026459591601956,
|
|
"grad_norm": 0.2310480906609719,
|
|
"learning_rate": 1.9124149492300688e-05,
|
|
"loss": 0.5119,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.40336496980155306,
|
|
"grad_norm": 0.23554005972607317,
|
|
"learning_rate": 1.9121065350724373e-05,
|
|
"loss": 0.5052,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.40408398044291055,
|
|
"grad_norm": 0.22735551853092875,
|
|
"learning_rate": 1.9117976038161382e-05,
|
|
"loss": 0.5191,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.40480299108426804,
|
|
"grad_norm": 0.22047183159407308,
|
|
"learning_rate": 1.911488155636313e-05,
|
|
"loss": 0.5108,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.4055220017256255,
|
|
"grad_norm": 0.21509880016454425,
|
|
"learning_rate": 1.9111781907083965e-05,
|
|
"loss": 0.5306,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.406241012366983,
|
|
"grad_norm": 0.22716977879847486,
|
|
"learning_rate": 1.9108677092081168e-05,
|
|
"loss": 0.5072,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.4069600230083405,
|
|
"grad_norm": 0.24450214167605047,
|
|
"learning_rate": 1.910556711311495e-05,
|
|
"loss": 0.505,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.40767903364969804,
|
|
"grad_norm": 0.2327700820207908,
|
|
"learning_rate": 1.910245197194843e-05,
|
|
"loss": 0.5205,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.40839804429105553,
|
|
"grad_norm": 0.21825242144579582,
|
|
"learning_rate": 1.9099331670347685e-05,
|
|
"loss": 0.5101,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.409117054932413,
|
|
"grad_norm": 0.23767483509194245,
|
|
"learning_rate": 1.909620621008169e-05,
|
|
"loss": 0.5218,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.4098360655737705,
|
|
"grad_norm": 0.22889357601500054,
|
|
"learning_rate": 1.909307559292236e-05,
|
|
"loss": 0.5169,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.410555076215128,
|
|
"grad_norm": 0.22625666452282742,
|
|
"learning_rate": 1.908993982064453e-05,
|
|
"loss": 0.5072,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.4112740868564855,
|
|
"grad_norm": 0.21580683260119565,
|
|
"learning_rate": 1.9086798895025955e-05,
|
|
"loss": 0.5069,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.41199309749784296,
|
|
"grad_norm": 0.22956520306395545,
|
|
"learning_rate": 1.9083652817847313e-05,
|
|
"loss": 0.5215,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.41271210813920045,
|
|
"grad_norm": 0.23021825019034187,
|
|
"learning_rate": 1.9080501590892204e-05,
|
|
"loss": 0.5184,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.41343111878055794,
|
|
"grad_norm": 0.22176100614050664,
|
|
"learning_rate": 1.9077345215947148e-05,
|
|
"loss": 0.4997,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.4141501294219154,
|
|
"grad_norm": 0.27256463340242076,
|
|
"learning_rate": 1.9074183694801582e-05,
|
|
"loss": 0.5064,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.4148691400632729,
|
|
"grad_norm": 0.2262004936961111,
|
|
"learning_rate": 1.9071017029247855e-05,
|
|
"loss": 0.5125,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.41558815070463045,
|
|
"grad_norm": 0.24563263769390858,
|
|
"learning_rate": 1.9067845221081244e-05,
|
|
"loss": 0.5152,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.41630716134598794,
|
|
"grad_norm": 0.22076043331812095,
|
|
"learning_rate": 1.906466827209994e-05,
|
|
"loss": 0.5109,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.41702617198734543,
|
|
"grad_norm": 0.23198323940026291,
|
|
"learning_rate": 1.9061486184105032e-05,
|
|
"loss": 0.5149,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.4177451826287029,
|
|
"grad_norm": 0.23598266909227508,
|
|
"learning_rate": 1.905829895890054e-05,
|
|
"loss": 0.5223,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.4184641932700604,
|
|
"grad_norm": 0.228514639309264,
|
|
"learning_rate": 1.9055106598293397e-05,
|
|
"loss": 0.5058,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.4191832039114179,
|
|
"grad_norm": 0.23800299382683535,
|
|
"learning_rate": 1.9051909104093435e-05,
|
|
"loss": 0.5058,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.4199022145527754,
|
|
"grad_norm": 0.23133785515445354,
|
|
"learning_rate": 1.90487064781134e-05,
|
|
"loss": 0.5213,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.42062122519413286,
|
|
"grad_norm": 0.22342554440272905,
|
|
"learning_rate": 1.9045498722168955e-05,
|
|
"loss": 0.4991,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.42134023583549035,
|
|
"grad_norm": 0.22853945252588564,
|
|
"learning_rate": 1.904228583807867e-05,
|
|
"loss": 0.5006,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.42205924647684784,
|
|
"grad_norm": 0.22268903457409447,
|
|
"learning_rate": 1.903906782766401e-05,
|
|
"loss": 0.5138,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.4227782571182053,
|
|
"grad_norm": 0.23470747813012946,
|
|
"learning_rate": 1.903584469274936e-05,
|
|
"loss": 0.507,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.42349726775956287,
|
|
"grad_norm": 0.23158290190173897,
|
|
"learning_rate": 1.9032616435162006e-05,
|
|
"loss": 0.494,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.42421627840092035,
|
|
"grad_norm": 0.23651030424701674,
|
|
"learning_rate": 1.9029383056732137e-05,
|
|
"loss": 0.5192,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.42493528904227784,
|
|
"grad_norm": 0.22640594430508912,
|
|
"learning_rate": 1.902614455929284e-05,
|
|
"loss": 0.5259,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.42565429968363533,
|
|
"grad_norm": 0.22262895192874665,
|
|
"learning_rate": 1.9022900944680115e-05,
|
|
"loss": 0.5067,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.4263733103249928,
|
|
"grad_norm": 0.21766140475746906,
|
|
"learning_rate": 1.9019652214732856e-05,
|
|
"loss": 0.4988,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.4270923209663503,
|
|
"grad_norm": 0.23215571807401733,
|
|
"learning_rate": 1.9016398371292865e-05,
|
|
"loss": 0.5053,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.4278113316077078,
|
|
"grad_norm": 0.22387635197718406,
|
|
"learning_rate": 1.9013139416204827e-05,
|
|
"loss": 0.5277,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.4285303422490653,
|
|
"grad_norm": 0.21890148025509146,
|
|
"learning_rate": 1.9009875351316338e-05,
|
|
"loss": 0.5085,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.42924935289042276,
|
|
"grad_norm": 0.2253750179966219,
|
|
"learning_rate": 1.9006606178477887e-05,
|
|
"loss": 0.5131,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.42996836353178025,
|
|
"grad_norm": 0.22408290204012185,
|
|
"learning_rate": 1.9003331899542864e-05,
|
|
"loss": 0.5223,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.43068737417313774,
|
|
"grad_norm": 0.24372803124516482,
|
|
"learning_rate": 1.9000052516367548e-05,
|
|
"loss": 0.5124,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.4314063848144953,
|
|
"grad_norm": 0.21808115918337018,
|
|
"learning_rate": 1.8996768030811105e-05,
|
|
"loss": 0.5102,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.43212539545585277,
|
|
"grad_norm": 0.23243284851206658,
|
|
"learning_rate": 1.899347844473561e-05,
|
|
"loss": 0.517,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.43284440609721025,
|
|
"grad_norm": 0.22815958327074795,
|
|
"learning_rate": 1.899018376000602e-05,
|
|
"loss": 0.522,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.43356341673856774,
|
|
"grad_norm": 0.22171076721553623,
|
|
"learning_rate": 1.8986883978490183e-05,
|
|
"loss": 0.5072,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.43428242737992523,
|
|
"grad_norm": 0.23723540529297746,
|
|
"learning_rate": 1.8983579102058832e-05,
|
|
"loss": 0.5176,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.4350014380212827,
|
|
"grad_norm": 0.22060421243897868,
|
|
"learning_rate": 1.8980269132585603e-05,
|
|
"loss": 0.4943,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.4357204486626402,
|
|
"grad_norm": 0.26456992213454594,
|
|
"learning_rate": 1.8976954071947e-05,
|
|
"loss": 0.5068,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.4364394593039977,
|
|
"grad_norm": 0.24097182160658487,
|
|
"learning_rate": 1.8973633922022435e-05,
|
|
"loss": 0.51,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.4371584699453552,
|
|
"grad_norm": 0.23089530329520278,
|
|
"learning_rate": 1.8970308684694186e-05,
|
|
"loss": 0.5073,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.43787748058671266,
|
|
"grad_norm": 0.22460564208903933,
|
|
"learning_rate": 1.8966978361847426e-05,
|
|
"loss": 0.4963,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.43859649122807015,
|
|
"grad_norm": 0.23903022133946736,
|
|
"learning_rate": 1.8963642955370203e-05,
|
|
"loss": 0.5141,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.4393155018694277,
|
|
"grad_norm": 0.23200855596333272,
|
|
"learning_rate": 1.8960302467153457e-05,
|
|
"loss": 0.5134,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.4400345125107852,
|
|
"grad_norm": 0.2438151089386712,
|
|
"learning_rate": 1.8956956899091004e-05,
|
|
"loss": 0.4802,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.44075352315214267,
|
|
"grad_norm": 0.23012560648822744,
|
|
"learning_rate": 1.8953606253079537e-05,
|
|
"loss": 0.5116,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.44147253379350015,
|
|
"grad_norm": 0.22946741307925678,
|
|
"learning_rate": 1.8950250531018636e-05,
|
|
"loss": 0.5165,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.44219154443485764,
|
|
"grad_norm": 0.22590133613817706,
|
|
"learning_rate": 1.8946889734810744e-05,
|
|
"loss": 0.5089,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.44291055507621513,
|
|
"grad_norm": 0.23305060264263988,
|
|
"learning_rate": 1.89435238663612e-05,
|
|
"loss": 0.5143,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.4436295657175726,
|
|
"grad_norm": 0.23846789632242757,
|
|
"learning_rate": 1.894015292757821e-05,
|
|
"loss": 0.5098,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.4443485763589301,
|
|
"grad_norm": 0.23320831247194246,
|
|
"learning_rate": 1.893677692037284e-05,
|
|
"loss": 0.5181,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.4450675870002876,
|
|
"grad_norm": 0.22608827407258242,
|
|
"learning_rate": 1.8933395846659057e-05,
|
|
"loss": 0.5183,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.4457865976416451,
|
|
"grad_norm": 0.2284432845740079,
|
|
"learning_rate": 1.8930009708353675e-05,
|
|
"loss": 0.5116,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.44650560828300256,
|
|
"grad_norm": 0.24099811110806968,
|
|
"learning_rate": 1.89266185073764e-05,
|
|
"loss": 0.5091,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.4472246189243601,
|
|
"grad_norm": 0.2320885348377546,
|
|
"learning_rate": 1.8923222245649796e-05,
|
|
"loss": 0.5211,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.4479436295657176,
|
|
"grad_norm": 0.2225444619602451,
|
|
"learning_rate": 1.891982092509929e-05,
|
|
"loss": 0.5132,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.4486626402070751,
|
|
"grad_norm": 0.21887758463857643,
|
|
"learning_rate": 1.89164145476532e-05,
|
|
"loss": 0.5082,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.44938165084843257,
|
|
"grad_norm": 0.21821324265710812,
|
|
"learning_rate": 1.8913003115242686e-05,
|
|
"loss": 0.4948,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.45010066148979005,
|
|
"grad_norm": 0.22546647289935937,
|
|
"learning_rate": 1.8909586629801788e-05,
|
|
"loss": 0.4875,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.45081967213114754,
|
|
"grad_norm": 0.2388867270357045,
|
|
"learning_rate": 1.8906165093267407e-05,
|
|
"loss": 0.5105,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.45153868277250503,
|
|
"grad_norm": 0.22303569790676106,
|
|
"learning_rate": 1.8902738507579305e-05,
|
|
"loss": 0.5039,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.4522576934138625,
|
|
"grad_norm": 0.227972234263652,
|
|
"learning_rate": 1.8899306874680113e-05,
|
|
"loss": 0.4885,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.45297670405522,
|
|
"grad_norm": 0.2267767566599487,
|
|
"learning_rate": 1.8895870196515314e-05,
|
|
"loss": 0.5049,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.4536957146965775,
|
|
"grad_norm": 0.21945728338555323,
|
|
"learning_rate": 1.8892428475033264e-05,
|
|
"loss": 0.5137,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.454414725337935,
|
|
"grad_norm": 0.22083748984649187,
|
|
"learning_rate": 1.8888981712185166e-05,
|
|
"loss": 0.5106,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.4551337359792925,
|
|
"grad_norm": 0.2401040695033316,
|
|
"learning_rate": 1.888552990992509e-05,
|
|
"loss": 0.5157,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.45585274662065,
|
|
"grad_norm": 0.24329074291054098,
|
|
"learning_rate": 1.888207307020995e-05,
|
|
"loss": 0.5124,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.4565717572620075,
|
|
"grad_norm": 0.22721196193725088,
|
|
"learning_rate": 1.887861119499954e-05,
|
|
"loss": 0.5184,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.457290767903365,
|
|
"grad_norm": 0.22097197219531742,
|
|
"learning_rate": 1.887514428625648e-05,
|
|
"loss": 0.5118,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.45800977854472247,
|
|
"grad_norm": 0.22942161994403518,
|
|
"learning_rate": 1.8871672345946265e-05,
|
|
"loss": 0.5002,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.45872878918607995,
|
|
"grad_norm": 0.23294479900892548,
|
|
"learning_rate": 1.8868195376037234e-05,
|
|
"loss": 0.5106,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.45944779982743744,
|
|
"grad_norm": 0.220153166817927,
|
|
"learning_rate": 1.8864713378500574e-05,
|
|
"loss": 0.5046,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.46016681046879493,
|
|
"grad_norm": 0.23782734580650305,
|
|
"learning_rate": 1.886122635531033e-05,
|
|
"loss": 0.5083,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.4608858211101524,
|
|
"grad_norm": 0.22201878015890575,
|
|
"learning_rate": 1.8857734308443392e-05,
|
|
"loss": 0.4996,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.4616048317515099,
|
|
"grad_norm": 0.25951882547960176,
|
|
"learning_rate": 1.8854237239879505e-05,
|
|
"loss": 0.5186,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.4623238423928674,
|
|
"grad_norm": 0.23372725500667854,
|
|
"learning_rate": 1.8850735151601243e-05,
|
|
"loss": 0.5137,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.46304285303422493,
|
|
"grad_norm": 0.22203328341904643,
|
|
"learning_rate": 1.8847228045594047e-05,
|
|
"loss": 0.5058,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.4637618636755824,
|
|
"grad_norm": 0.22777680675837877,
|
|
"learning_rate": 1.884371592384619e-05,
|
|
"loss": 0.514,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.4644808743169399,
|
|
"grad_norm": 0.2535600439046393,
|
|
"learning_rate": 1.8840198788348795e-05,
|
|
"loss": 0.521,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.4651998849582974,
|
|
"grad_norm": 0.23929596753578028,
|
|
"learning_rate": 1.8836676641095815e-05,
|
|
"loss": 0.5041,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.4659188955996549,
|
|
"grad_norm": 0.22737419484986415,
|
|
"learning_rate": 1.8833149484084064e-05,
|
|
"loss": 0.4928,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.46663790624101237,
|
|
"grad_norm": 0.22747158371404952,
|
|
"learning_rate": 1.8829617319313183e-05,
|
|
"loss": 0.5176,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.46735691688236985,
|
|
"grad_norm": 0.2350098884737649,
|
|
"learning_rate": 1.882608014878565e-05,
|
|
"loss": 0.5063,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.46807592752372734,
|
|
"grad_norm": 0.24683317655013465,
|
|
"learning_rate": 1.8822537974506794e-05,
|
|
"loss": 0.5138,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.46879493816508483,
|
|
"grad_norm": 0.2321129798855353,
|
|
"learning_rate": 1.8818990798484766e-05,
|
|
"loss": 0.5237,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.4695139488064423,
|
|
"grad_norm": 0.2313513696629806,
|
|
"learning_rate": 1.8815438622730563e-05,
|
|
"loss": 0.5094,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.4702329594477998,
|
|
"grad_norm": 0.22221489438951242,
|
|
"learning_rate": 1.8811881449258008e-05,
|
|
"loss": 0.5257,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.47095197008915735,
|
|
"grad_norm": 0.2309891648111513,
|
|
"learning_rate": 1.8808319280083766e-05,
|
|
"loss": 0.4929,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.47167098073051483,
|
|
"grad_norm": 0.21935255772772466,
|
|
"learning_rate": 1.880475211722733e-05,
|
|
"loss": 0.5007,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.4723899913718723,
|
|
"grad_norm": 0.21443691347510438,
|
|
"learning_rate": 1.8801179962711022e-05,
|
|
"loss": 0.5071,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.4731090020132298,
|
|
"grad_norm": 0.22881942399765773,
|
|
"learning_rate": 1.8797602818559996e-05,
|
|
"loss": 0.5073,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.4738280126545873,
|
|
"grad_norm": 0.22744544291360294,
|
|
"learning_rate": 1.879402068680224e-05,
|
|
"loss": 0.5131,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.4745470232959448,
|
|
"grad_norm": 0.22692909860000035,
|
|
"learning_rate": 1.879043356946856e-05,
|
|
"loss": 0.5133,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.47526603393730227,
|
|
"grad_norm": 0.2258584203109247,
|
|
"learning_rate": 1.8786841468592592e-05,
|
|
"loss": 0.4988,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.47598504457865976,
|
|
"grad_norm": 0.2329578824209415,
|
|
"learning_rate": 1.8783244386210802e-05,
|
|
"loss": 0.5066,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.47670405522001724,
|
|
"grad_norm": 0.2178009959841328,
|
|
"learning_rate": 1.8779642324362475e-05,
|
|
"loss": 0.5135,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.47742306586137473,
|
|
"grad_norm": 0.22999756735795288,
|
|
"learning_rate": 1.877603528508972e-05,
|
|
"loss": 0.5033,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.4781420765027322,
|
|
"grad_norm": 0.23474329467975602,
|
|
"learning_rate": 1.8772423270437467e-05,
|
|
"loss": 0.5043,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.47886108714408976,
|
|
"grad_norm": 0.227373395841068,
|
|
"learning_rate": 1.876880628245347e-05,
|
|
"loss": 0.5365,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.47958009778544725,
|
|
"grad_norm": 0.22867563621139628,
|
|
"learning_rate": 1.87651843231883e-05,
|
|
"loss": 0.4967,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.48029910842680473,
|
|
"grad_norm": 0.2556750962454127,
|
|
"learning_rate": 1.8761557394695347e-05,
|
|
"loss": 0.4932,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.4810181190681622,
|
|
"grad_norm": 0.23099567532789703,
|
|
"learning_rate": 1.8757925499030817e-05,
|
|
"loss": 0.5051,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.4817371297095197,
|
|
"grad_norm": 0.23416258925845912,
|
|
"learning_rate": 1.8754288638253734e-05,
|
|
"loss": 0.5,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.4824561403508772,
|
|
"grad_norm": 0.23674712863753772,
|
|
"learning_rate": 1.875064681442594e-05,
|
|
"loss": 0.4995,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.4831751509922347,
|
|
"grad_norm": 0.2361268981891666,
|
|
"learning_rate": 1.8747000029612077e-05,
|
|
"loss": 0.5046,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.48389416163359217,
|
|
"grad_norm": 0.22590461729069614,
|
|
"learning_rate": 1.8743348285879615e-05,
|
|
"loss": 0.505,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.48461317227494966,
|
|
"grad_norm": 0.2318060822698632,
|
|
"learning_rate": 1.8739691585298833e-05,
|
|
"loss": 0.5107,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.48533218291630714,
|
|
"grad_norm": 0.24037948201072387,
|
|
"learning_rate": 1.8736029929942813e-05,
|
|
"loss": 0.5119,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.48605119355766463,
|
|
"grad_norm": 0.2319768646318957,
|
|
"learning_rate": 1.8732363321887447e-05,
|
|
"loss": 0.5179,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.48677020419902217,
|
|
"grad_norm": 0.23122665039326531,
|
|
"learning_rate": 1.872869176321144e-05,
|
|
"loss": 0.5049,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.48748921484037966,
|
|
"grad_norm": 0.22506661120445953,
|
|
"learning_rate": 1.87250152559963e-05,
|
|
"loss": 0.506,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.48820822548173715,
|
|
"grad_norm": 0.22671157330761432,
|
|
"learning_rate": 1.8721333802326345e-05,
|
|
"loss": 0.5124,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.48892723612309463,
|
|
"grad_norm": 0.2262392387165888,
|
|
"learning_rate": 1.871764740428869e-05,
|
|
"loss": 0.5075,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.4896462467644521,
|
|
"grad_norm": 0.23953668318855156,
|
|
"learning_rate": 1.871395606397326e-05,
|
|
"loss": 0.5035,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.4903652574058096,
|
|
"grad_norm": 0.22816597207508776,
|
|
"learning_rate": 1.8710259783472778e-05,
|
|
"loss": 0.5217,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.4910842680471671,
|
|
"grad_norm": 0.22589418481821869,
|
|
"learning_rate": 1.8706558564882766e-05,
|
|
"loss": 0.5225,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.4918032786885246,
|
|
"grad_norm": 0.23084269685354364,
|
|
"learning_rate": 1.8702852410301556e-05,
|
|
"loss": 0.4966,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.49252228932988207,
|
|
"grad_norm": 0.22922196106101597,
|
|
"learning_rate": 1.8699141321830257e-05,
|
|
"loss": 0.4897,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.49324129997123956,
|
|
"grad_norm": 0.22904199398424144,
|
|
"learning_rate": 1.8695425301572802e-05,
|
|
"loss": 0.4981,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.49396031061259704,
|
|
"grad_norm": 0.23056356839049091,
|
|
"learning_rate": 1.8691704351635903e-05,
|
|
"loss": 0.4904,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.4946793212539546,
|
|
"grad_norm": 0.27576248579574547,
|
|
"learning_rate": 1.8687978474129065e-05,
|
|
"loss": 0.5119,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.49539833189531207,
|
|
"grad_norm": 0.22644457374568444,
|
|
"learning_rate": 1.8684247671164596e-05,
|
|
"loss": 0.5015,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.49611734253666956,
|
|
"grad_norm": 0.22092616693572895,
|
|
"learning_rate": 1.868051194485759e-05,
|
|
"loss": 0.4963,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.49683635317802705,
|
|
"grad_norm": 0.2279874573282857,
|
|
"learning_rate": 1.8676771297325943e-05,
|
|
"loss": 0.4986,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.49755536381938453,
|
|
"grad_norm": 0.22574757462624237,
|
|
"learning_rate": 1.8673025730690323e-05,
|
|
"loss": 0.5125,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.498274374460742,
|
|
"grad_norm": 0.24253004022010075,
|
|
"learning_rate": 1.8669275247074197e-05,
|
|
"loss": 0.5042,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.4989933851020995,
|
|
"grad_norm": 0.22875289480420072,
|
|
"learning_rate": 1.8665519848603825e-05,
|
|
"loss": 0.513,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.499712395743457,
|
|
"grad_norm": 0.22613927853567778,
|
|
"learning_rate": 1.8661759537408245e-05,
|
|
"loss": 0.5026,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.5004314063848145,
|
|
"grad_norm": 0.23799705443593844,
|
|
"learning_rate": 1.865799431561928e-05,
|
|
"loss": 0.5166,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.501150417026172,
|
|
"grad_norm": 0.234919497465417,
|
|
"learning_rate": 1.865422418537154e-05,
|
|
"loss": 0.5125,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.5018694276675295,
|
|
"grad_norm": 0.2186332262355146,
|
|
"learning_rate": 1.8650449148802416e-05,
|
|
"loss": 0.506,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.502588438308887,
|
|
"grad_norm": 0.22872419867177446,
|
|
"learning_rate": 1.8646669208052086e-05,
|
|
"loss": 0.4887,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.5033074489502445,
|
|
"grad_norm": 0.2367736854157651,
|
|
"learning_rate": 1.86428843652635e-05,
|
|
"loss": 0.5178,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.504026459591602,
|
|
"grad_norm": 0.22439560833364647,
|
|
"learning_rate": 1.8639094622582395e-05,
|
|
"loss": 0.5116,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.5047454702329595,
|
|
"grad_norm": 0.22908813891521232,
|
|
"learning_rate": 1.8635299982157272e-05,
|
|
"loss": 0.4907,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.505464480874317,
|
|
"grad_norm": 0.23232159031491972,
|
|
"learning_rate": 1.8631500446139436e-05,
|
|
"loss": 0.5194,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.5061834915156744,
|
|
"grad_norm": 0.2283220143978447,
|
|
"learning_rate": 1.8627696016682934e-05,
|
|
"loss": 0.5001,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.5069025021570319,
|
|
"grad_norm": 0.22980408461545354,
|
|
"learning_rate": 1.8623886695944612e-05,
|
|
"loss": 0.5107,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.5076215127983894,
|
|
"grad_norm": 0.23722746787328844,
|
|
"learning_rate": 1.8620072486084075e-05,
|
|
"loss": 0.5066,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.5083405234397469,
|
|
"grad_norm": 0.23287381111820576,
|
|
"learning_rate": 1.8616253389263713e-05,
|
|
"loss": 0.5078,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.5090595340811044,
|
|
"grad_norm": 0.23693781129127364,
|
|
"learning_rate": 1.8612429407648668e-05,
|
|
"loss": 0.5255,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.5097785447224619,
|
|
"grad_norm": 0.2698671806909946,
|
|
"learning_rate": 1.860860054340687e-05,
|
|
"loss": 0.5131,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.5104975553638194,
|
|
"grad_norm": 0.22671582879050173,
|
|
"learning_rate": 1.8604766798709005e-05,
|
|
"loss": 0.5018,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.5112165660051768,
|
|
"grad_norm": 0.22718775482090045,
|
|
"learning_rate": 1.8600928175728535e-05,
|
|
"loss": 0.4973,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.5119355766465343,
|
|
"grad_norm": 0.21840372466561936,
|
|
"learning_rate": 1.8597084676641677e-05,
|
|
"loss": 0.4842,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.5126545872878918,
|
|
"grad_norm": 0.21210979676494143,
|
|
"learning_rate": 1.859323630362742e-05,
|
|
"loss": 0.4945,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.5133735979292493,
|
|
"grad_norm": 0.21804206661910921,
|
|
"learning_rate": 1.8589383058867515e-05,
|
|
"loss": 0.4896,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.5140926085706068,
|
|
"grad_norm": 0.23110253293609673,
|
|
"learning_rate": 1.8585524944546473e-05,
|
|
"loss": 0.5223,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.5148116192119644,
|
|
"grad_norm": 0.23613500534537313,
|
|
"learning_rate": 1.8581661962851566e-05,
|
|
"loss": 0.4987,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.5155306298533219,
|
|
"grad_norm": 0.22778446097339797,
|
|
"learning_rate": 1.8577794115972824e-05,
|
|
"loss": 0.5083,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.5162496404946794,
|
|
"grad_norm": 0.23767354447655717,
|
|
"learning_rate": 1.8573921406103048e-05,
|
|
"loss": 0.5087,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.5169686511360368,
|
|
"grad_norm": 0.21981817666742454,
|
|
"learning_rate": 1.8570043835437772e-05,
|
|
"loss": 0.499,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.5176876617773943,
|
|
"grad_norm": 0.2274951101541769,
|
|
"learning_rate": 1.8566161406175306e-05,
|
|
"loss": 0.5144,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.5184066724187518,
|
|
"grad_norm": 0.23572189148125483,
|
|
"learning_rate": 1.856227412051671e-05,
|
|
"loss": 0.4995,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.5191256830601093,
|
|
"grad_norm": 0.21629735180634516,
|
|
"learning_rate": 1.855838198066579e-05,
|
|
"loss": 0.4963,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.5198446937014668,
|
|
"grad_norm": 0.22363878924914682,
|
|
"learning_rate": 1.8554484988829108e-05,
|
|
"loss": 0.5063,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.5205637043428243,
|
|
"grad_norm": 0.2316899394396873,
|
|
"learning_rate": 1.8550583147215985e-05,
|
|
"loss": 0.4905,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.5212827149841818,
|
|
"grad_norm": 0.221354141997133,
|
|
"learning_rate": 1.854667645803847e-05,
|
|
"loss": 0.4988,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.5220017256255393,
|
|
"grad_norm": 0.2253239099128233,
|
|
"learning_rate": 1.8542764923511392e-05,
|
|
"loss": 0.5033,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.5227207362668967,
|
|
"grad_norm": 0.22954386049162062,
|
|
"learning_rate": 1.8538848545852294e-05,
|
|
"loss": 0.4878,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.5234397469082542,
|
|
"grad_norm": 0.22715952879859952,
|
|
"learning_rate": 1.8534927327281488e-05,
|
|
"loss": 0.499,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.5241587575496117,
|
|
"grad_norm": 0.22229217452640895,
|
|
"learning_rate": 1.8531001270022024e-05,
|
|
"loss": 0.4884,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.5248777681909692,
|
|
"grad_norm": 0.23891821519257553,
|
|
"learning_rate": 1.852707037629968e-05,
|
|
"loss": 0.5108,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.5255967788323267,
|
|
"grad_norm": 0.23675197935926118,
|
|
"learning_rate": 1.852313464834301e-05,
|
|
"loss": 0.4957,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.5263157894736842,
|
|
"grad_norm": 0.22253486079417645,
|
|
"learning_rate": 1.851919408838327e-05,
|
|
"loss": 0.4775,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.5270348001150417,
|
|
"grad_norm": 0.22333070985380785,
|
|
"learning_rate": 1.8515248698654486e-05,
|
|
"loss": 0.4983,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.5277538107563992,
|
|
"grad_norm": 0.223274460335613,
|
|
"learning_rate": 1.8511298481393403e-05,
|
|
"loss": 0.4982,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.5284728213977566,
|
|
"grad_norm": 0.22355882969418756,
|
|
"learning_rate": 1.850734343883951e-05,
|
|
"loss": 0.5084,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.5291918320391141,
|
|
"grad_norm": 0.22137972544339088,
|
|
"learning_rate": 1.8503383573235032e-05,
|
|
"loss": 0.5012,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.5299108426804716,
|
|
"grad_norm": 0.22426702815827018,
|
|
"learning_rate": 1.8499418886824926e-05,
|
|
"loss": 0.5014,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.5306298533218292,
|
|
"grad_norm": 0.2384895998266707,
|
|
"learning_rate": 1.8495449381856886e-05,
|
|
"loss": 0.4985,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.5313488639631867,
|
|
"grad_norm": 0.23328619696763794,
|
|
"learning_rate": 1.8491475060581337e-05,
|
|
"loss": 0.4892,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.5320678746045442,
|
|
"grad_norm": 0.2208450387758745,
|
|
"learning_rate": 1.8487495925251427e-05,
|
|
"loss": 0.4839,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.5327868852459017,
|
|
"grad_norm": 0.22952401879385564,
|
|
"learning_rate": 1.848351197812304e-05,
|
|
"loss": 0.5041,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.5335058958872592,
|
|
"grad_norm": 0.22545010734962523,
|
|
"learning_rate": 1.847952322145479e-05,
|
|
"loss": 0.5189,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.5342249065286166,
|
|
"grad_norm": 0.23198247491965804,
|
|
"learning_rate": 1.8475529657508016e-05,
|
|
"loss": 0.5041,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.5349439171699741,
|
|
"grad_norm": 0.2321580786261051,
|
|
"learning_rate": 1.8471531288546773e-05,
|
|
"loss": 0.5108,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.5356629278113316,
|
|
"grad_norm": 0.23247829012931276,
|
|
"learning_rate": 1.8467528116837857e-05,
|
|
"loss": 0.5238,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.5363819384526891,
|
|
"grad_norm": 0.23385610902788734,
|
|
"learning_rate": 1.8463520144650773e-05,
|
|
"loss": 0.4964,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.5371009490940466,
|
|
"grad_norm": 0.22626064932274298,
|
|
"learning_rate": 1.8459507374257755e-05,
|
|
"loss": 0.5097,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.5378199597354041,
|
|
"grad_norm": 0.22079798279561869,
|
|
"learning_rate": 1.845548980793375e-05,
|
|
"loss": 0.4997,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.5385389703767616,
|
|
"grad_norm": 0.2345884377445552,
|
|
"learning_rate": 1.845146744795643e-05,
|
|
"loss": 0.4853,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.5392579810181191,
|
|
"grad_norm": 0.22831199879883093,
|
|
"learning_rate": 1.8447440296606193e-05,
|
|
"loss": 0.5012,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.5399769916594765,
|
|
"grad_norm": 0.24900114363683074,
|
|
"learning_rate": 1.8443408356166128e-05,
|
|
"loss": 0.521,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.540696002300834,
|
|
"grad_norm": 0.22163999971406523,
|
|
"learning_rate": 1.8439371628922064e-05,
|
|
"loss": 0.5045,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.5414150129421915,
|
|
"grad_norm": 0.22760154174757039,
|
|
"learning_rate": 1.8435330117162534e-05,
|
|
"loss": 0.501,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 0.542134023583549,
|
|
"grad_norm": 0.22523822958018522,
|
|
"learning_rate": 1.843128382317878e-05,
|
|
"loss": 0.5133,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.5428530342249065,
|
|
"grad_norm": 0.22600992748430698,
|
|
"learning_rate": 1.8427232749264762e-05,
|
|
"loss": 0.499,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.543572044866264,
|
|
"grad_norm": 0.22389972269539865,
|
|
"learning_rate": 1.8423176897717143e-05,
|
|
"loss": 0.5015,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.5442910555076215,
|
|
"grad_norm": 0.23666072422166584,
|
|
"learning_rate": 1.8419116270835307e-05,
|
|
"loss": 0.522,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 0.545010066148979,
|
|
"grad_norm": 0.23444635492141347,
|
|
"learning_rate": 1.841505087092133e-05,
|
|
"loss": 0.4916,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.5457290767903364,
|
|
"grad_norm": 0.23417088378508938,
|
|
"learning_rate": 1.841098070028e-05,
|
|
"loss": 0.5131,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 0.546448087431694,
|
|
"grad_norm": 0.2398297332015248,
|
|
"learning_rate": 1.8406905761218815e-05,
|
|
"loss": 0.4969,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.5471670980730515,
|
|
"grad_norm": 0.23646633183918445,
|
|
"learning_rate": 1.8402826056047964e-05,
|
|
"loss": 0.5148,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 0.547886108714409,
|
|
"grad_norm": 0.23304665042616649,
|
|
"learning_rate": 1.8398741587080358e-05,
|
|
"loss": 0.506,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.5486051193557665,
|
|
"grad_norm": 0.21709816862440437,
|
|
"learning_rate": 1.8394652356631585e-05,
|
|
"loss": 0.5089,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 0.549324129997124,
|
|
"grad_norm": 0.23021854763291302,
|
|
"learning_rate": 1.8390558367019954e-05,
|
|
"loss": 0.4946,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.5500431406384815,
|
|
"grad_norm": 0.2206971983996814,
|
|
"learning_rate": 1.8386459620566453e-05,
|
|
"loss": 0.4745,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.550762151279839,
|
|
"grad_norm": 0.21574112788117586,
|
|
"learning_rate": 1.838235611959478e-05,
|
|
"loss": 0.5086,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.5514811619211964,
|
|
"grad_norm": 0.2261136289668569,
|
|
"learning_rate": 1.8378247866431325e-05,
|
|
"loss": 0.4966,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 0.5522001725625539,
|
|
"grad_norm": 0.22972972838837386,
|
|
"learning_rate": 1.837413486340517e-05,
|
|
"loss": 0.4906,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.5529191832039114,
|
|
"grad_norm": 0.2330925028758894,
|
|
"learning_rate": 1.837001711284809e-05,
|
|
"loss": 0.5098,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 0.5536381938452689,
|
|
"grad_norm": 0.2270327849370844,
|
|
"learning_rate": 1.8365894617094558e-05,
|
|
"loss": 0.4926,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.5543572044866264,
|
|
"grad_norm": 0.23660944623443908,
|
|
"learning_rate": 1.8361767378481725e-05,
|
|
"loss": 0.5044,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 0.5550762151279839,
|
|
"grad_norm": 0.29863962341515,
|
|
"learning_rate": 1.8357635399349442e-05,
|
|
"loss": 0.5173,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.5557952257693414,
|
|
"grad_norm": 0.21782436220827342,
|
|
"learning_rate": 1.8353498682040244e-05,
|
|
"loss": 0.499,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 0.5565142364106989,
|
|
"grad_norm": 0.2210253870313405,
|
|
"learning_rate": 1.8349357228899348e-05,
|
|
"loss": 0.4892,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.5572332470520563,
|
|
"grad_norm": 0.21922597978664715,
|
|
"learning_rate": 1.834521104227466e-05,
|
|
"loss": 0.4924,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.5579522576934138,
|
|
"grad_norm": 0.22792159860332095,
|
|
"learning_rate": 1.8341060124516774e-05,
|
|
"loss": 0.52,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.5586712683347713,
|
|
"grad_norm": 0.21526668904279522,
|
|
"learning_rate": 1.833690447797896e-05,
|
|
"loss": 0.4981,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 0.5593902789761288,
|
|
"grad_norm": 0.29103804275461576,
|
|
"learning_rate": 1.8332744105017163e-05,
|
|
"loss": 0.4928,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.5601092896174863,
|
|
"grad_norm": 0.2357301459907596,
|
|
"learning_rate": 1.832857900799002e-05,
|
|
"loss": 0.4984,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 0.5608283002588438,
|
|
"grad_norm": 0.22478118848509318,
|
|
"learning_rate": 1.832440918925884e-05,
|
|
"loss": 0.4948,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.5615473109002013,
|
|
"grad_norm": 0.23300933444107352,
|
|
"learning_rate": 1.8320234651187614e-05,
|
|
"loss": 0.4909,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 0.5622663215415589,
|
|
"grad_norm": 0.230128753020351,
|
|
"learning_rate": 1.8316055396142997e-05,
|
|
"loss": 0.5244,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.5629853321829164,
|
|
"grad_norm": 0.2304505782146836,
|
|
"learning_rate": 1.831187142649433e-05,
|
|
"loss": 0.5158,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 0.5637043428242738,
|
|
"grad_norm": 0.22014125911826737,
|
|
"learning_rate": 1.830768274461362e-05,
|
|
"loss": 0.482,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.5644233534656313,
|
|
"grad_norm": 0.2278820436024887,
|
|
"learning_rate": 1.830348935287555e-05,
|
|
"loss": 0.5112,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.5651423641069888,
|
|
"grad_norm": 0.2156583838047392,
|
|
"learning_rate": 1.829929125365747e-05,
|
|
"loss": 0.496,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.5658613747483463,
|
|
"grad_norm": 0.2315411777675097,
|
|
"learning_rate": 1.8295088449339395e-05,
|
|
"loss": 0.5031,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 0.5665803853897038,
|
|
"grad_norm": 0.2191930539045992,
|
|
"learning_rate": 1.8290880942304018e-05,
|
|
"loss": 0.5017,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.5672993960310613,
|
|
"grad_norm": 0.23045445869070985,
|
|
"learning_rate": 1.8286668734936693e-05,
|
|
"loss": 0.5047,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 0.5680184066724188,
|
|
"grad_norm": 0.23068453263665495,
|
|
"learning_rate": 1.8282451829625433e-05,
|
|
"loss": 0.4884,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.5687374173137763,
|
|
"grad_norm": 0.22441184191853916,
|
|
"learning_rate": 1.827823022876092e-05,
|
|
"loss": 0.4925,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 0.5694564279551337,
|
|
"grad_norm": 0.2274613215901173,
|
|
"learning_rate": 1.8274003934736507e-05,
|
|
"loss": 0.4948,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.5701754385964912,
|
|
"grad_norm": 0.2154163185531681,
|
|
"learning_rate": 1.8269772949948185e-05,
|
|
"loss": 0.4804,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 0.5708944492378487,
|
|
"grad_norm": 0.2265136281130171,
|
|
"learning_rate": 1.8265537276794624e-05,
|
|
"loss": 0.5021,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.5716134598792062,
|
|
"grad_norm": 0.23548231058339367,
|
|
"learning_rate": 1.826129691767714e-05,
|
|
"loss": 0.4969,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.5723324705205637,
|
|
"grad_norm": 0.24709148621521423,
|
|
"learning_rate": 1.8257051874999723e-05,
|
|
"loss": 0.4947,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.5730514811619212,
|
|
"grad_norm": 0.22658966766078337,
|
|
"learning_rate": 1.8252802151168992e-05,
|
|
"loss": 0.4806,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 0.5737704918032787,
|
|
"grad_norm": 0.23535080257238622,
|
|
"learning_rate": 1.8248547748594246e-05,
|
|
"loss": 0.496,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.5744895024446361,
|
|
"grad_norm": 0.23300185528621603,
|
|
"learning_rate": 1.8244288669687414e-05,
|
|
"loss": 0.5161,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 0.5752085130859936,
|
|
"grad_norm": 0.2195927252226493,
|
|
"learning_rate": 1.8240024916863096e-05,
|
|
"loss": 0.5145,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.5759275237273511,
|
|
"grad_norm": 0.22707087879411403,
|
|
"learning_rate": 1.823575649253853e-05,
|
|
"loss": 0.5027,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 0.5766465343687086,
|
|
"grad_norm": 0.23258755169070053,
|
|
"learning_rate": 1.82314833991336e-05,
|
|
"loss": 0.4822,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.5773655450100661,
|
|
"grad_norm": 0.21245372046944946,
|
|
"learning_rate": 1.8227205639070845e-05,
|
|
"loss": 0.4841,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 0.5780845556514237,
|
|
"grad_norm": 0.227555450329633,
|
|
"learning_rate": 1.822292321477545e-05,
|
|
"loss": 0.5137,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.5788035662927812,
|
|
"grad_norm": 0.22238301849030442,
|
|
"learning_rate": 1.821863612867524e-05,
|
|
"loss": 0.5018,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.5795225769341387,
|
|
"grad_norm": 0.23818689276016308,
|
|
"learning_rate": 1.821434438320068e-05,
|
|
"loss": 0.4984,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.5802415875754962,
|
|
"grad_norm": 0.22243316105430688,
|
|
"learning_rate": 1.821004798078488e-05,
|
|
"loss": 0.4791,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 0.5809605982168536,
|
|
"grad_norm": 0.2235013488087155,
|
|
"learning_rate": 1.8205746923863596e-05,
|
|
"loss": 0.5031,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.5816796088582111,
|
|
"grad_norm": 0.22378546532385274,
|
|
"learning_rate": 1.820144121487521e-05,
|
|
"loss": 0.4869,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 0.5823986194995686,
|
|
"grad_norm": 0.22608188106946078,
|
|
"learning_rate": 1.819713085626076e-05,
|
|
"loss": 0.5,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.5831176301409261,
|
|
"grad_norm": 0.23120069866094303,
|
|
"learning_rate": 1.8192815850463896e-05,
|
|
"loss": 0.5043,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 0.5838366407822836,
|
|
"grad_norm": 0.22373861994189723,
|
|
"learning_rate": 1.8188496199930922e-05,
|
|
"loss": 0.5069,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.5845556514236411,
|
|
"grad_norm": 0.22542422851046623,
|
|
"learning_rate": 1.8184171907110767e-05,
|
|
"loss": 0.4898,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 0.5852746620649986,
|
|
"grad_norm": 0.22992707671161222,
|
|
"learning_rate": 1.8179842974454997e-05,
|
|
"loss": 0.5058,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.585993672706356,
|
|
"grad_norm": 0.23512992551531928,
|
|
"learning_rate": 1.8175509404417795e-05,
|
|
"loss": 0.5131,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.5867126833477135,
|
|
"grad_norm": 0.23047258195395515,
|
|
"learning_rate": 1.8171171199455995e-05,
|
|
"loss": 0.4866,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.587431693989071,
|
|
"grad_norm": 0.22879948545941575,
|
|
"learning_rate": 1.8166828362029038e-05,
|
|
"loss": 0.4984,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 0.5881507046304285,
|
|
"grad_norm": 0.2318143159711983,
|
|
"learning_rate": 1.8162480894599007e-05,
|
|
"loss": 0.5046,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.588869715271786,
|
|
"grad_norm": 0.22044473281174515,
|
|
"learning_rate": 1.8158128799630593e-05,
|
|
"loss": 0.4972,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 0.5895887259131435,
|
|
"grad_norm": 0.21967188839528212,
|
|
"learning_rate": 1.815377207959113e-05,
|
|
"loss": 0.5114,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.590307736554501,
|
|
"grad_norm": 0.22481352334786187,
|
|
"learning_rate": 1.8149410736950557e-05,
|
|
"loss": 0.4804,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 0.5910267471958585,
|
|
"grad_norm": 0.22907040470257112,
|
|
"learning_rate": 1.8145044774181446e-05,
|
|
"loss": 0.5133,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.591745757837216,
|
|
"grad_norm": 0.22599774025751448,
|
|
"learning_rate": 1.814067419375898e-05,
|
|
"loss": 0.5127,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 0.5924647684785734,
|
|
"grad_norm": 0.22849166460658857,
|
|
"learning_rate": 1.8136298998160964e-05,
|
|
"loss": 0.4885,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.5931837791199309,
|
|
"grad_norm": 0.23707425413994465,
|
|
"learning_rate": 1.8131919189867823e-05,
|
|
"loss": 0.5023,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.5939027897612885,
|
|
"grad_norm": 0.218741855347216,
|
|
"learning_rate": 1.8127534771362583e-05,
|
|
"loss": 0.5053,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.594621800402646,
|
|
"grad_norm": 0.21990224755715207,
|
|
"learning_rate": 1.81231457451309e-05,
|
|
"loss": 0.5011,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 0.5953408110440035,
|
|
"grad_norm": 0.22519334707137623,
|
|
"learning_rate": 1.8118752113661036e-05,
|
|
"loss": 0.4929,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.596059821685361,
|
|
"grad_norm": 0.2251407174797879,
|
|
"learning_rate": 1.811435387944386e-05,
|
|
"loss": 0.4897,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 0.5967788323267185,
|
|
"grad_norm": 0.22081155450327186,
|
|
"learning_rate": 1.8109951044972852e-05,
|
|
"loss": 0.5096,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.597497842968076,
|
|
"grad_norm": 0.21913049190140144,
|
|
"learning_rate": 1.810554361274411e-05,
|
|
"loss": 0.4994,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 0.5982168536094334,
|
|
"grad_norm": 0.2280104778800277,
|
|
"learning_rate": 1.8101131585256327e-05,
|
|
"loss": 0.5088,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.5989358642507909,
|
|
"grad_norm": 0.23121277159859566,
|
|
"learning_rate": 1.80967149650108e-05,
|
|
"loss": 0.4977,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 0.5996548748921484,
|
|
"grad_norm": 0.23167148679803448,
|
|
"learning_rate": 1.8092293754511437e-05,
|
|
"loss": 0.4928,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.6003738855335059,
|
|
"grad_norm": 0.22355387075840977,
|
|
"learning_rate": 1.808786795626475e-05,
|
|
"loss": 0.4905,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.6010928961748634,
|
|
"grad_norm": 0.2145020922572475,
|
|
"learning_rate": 1.8083437572779842e-05,
|
|
"loss": 0.4835,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.6018119068162209,
|
|
"grad_norm": 0.2502122824779311,
|
|
"learning_rate": 1.8079002606568425e-05,
|
|
"loss": 0.4885,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 0.6025309174575784,
|
|
"grad_norm": 0.25428888688428436,
|
|
"learning_rate": 1.8074563060144804e-05,
|
|
"loss": 0.5103,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.6032499280989359,
|
|
"grad_norm": 0.22496649178253486,
|
|
"learning_rate": 1.807011893602588e-05,
|
|
"loss": 0.4787,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 0.6039689387402933,
|
|
"grad_norm": 0.22504966410722568,
|
|
"learning_rate": 1.8065670236731147e-05,
|
|
"loss": 0.4946,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.6046879493816508,
|
|
"grad_norm": 0.2256429602495479,
|
|
"learning_rate": 1.8061216964782707e-05,
|
|
"loss": 0.4919,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 0.6054069600230083,
|
|
"grad_norm": 0.24545480436663297,
|
|
"learning_rate": 1.805675912270524e-05,
|
|
"loss": 0.5098,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.6061259706643658,
|
|
"grad_norm": 0.23138472437464977,
|
|
"learning_rate": 1.805229671302602e-05,
|
|
"loss": 0.49,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 0.6068449813057233,
|
|
"grad_norm": 0.22420294449430914,
|
|
"learning_rate": 1.8047829738274912e-05,
|
|
"loss": 0.5135,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.6075639919470808,
|
|
"grad_norm": 0.23477514688466256,
|
|
"learning_rate": 1.8043358200984367e-05,
|
|
"loss": 0.5091,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.6082830025884383,
|
|
"grad_norm": 0.22664370647917517,
|
|
"learning_rate": 1.8038882103689425e-05,
|
|
"loss": 0.5009,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.6090020132297957,
|
|
"grad_norm": 0.22665190958697912,
|
|
"learning_rate": 1.8034401448927717e-05,
|
|
"loss": 0.5039,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 0.6097210238711533,
|
|
"grad_norm": 0.22347929069368033,
|
|
"learning_rate": 1.8029916239239444e-05,
|
|
"loss": 0.4995,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.6104400345125108,
|
|
"grad_norm": 0.22840687464864576,
|
|
"learning_rate": 1.8025426477167398e-05,
|
|
"loss": 0.4906,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 0.6111590451538683,
|
|
"grad_norm": 0.21865290043810331,
|
|
"learning_rate": 1.802093216525695e-05,
|
|
"loss": 0.5002,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.6118780557952258,
|
|
"grad_norm": 0.2257697870345122,
|
|
"learning_rate": 1.8016433306056056e-05,
|
|
"loss": 0.4974,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 0.6125970664365833,
|
|
"grad_norm": 0.23554978601033177,
|
|
"learning_rate": 1.801192990211524e-05,
|
|
"loss": 0.5076,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.6133160770779408,
|
|
"grad_norm": 0.21763035761289187,
|
|
"learning_rate": 1.800742195598761e-05,
|
|
"loss": 0.5022,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 0.6140350877192983,
|
|
"grad_norm": 0.22845763433585353,
|
|
"learning_rate": 1.800290947022884e-05,
|
|
"loss": 0.5012,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.6147540983606558,
|
|
"grad_norm": 0.23660622765117761,
|
|
"learning_rate": 1.7998392447397197e-05,
|
|
"loss": 0.4953,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.6154731090020132,
|
|
"grad_norm": 0.23512957287176042,
|
|
"learning_rate": 1.799387089005349e-05,
|
|
"loss": 0.5081,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.6161921196433707,
|
|
"grad_norm": 0.23139366949545287,
|
|
"learning_rate": 1.7989344800761138e-05,
|
|
"loss": 0.4785,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 0.6169111302847282,
|
|
"grad_norm": 0.21543568576088704,
|
|
"learning_rate": 1.798481418208609e-05,
|
|
"loss": 0.4889,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.6176301409260857,
|
|
"grad_norm": 0.24103936247130817,
|
|
"learning_rate": 1.7980279036596882e-05,
|
|
"loss": 0.4887,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 0.6183491515674432,
|
|
"grad_norm": 0.22480459818866064,
|
|
"learning_rate": 1.797573936686462e-05,
|
|
"loss": 0.4998,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.6190681622088007,
|
|
"grad_norm": 0.2302806647932163,
|
|
"learning_rate": 1.797119517546297e-05,
|
|
"loss": 0.4823,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 0.6197871728501582,
|
|
"grad_norm": 0.2195093911650767,
|
|
"learning_rate": 1.7966646464968156e-05,
|
|
"loss": 0.4874,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.6205061834915157,
|
|
"grad_norm": 0.23515790120245852,
|
|
"learning_rate": 1.7962093237958975e-05,
|
|
"loss": 0.5017,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 0.6212251941328731,
|
|
"grad_norm": 0.22558657641284738,
|
|
"learning_rate": 1.7957535497016773e-05,
|
|
"loss": 0.4836,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.6219442047742306,
|
|
"grad_norm": 0.21514923208427525,
|
|
"learning_rate": 1.7952973244725466e-05,
|
|
"loss": 0.503,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.6226632154155881,
|
|
"grad_norm": 0.2240849714125103,
|
|
"learning_rate": 1.7948406483671516e-05,
|
|
"loss": 0.5095,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.6233822260569456,
|
|
"grad_norm": 0.23011998238604692,
|
|
"learning_rate": 1.7943835216443954e-05,
|
|
"loss": 0.4978,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 0.6241012366983031,
|
|
"grad_norm": 0.2226633367382515,
|
|
"learning_rate": 1.793925944563435e-05,
|
|
"loss": 0.4978,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.6248202473396606,
|
|
"grad_norm": 0.22271625540264856,
|
|
"learning_rate": 1.7934679173836845e-05,
|
|
"loss": 0.4793,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 0.6255392579810182,
|
|
"grad_norm": 0.22168826941628614,
|
|
"learning_rate": 1.7930094403648123e-05,
|
|
"loss": 0.485,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.6262582686223757,
|
|
"grad_norm": 0.2194434339487712,
|
|
"learning_rate": 1.792550513766741e-05,
|
|
"loss": 0.5179,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 0.6269772792637331,
|
|
"grad_norm": 0.2241703162853489,
|
|
"learning_rate": 1.79209113784965e-05,
|
|
"loss": 0.4924,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.6276962899050906,
|
|
"grad_norm": 0.22907476566053597,
|
|
"learning_rate": 1.7916313128739713e-05,
|
|
"loss": 0.5165,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 0.6284153005464481,
|
|
"grad_norm": 0.2287805893738131,
|
|
"learning_rate": 1.791171039100393e-05,
|
|
"loss": 0.504,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.6291343111878056,
|
|
"grad_norm": 0.22889669531847387,
|
|
"learning_rate": 1.7907103167898574e-05,
|
|
"loss": 0.4875,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.6298533218291631,
|
|
"grad_norm": 0.22708288689043107,
|
|
"learning_rate": 1.7902491462035604e-05,
|
|
"loss": 0.4997,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.6305723324705206,
|
|
"grad_norm": 0.22524635262428871,
|
|
"learning_rate": 1.789787527602953e-05,
|
|
"loss": 0.5019,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 0.6312913431118781,
|
|
"grad_norm": 0.22737838008003655,
|
|
"learning_rate": 1.789325461249739e-05,
|
|
"loss": 0.5035,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.6320103537532356,
|
|
"grad_norm": 0.22814314146043482,
|
|
"learning_rate": 1.788862947405877e-05,
|
|
"loss": 0.5136,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 0.632729364394593,
|
|
"grad_norm": 0.23190044903178156,
|
|
"learning_rate": 1.7883999863335795e-05,
|
|
"loss": 0.4727,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.6334483750359505,
|
|
"grad_norm": 0.23035525240649793,
|
|
"learning_rate": 1.787936578295311e-05,
|
|
"loss": 0.4864,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 0.634167385677308,
|
|
"grad_norm": 0.2172364867230587,
|
|
"learning_rate": 1.787472723553792e-05,
|
|
"loss": 0.4932,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.6348863963186655,
|
|
"grad_norm": 0.22456558012536526,
|
|
"learning_rate": 1.7870084223719927e-05,
|
|
"loss": 0.4963,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 0.635605406960023,
|
|
"grad_norm": 0.22782599669782636,
|
|
"learning_rate": 1.7865436750131404e-05,
|
|
"loss": 0.485,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.6363244176013805,
|
|
"grad_norm": 0.22293627410505634,
|
|
"learning_rate": 1.7860784817407123e-05,
|
|
"loss": 0.5025,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.637043428242738,
|
|
"grad_norm": 0.2290342354101169,
|
|
"learning_rate": 1.7856128428184394e-05,
|
|
"loss": 0.5034,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.6377624388840955,
|
|
"grad_norm": 0.2123244352372745,
|
|
"learning_rate": 1.7851467585103058e-05,
|
|
"loss": 0.4789,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 0.6384814495254529,
|
|
"grad_norm": 0.2363554364033453,
|
|
"learning_rate": 1.7846802290805475e-05,
|
|
"loss": 0.4993,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.6392004601668104,
|
|
"grad_norm": 0.2169137466116251,
|
|
"learning_rate": 1.784213254793653e-05,
|
|
"loss": 0.5046,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 0.6399194708081679,
|
|
"grad_norm": 0.22732219414522734,
|
|
"learning_rate": 1.7837458359143635e-05,
|
|
"loss": 0.4898,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.6406384814495254,
|
|
"grad_norm": 0.23308360464567718,
|
|
"learning_rate": 1.783277972707671e-05,
|
|
"loss": 0.5037,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 0.641357492090883,
|
|
"grad_norm": 0.21797166379499391,
|
|
"learning_rate": 1.782809665438821e-05,
|
|
"loss": 0.4836,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.6420765027322405,
|
|
"grad_norm": 0.21670714333069238,
|
|
"learning_rate": 1.7823409143733096e-05,
|
|
"loss": 0.492,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 0.642795513373598,
|
|
"grad_norm": 0.22289980619650276,
|
|
"learning_rate": 1.7818717197768853e-05,
|
|
"loss": 0.488,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.6435145240149555,
|
|
"grad_norm": 0.21492990992266275,
|
|
"learning_rate": 1.7814020819155467e-05,
|
|
"loss": 0.485,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.644233534656313,
|
|
"grad_norm": 0.22565582075395257,
|
|
"learning_rate": 1.7809320010555457e-05,
|
|
"loss": 0.504,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.6449525452976704,
|
|
"grad_norm": 0.22953046603808086,
|
|
"learning_rate": 1.7804614774633837e-05,
|
|
"loss": 0.4942,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 0.6456715559390279,
|
|
"grad_norm": 0.22714500679627633,
|
|
"learning_rate": 1.7799905114058135e-05,
|
|
"loss": 0.4929,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.6463905665803854,
|
|
"grad_norm": 0.22402856864894585,
|
|
"learning_rate": 1.779519103149839e-05,
|
|
"loss": 0.5052,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 0.6471095772217429,
|
|
"grad_norm": 0.21690244310309423,
|
|
"learning_rate": 1.7790472529627152e-05,
|
|
"loss": 0.4773,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.6478285878631004,
|
|
"grad_norm": 0.22885853032295295,
|
|
"learning_rate": 1.7785749611119468e-05,
|
|
"loss": 0.5014,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 0.6485475985044579,
|
|
"grad_norm": 0.21772258605601158,
|
|
"learning_rate": 1.7781022278652892e-05,
|
|
"loss": 0.4843,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.6492666091458154,
|
|
"grad_norm": 0.22695485689332384,
|
|
"learning_rate": 1.777629053490748e-05,
|
|
"loss": 0.5005,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 0.6499856197871728,
|
|
"grad_norm": 0.2301755389479362,
|
|
"learning_rate": 1.777155438256579e-05,
|
|
"loss": 0.4863,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.6507046304285303,
|
|
"grad_norm": 0.2303790735813269,
|
|
"learning_rate": 1.776681382431288e-05,
|
|
"loss": 0.5158,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.6514236410698878,
|
|
"grad_norm": 0.22708261008915043,
|
|
"learning_rate": 1.7762068862836305e-05,
|
|
"loss": 0.4928,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.6521426517112453,
|
|
"grad_norm": 0.21978609691147083,
|
|
"learning_rate": 1.7757319500826118e-05,
|
|
"loss": 0.4821,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 0.6528616623526028,
|
|
"grad_norm": 0.2280064491413487,
|
|
"learning_rate": 1.775256574097486e-05,
|
|
"loss": 0.4944,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.6535806729939603,
|
|
"grad_norm": 0.23140586976106473,
|
|
"learning_rate": 1.7747807585977575e-05,
|
|
"loss": 0.4982,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 0.6542996836353178,
|
|
"grad_norm": 0.2249761869967712,
|
|
"learning_rate": 1.774304503853179e-05,
|
|
"loss": 0.503,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.6550186942766753,
|
|
"grad_norm": 0.22383737618616106,
|
|
"learning_rate": 1.773827810133753e-05,
|
|
"loss": 0.4845,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 0.6557377049180327,
|
|
"grad_norm": 0.23868560906760042,
|
|
"learning_rate": 1.77335067770973e-05,
|
|
"loss": 0.4915,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.6564567155593902,
|
|
"grad_norm": 0.22975311873958806,
|
|
"learning_rate": 1.7728731068516102e-05,
|
|
"loss": 0.4972,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 0.6571757262007478,
|
|
"grad_norm": 0.22989215659951942,
|
|
"learning_rate": 1.772395097830142e-05,
|
|
"loss": 0.4817,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.6578947368421053,
|
|
"grad_norm": 0.21496590035678406,
|
|
"learning_rate": 1.771916650916321e-05,
|
|
"loss": 0.4658,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.6586137474834628,
|
|
"grad_norm": 0.2242487964584687,
|
|
"learning_rate": 1.7714377663813932e-05,
|
|
"loss": 0.4948,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.6593327581248203,
|
|
"grad_norm": 0.2259566340870872,
|
|
"learning_rate": 1.770958444496851e-05,
|
|
"loss": 0.4884,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 0.6600517687661778,
|
|
"grad_norm": 0.22184505787317513,
|
|
"learning_rate": 1.7704786855344362e-05,
|
|
"loss": 0.4933,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.6607707794075353,
|
|
"grad_norm": 0.22147846460463083,
|
|
"learning_rate": 1.7699984897661366e-05,
|
|
"loss": 0.5163,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 0.6614897900488927,
|
|
"grad_norm": 0.22380466269010332,
|
|
"learning_rate": 1.769517857464189e-05,
|
|
"loss": 0.4924,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.6622088006902502,
|
|
"grad_norm": 0.2183939165932946,
|
|
"learning_rate": 1.769036788901077e-05,
|
|
"loss": 0.497,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 0.6629278113316077,
|
|
"grad_norm": 0.2299963801247869,
|
|
"learning_rate": 1.7685552843495325e-05,
|
|
"loss": 0.4888,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.6636468219729652,
|
|
"grad_norm": 0.22186769796460507,
|
|
"learning_rate": 1.768073344082533e-05,
|
|
"loss": 0.4946,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 0.6643658326143227,
|
|
"grad_norm": 0.22516670993138563,
|
|
"learning_rate": 1.7675909683733044e-05,
|
|
"loss": 0.489,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.6650848432556802,
|
|
"grad_norm": 0.2220975827121235,
|
|
"learning_rate": 1.767108157495319e-05,
|
|
"loss": 0.5141,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.6658038538970377,
|
|
"grad_norm": 0.22391628586117562,
|
|
"learning_rate": 1.7666249117222954e-05,
|
|
"loss": 0.5046,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.6665228645383952,
|
|
"grad_norm": 0.21878055532902985,
|
|
"learning_rate": 1.7661412313281996e-05,
|
|
"loss": 0.4827,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 0.6672418751797526,
|
|
"grad_norm": 0.21455190269812402,
|
|
"learning_rate": 1.7656571165872433e-05,
|
|
"loss": 0.4904,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.6679608858211101,
|
|
"grad_norm": 0.23229713610117478,
|
|
"learning_rate": 1.7651725677738848e-05,
|
|
"loss": 0.4944,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 0.6686798964624676,
|
|
"grad_norm": 0.2194496579507639,
|
|
"learning_rate": 1.764687585162828e-05,
|
|
"loss": 0.4945,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.6693989071038251,
|
|
"grad_norm": 0.24046731351470918,
|
|
"learning_rate": 1.764202169029023e-05,
|
|
"loss": 0.4985,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 0.6701179177451826,
|
|
"grad_norm": 0.24250442873954306,
|
|
"learning_rate": 1.7637163196476665e-05,
|
|
"loss": 0.4857,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.6708369283865401,
|
|
"grad_norm": 0.22781641303717975,
|
|
"learning_rate": 1.7632300372941994e-05,
|
|
"loss": 0.495,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 0.6715559390278976,
|
|
"grad_norm": 0.22177424874769494,
|
|
"learning_rate": 1.762743322244309e-05,
|
|
"loss": 0.4952,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.672274949669255,
|
|
"grad_norm": 0.2228721918137761,
|
|
"learning_rate": 1.762256174773928e-05,
|
|
"loss": 0.478,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.6729939603106126,
|
|
"grad_norm": 0.2199598987395226,
|
|
"learning_rate": 1.7617685951592332e-05,
|
|
"loss": 0.4921,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.6737129709519701,
|
|
"grad_norm": 0.21649943361141435,
|
|
"learning_rate": 1.7612805836766473e-05,
|
|
"loss": 0.4919,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 0.6744319815933276,
|
|
"grad_norm": 0.22575564833456532,
|
|
"learning_rate": 1.7607921406028383e-05,
|
|
"loss": 0.4804,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.6751509922346851,
|
|
"grad_norm": 0.2208911041009505,
|
|
"learning_rate": 1.7603032662147174e-05,
|
|
"loss": 0.4827,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 0.6758700028760426,
|
|
"grad_norm": 0.2318713562772489,
|
|
"learning_rate": 1.7598139607894415e-05,
|
|
"loss": 0.4916,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.6765890135174001,
|
|
"grad_norm": 0.22042625401063254,
|
|
"learning_rate": 1.7593242246044112e-05,
|
|
"loss": 0.4994,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 0.6773080241587576,
|
|
"grad_norm": 0.23336914586818205,
|
|
"learning_rate": 1.7588340579372723e-05,
|
|
"loss": 0.4876,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.6780270348001151,
|
|
"grad_norm": 0.23040143177334688,
|
|
"learning_rate": 1.7583434610659135e-05,
|
|
"loss": 0.4896,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 0.6787460454414725,
|
|
"grad_norm": 0.222450062203108,
|
|
"learning_rate": 1.757852434268468e-05,
|
|
"loss": 0.4977,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.67946505608283,
|
|
"grad_norm": 0.22334129871305886,
|
|
"learning_rate": 1.757360977823312e-05,
|
|
"loss": 0.4843,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.6801840667241875,
|
|
"grad_norm": 0.2249846211526674,
|
|
"learning_rate": 1.7568690920090667e-05,
|
|
"loss": 0.508,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.680903077365545,
|
|
"grad_norm": 0.22385876087181392,
|
|
"learning_rate": 1.756376777104596e-05,
|
|
"loss": 0.4792,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 0.6816220880069025,
|
|
"grad_norm": 0.21810768344183779,
|
|
"learning_rate": 1.755884033389006e-05,
|
|
"loss": 0.4959,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.68234109864826,
|
|
"grad_norm": 0.22889663535319346,
|
|
"learning_rate": 1.7553908611416476e-05,
|
|
"loss": 0.4921,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 0.6830601092896175,
|
|
"grad_norm": 0.22836005091803305,
|
|
"learning_rate": 1.754897260642114e-05,
|
|
"loss": 0.5041,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.683779119930975,
|
|
"grad_norm": 0.2337522649597847,
|
|
"learning_rate": 1.754403232170241e-05,
|
|
"loss": 0.4983,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 0.6844981305723324,
|
|
"grad_norm": 0.22427854619228013,
|
|
"learning_rate": 1.7539087760061065e-05,
|
|
"loss": 0.4788,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.6852171412136899,
|
|
"grad_norm": 0.22340482122931785,
|
|
"learning_rate": 1.7534138924300322e-05,
|
|
"loss": 0.4871,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 0.6859361518550474,
|
|
"grad_norm": 0.21209483637838136,
|
|
"learning_rate": 1.7529185817225814e-05,
|
|
"loss": 0.4843,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.6866551624964049,
|
|
"grad_norm": 0.2614114941386408,
|
|
"learning_rate": 1.7524228441645595e-05,
|
|
"loss": 0.4889,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.6873741731377624,
|
|
"grad_norm": 0.2759337878952069,
|
|
"learning_rate": 1.751926680037014e-05,
|
|
"loss": 0.4924,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.6880931837791199,
|
|
"grad_norm": 0.2395374492522584,
|
|
"learning_rate": 1.7514300896212337e-05,
|
|
"loss": 0.5061,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 0.6888121944204775,
|
|
"grad_norm": 0.2596268828245961,
|
|
"learning_rate": 1.7509330731987503e-05,
|
|
"loss": 0.5152,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.689531205061835,
|
|
"grad_norm": 0.22117993803359795,
|
|
"learning_rate": 1.750435631051336e-05,
|
|
"loss": 0.4988,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 0.6902502157031924,
|
|
"grad_norm": 0.22858075750882878,
|
|
"learning_rate": 1.7499377634610045e-05,
|
|
"loss": 0.5127,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.6909692263445499,
|
|
"grad_norm": 0.22143843885058742,
|
|
"learning_rate": 1.7494394707100106e-05,
|
|
"loss": 0.4877,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 0.6916882369859074,
|
|
"grad_norm": 0.22707771227565943,
|
|
"learning_rate": 1.748940753080851e-05,
|
|
"loss": 0.4958,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.6924072476272649,
|
|
"grad_norm": 0.27308742041361583,
|
|
"learning_rate": 1.7484416108562622e-05,
|
|
"loss": 0.4825,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 0.6931262582686224,
|
|
"grad_norm": 0.21424028907678153,
|
|
"learning_rate": 1.7479420443192224e-05,
|
|
"loss": 0.4854,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.6938452689099799,
|
|
"grad_norm": 0.22823633517798755,
|
|
"learning_rate": 1.747442053752949e-05,
|
|
"loss": 0.5075,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.6945642795513374,
|
|
"grad_norm": 0.21523415768614815,
|
|
"learning_rate": 1.746941639440902e-05,
|
|
"loss": 0.4939,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.6952832901926949,
|
|
"grad_norm": 0.2294486883785128,
|
|
"learning_rate": 1.7464408016667782e-05,
|
|
"loss": 0.4798,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 0.6960023008340523,
|
|
"grad_norm": 0.22961433155302852,
|
|
"learning_rate": 1.7459395407145184e-05,
|
|
"loss": 0.5036,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.6967213114754098,
|
|
"grad_norm": 0.2316012619634393,
|
|
"learning_rate": 1.7454378568683003e-05,
|
|
"loss": 0.4768,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 0.6974403221167673,
|
|
"grad_norm": 0.22749278143875307,
|
|
"learning_rate": 1.744935750412543e-05,
|
|
"loss": 0.488,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.6981593327581248,
|
|
"grad_norm": 0.22330719621266287,
|
|
"learning_rate": 1.7444332216319044e-05,
|
|
"loss": 0.4905,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 0.6988783433994823,
|
|
"grad_norm": 0.23288889389670006,
|
|
"learning_rate": 1.7439302708112825e-05,
|
|
"loss": 0.4975,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.6995973540408398,
|
|
"grad_norm": 0.2179924907854225,
|
|
"learning_rate": 1.743426898235814e-05,
|
|
"loss": 0.4972,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 0.7003163646821973,
|
|
"grad_norm": 0.22453081468092548,
|
|
"learning_rate": 1.7429231041908745e-05,
|
|
"loss": 0.4885,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.7010353753235548,
|
|
"grad_norm": 0.22653760101343884,
|
|
"learning_rate": 1.742418888962079e-05,
|
|
"loss": 0.4772,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.7017543859649122,
|
|
"grad_norm": 0.2412243876517065,
|
|
"learning_rate": 1.7419142528352815e-05,
|
|
"loss": 0.5079,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.7024733966062697,
|
|
"grad_norm": 0.2292565589968856,
|
|
"learning_rate": 1.7414091960965745e-05,
|
|
"loss": 0.4601,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 0.7031924072476272,
|
|
"grad_norm": 0.2409486748433275,
|
|
"learning_rate": 1.7409037190322882e-05,
|
|
"loss": 0.4947,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.7039114178889847,
|
|
"grad_norm": 0.22951050066480008,
|
|
"learning_rate": 1.740397821928992e-05,
|
|
"loss": 0.4942,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 0.7046304285303423,
|
|
"grad_norm": 0.22113426876234893,
|
|
"learning_rate": 1.7398915050734934e-05,
|
|
"loss": 0.4954,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.7053494391716998,
|
|
"grad_norm": 0.22645148562176193,
|
|
"learning_rate": 1.7393847687528367e-05,
|
|
"loss": 0.4824,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 0.7060684498130573,
|
|
"grad_norm": 0.22216456536462917,
|
|
"learning_rate": 1.7388776132543055e-05,
|
|
"loss": 0.4627,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.7067874604544148,
|
|
"grad_norm": 0.23986355860805847,
|
|
"learning_rate": 1.73837003886542e-05,
|
|
"loss": 0.511,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 0.7075064710957722,
|
|
"grad_norm": 0.24141481260266942,
|
|
"learning_rate": 1.737862045873939e-05,
|
|
"loss": 0.4904,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.7082254817371297,
|
|
"grad_norm": 0.2247741598774793,
|
|
"learning_rate": 1.7373536345678578e-05,
|
|
"loss": 0.5114,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.7089444923784872,
|
|
"grad_norm": 0.22360516425594493,
|
|
"learning_rate": 1.736844805235408e-05,
|
|
"loss": 0.5022,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.7096635030198447,
|
|
"grad_norm": 0.22136089523441504,
|
|
"learning_rate": 1.73633555816506e-05,
|
|
"loss": 0.4964,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 0.7103825136612022,
|
|
"grad_norm": 0.21994560345932826,
|
|
"learning_rate": 1.7358258936455203e-05,
|
|
"loss": 0.4985,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.7111015243025597,
|
|
"grad_norm": 0.22481508730322386,
|
|
"learning_rate": 1.7353158119657312e-05,
|
|
"loss": 0.4924,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 0.7118205349439172,
|
|
"grad_norm": 0.23240499727299993,
|
|
"learning_rate": 1.7348053134148727e-05,
|
|
"loss": 0.4896,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.7125395455852747,
|
|
"grad_norm": 0.2283444105591223,
|
|
"learning_rate": 1.7342943982823612e-05,
|
|
"loss": 0.4912,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 0.7132585562266321,
|
|
"grad_norm": 0.22351511189067214,
|
|
"learning_rate": 1.7337830668578478e-05,
|
|
"loss": 0.5084,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.7139775668679896,
|
|
"grad_norm": 0.21938042798122012,
|
|
"learning_rate": 1.733271319431221e-05,
|
|
"loss": 0.4814,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 0.7146965775093471,
|
|
"grad_norm": 0.23337124553132274,
|
|
"learning_rate": 1.732759156292605e-05,
|
|
"loss": 0.4892,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.7154155881507046,
|
|
"grad_norm": 0.2273338423301612,
|
|
"learning_rate": 1.732246577732359e-05,
|
|
"loss": 0.4862,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.7161345987920621,
|
|
"grad_norm": 0.22692402136683593,
|
|
"learning_rate": 1.731733584041078e-05,
|
|
"loss": 0.4781,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.7168536094334196,
|
|
"grad_norm": 0.22180526870888811,
|
|
"learning_rate": 1.731220175509593e-05,
|
|
"loss": 0.4937,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 0.7175726200747771,
|
|
"grad_norm": 0.23380812277896126,
|
|
"learning_rate": 1.7307063524289692e-05,
|
|
"loss": 0.4911,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.7182916307161346,
|
|
"grad_norm": 0.22035178227881136,
|
|
"learning_rate": 1.730192115090507e-05,
|
|
"loss": 0.4816,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 0.719010641357492,
|
|
"grad_norm": 0.22970889099981737,
|
|
"learning_rate": 1.7296774637857428e-05,
|
|
"loss": 0.5036,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.7197296519988495,
|
|
"grad_norm": 0.22578161192062568,
|
|
"learning_rate": 1.729162398806446e-05,
|
|
"loss": 0.493,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 0.7204486626402071,
|
|
"grad_norm": 0.2196631750733422,
|
|
"learning_rate": 1.7286469204446215e-05,
|
|
"loss": 0.4768,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.7211676732815646,
|
|
"grad_norm": 0.2357705200771042,
|
|
"learning_rate": 1.7281310289925087e-05,
|
|
"loss": 0.4955,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 0.7218866839229221,
|
|
"grad_norm": 0.22215462652234658,
|
|
"learning_rate": 1.7276147247425802e-05,
|
|
"loss": 0.485,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.7226056945642796,
|
|
"grad_norm": 0.22752699079874833,
|
|
"learning_rate": 1.7270980079875444e-05,
|
|
"loss": 0.4918,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.7233247052056371,
|
|
"grad_norm": 0.23611477914863882,
|
|
"learning_rate": 1.726580879020341e-05,
|
|
"loss": 0.5021,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.7240437158469946,
|
|
"grad_norm": 0.2182898832045658,
|
|
"learning_rate": 1.7260633381341462e-05,
|
|
"loss": 0.4753,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 0.724762726488352,
|
|
"grad_norm": 0.2527928852955433,
|
|
"learning_rate": 1.7255453856223674e-05,
|
|
"loss": 0.4885,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.7254817371297095,
|
|
"grad_norm": 0.22254442434874563,
|
|
"learning_rate": 1.7250270217786473e-05,
|
|
"loss": 0.4986,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 0.726200747771067,
|
|
"grad_norm": 0.2301174649411321,
|
|
"learning_rate": 1.7245082468968596e-05,
|
|
"loss": 0.4904,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.7269197584124245,
|
|
"grad_norm": 0.213616955709942,
|
|
"learning_rate": 1.7239890612711135e-05,
|
|
"loss": 0.4967,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 0.727638769053782,
|
|
"grad_norm": 0.21870487684081705,
|
|
"learning_rate": 1.723469465195749e-05,
|
|
"loss": 0.496,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.7283577796951395,
|
|
"grad_norm": 0.21985442116131565,
|
|
"learning_rate": 1.7229494589653403e-05,
|
|
"loss": 0.4709,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 0.729076790336497,
|
|
"grad_norm": 0.24043341432046253,
|
|
"learning_rate": 1.722429042874693e-05,
|
|
"loss": 0.4944,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.7297958009778545,
|
|
"grad_norm": 0.22259041116703665,
|
|
"learning_rate": 1.7219082172188452e-05,
|
|
"loss": 0.4974,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.730514811619212,
|
|
"grad_norm": 0.2227993175937651,
|
|
"learning_rate": 1.7213869822930686e-05,
|
|
"loss": 0.4906,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.7312338222605694,
|
|
"grad_norm": 0.23570180181268807,
|
|
"learning_rate": 1.7208653383928645e-05,
|
|
"loss": 0.4769,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 0.7319528329019269,
|
|
"grad_norm": 0.22680006014496892,
|
|
"learning_rate": 1.7203432858139683e-05,
|
|
"loss": 0.5028,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.7326718435432844,
|
|
"grad_norm": 0.22184121654666847,
|
|
"learning_rate": 1.719820824852346e-05,
|
|
"loss": 0.4839,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 0.7333908541846419,
|
|
"grad_norm": 0.22870656998660482,
|
|
"learning_rate": 1.719297955804195e-05,
|
|
"loss": 0.4995,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.7341098648259994,
|
|
"grad_norm": 0.2192476673370964,
|
|
"learning_rate": 1.718774678965945e-05,
|
|
"loss": 0.48,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 0.7348288754673569,
|
|
"grad_norm": 0.2276749968421666,
|
|
"learning_rate": 1.7182509946342554e-05,
|
|
"loss": 0.5092,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.7355478861087144,
|
|
"grad_norm": 0.2323405461686891,
|
|
"learning_rate": 1.717726903106018e-05,
|
|
"loss": 0.4908,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 0.736266896750072,
|
|
"grad_norm": 0.2276263889083126,
|
|
"learning_rate": 1.717202404678355e-05,
|
|
"loss": 0.4824,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.7369859073914294,
|
|
"grad_norm": 0.23767352851696075,
|
|
"learning_rate": 1.716677499648619e-05,
|
|
"loss": 0.498,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.7377049180327869,
|
|
"grad_norm": 0.23840569740317488,
|
|
"learning_rate": 1.7161521883143936e-05,
|
|
"loss": 0.491,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.7384239286741444,
|
|
"grad_norm": 0.23215210545943304,
|
|
"learning_rate": 1.715626470973492e-05,
|
|
"loss": 0.4882,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 0.7391429393155019,
|
|
"grad_norm": 0.21936893104681401,
|
|
"learning_rate": 1.7151003479239583e-05,
|
|
"loss": 0.5061,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.7398619499568594,
|
|
"grad_norm": 0.21293031412925917,
|
|
"learning_rate": 1.7145738194640665e-05,
|
|
"loss": 0.4774,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 0.7405809605982169,
|
|
"grad_norm": 0.21941363355229476,
|
|
"learning_rate": 1.7140468858923198e-05,
|
|
"loss": 0.4902,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.7412999712395744,
|
|
"grad_norm": 0.2266668243692637,
|
|
"learning_rate": 1.7135195475074523e-05,
|
|
"loss": 0.4869,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 0.7420189818809318,
|
|
"grad_norm": 0.2209708864201447,
|
|
"learning_rate": 1.7129918046084263e-05,
|
|
"loss": 0.4758,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.7427379925222893,
|
|
"grad_norm": 0.22661270643043085,
|
|
"learning_rate": 1.712463657494434e-05,
|
|
"loss": 0.4973,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 0.7434570031636468,
|
|
"grad_norm": 0.23214286702988027,
|
|
"learning_rate": 1.711935106464897e-05,
|
|
"loss": 0.4996,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.7441760138050043,
|
|
"grad_norm": 0.2187655580344453,
|
|
"learning_rate": 1.7114061518194655e-05,
|
|
"loss": 0.4873,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.7448950244463618,
|
|
"grad_norm": 0.2242583712510688,
|
|
"learning_rate": 1.7108767938580184e-05,
|
|
"loss": 0.48,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.7456140350877193,
|
|
"grad_norm": 0.2152231764672806,
|
|
"learning_rate": 1.710347032880664e-05,
|
|
"loss": 0.4861,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 0.7463330457290768,
|
|
"grad_norm": 0.23531880208372852,
|
|
"learning_rate": 1.7098168691877386e-05,
|
|
"loss": 0.473,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.7470520563704343,
|
|
"grad_norm": 0.22192408281986586,
|
|
"learning_rate": 1.7092863030798063e-05,
|
|
"loss": 0.493,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 0.7477710670117917,
|
|
"grad_norm": 0.22200491012741358,
|
|
"learning_rate": 1.7087553348576603e-05,
|
|
"loss": 0.4864,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.7484900776531492,
|
|
"grad_norm": 0.22153294696718556,
|
|
"learning_rate": 1.7082239648223212e-05,
|
|
"loss": 0.4989,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 0.7492090882945067,
|
|
"grad_norm": 0.21469480556358042,
|
|
"learning_rate": 1.7076921932750374e-05,
|
|
"loss": 0.515,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.7499280989358642,
|
|
"grad_norm": 0.22916542377243984,
|
|
"learning_rate": 1.7071600205172848e-05,
|
|
"loss": 0.498,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 0.7506471095772217,
|
|
"grad_norm": 0.22666436482873567,
|
|
"learning_rate": 1.7066274468507677e-05,
|
|
"loss": 0.4987,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.7513661202185792,
|
|
"grad_norm": 0.2399650963726253,
|
|
"learning_rate": 1.7060944725774165e-05,
|
|
"loss": 0.4897,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.7520851308599368,
|
|
"grad_norm": 0.2112528215844886,
|
|
"learning_rate": 1.7055610979993895e-05,
|
|
"loss": 0.4886,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.7528041415012943,
|
|
"grad_norm": 0.21680533409841338,
|
|
"learning_rate": 1.705027323419071e-05,
|
|
"loss": 0.5032,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 0.7535231521426518,
|
|
"grad_norm": 0.2334385250291673,
|
|
"learning_rate": 1.7044931491390736e-05,
|
|
"loss": 0.4986,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.7542421627840092,
|
|
"grad_norm": 0.2167997638685853,
|
|
"learning_rate": 1.7039585754622345e-05,
|
|
"loss": 0.5036,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 0.7549611734253667,
|
|
"grad_norm": 0.22161454188712626,
|
|
"learning_rate": 1.7034236026916195e-05,
|
|
"loss": 0.4845,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.7556801840667242,
|
|
"grad_norm": 0.2300865929908801,
|
|
"learning_rate": 1.7028882311305194e-05,
|
|
"loss": 0.4831,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 0.7563991947080817,
|
|
"grad_norm": 0.2347650948886215,
|
|
"learning_rate": 1.7023524610824508e-05,
|
|
"loss": 0.4781,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.7571182053494392,
|
|
"grad_norm": 0.21672676426437748,
|
|
"learning_rate": 1.7018162928511572e-05,
|
|
"loss": 0.4866,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 0.7578372159907967,
|
|
"grad_norm": 0.2277082506824969,
|
|
"learning_rate": 1.7012797267406068e-05,
|
|
"loss": 0.4863,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.7585562266321542,
|
|
"grad_norm": 0.2249837586716329,
|
|
"learning_rate": 1.700742763054995e-05,
|
|
"loss": 0.4941,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 0.7592752372735116,
|
|
"grad_norm": 0.22592325114041079,
|
|
"learning_rate": 1.70020540209874e-05,
|
|
"loss": 0.4996,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.7599942479148691,
|
|
"grad_norm": 0.23233670566592116,
|
|
"learning_rate": 1.6996676441764877e-05,
|
|
"loss": 0.4909,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 0.7607132585562266,
|
|
"grad_norm": 0.21942890788454864,
|
|
"learning_rate": 1.6991294895931083e-05,
|
|
"loss": 0.4811,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.7614322691975841,
|
|
"grad_norm": 0.22043734089709277,
|
|
"learning_rate": 1.6985909386536957e-05,
|
|
"loss": 0.5007,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 0.7621512798389416,
|
|
"grad_norm": 0.228478118346707,
|
|
"learning_rate": 1.6980519916635704e-05,
|
|
"loss": 0.4662,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.7628702904802991,
|
|
"grad_norm": 0.22449985491532506,
|
|
"learning_rate": 1.6975126489282762e-05,
|
|
"loss": 0.4757,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 0.7635893011216566,
|
|
"grad_norm": 0.22059926031579813,
|
|
"learning_rate": 1.6969729107535814e-05,
|
|
"loss": 0.4754,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.7643083117630141,
|
|
"grad_norm": 0.2273525810445935,
|
|
"learning_rate": 1.6964327774454784e-05,
|
|
"loss": 0.4756,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 0.7650273224043715,
|
|
"grad_norm": 0.2309888426177309,
|
|
"learning_rate": 1.6958922493101844e-05,
|
|
"loss": 0.4972,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.765746333045729,
|
|
"grad_norm": 0.21694684669431855,
|
|
"learning_rate": 1.6953513266541396e-05,
|
|
"loss": 0.4875,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 0.7664653436870865,
|
|
"grad_norm": 0.22695318127330225,
|
|
"learning_rate": 1.6948100097840082e-05,
|
|
"loss": 0.4916,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.767184354328444,
|
|
"grad_norm": 0.23443902534856237,
|
|
"learning_rate": 1.694268299006678e-05,
|
|
"loss": 0.4918,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 0.7679033649698016,
|
|
"grad_norm": 0.22364296541891465,
|
|
"learning_rate": 1.6937261946292603e-05,
|
|
"loss": 0.4949,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.7686223756111591,
|
|
"grad_norm": 0.24012071225481527,
|
|
"learning_rate": 1.693183696959088e-05,
|
|
"loss": 0.4987,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 0.7693413862525166,
|
|
"grad_norm": 0.23501285078960005,
|
|
"learning_rate": 1.6926408063037194e-05,
|
|
"loss": 0.4734,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.7700603968938741,
|
|
"grad_norm": 0.22817716513346892,
|
|
"learning_rate": 1.692097522970934e-05,
|
|
"loss": 0.4697,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 0.7707794075352316,
|
|
"grad_norm": 0.2273677168182446,
|
|
"learning_rate": 1.6915538472687337e-05,
|
|
"loss": 0.483,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.771498418176589,
|
|
"grad_norm": 0.24145708448332248,
|
|
"learning_rate": 1.6910097795053443e-05,
|
|
"loss": 0.495,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 0.7722174288179465,
|
|
"grad_norm": 0.21989503645557912,
|
|
"learning_rate": 1.6904653199892128e-05,
|
|
"loss": 0.4928,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.772936439459304,
|
|
"grad_norm": 0.2308138520043049,
|
|
"learning_rate": 1.689920469029008e-05,
|
|
"loss": 0.4869,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 0.7736554501006615,
|
|
"grad_norm": 0.2190293279349582,
|
|
"learning_rate": 1.689375226933622e-05,
|
|
"loss": 0.4697,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.774374460742019,
|
|
"grad_norm": 0.22367853404329643,
|
|
"learning_rate": 1.6888295940121667e-05,
|
|
"loss": 0.4875,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 0.7750934713833765,
|
|
"grad_norm": 0.2248775477308476,
|
|
"learning_rate": 1.6882835705739777e-05,
|
|
"loss": 0.4838,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.775812482024734,
|
|
"grad_norm": 0.22960776143545394,
|
|
"learning_rate": 1.6877371569286103e-05,
|
|
"loss": 0.5037,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 0.7765314926660914,
|
|
"grad_norm": 0.22781522889996447,
|
|
"learning_rate": 1.6871903533858417e-05,
|
|
"loss": 0.4959,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.7772505033074489,
|
|
"grad_norm": 0.22083050011075891,
|
|
"learning_rate": 1.6866431602556704e-05,
|
|
"loss": 0.4885,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 0.7779695139488064,
|
|
"grad_norm": 0.21468440259119084,
|
|
"learning_rate": 1.686095577848315e-05,
|
|
"loss": 0.4975,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.7786885245901639,
|
|
"grad_norm": 0.22226370104089818,
|
|
"learning_rate": 1.6855476064742156e-05,
|
|
"loss": 0.4887,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 0.7794075352315214,
|
|
"grad_norm": 0.22494368750807195,
|
|
"learning_rate": 1.6849992464440323e-05,
|
|
"loss": 0.4968,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.7801265458728789,
|
|
"grad_norm": 0.22237263202529223,
|
|
"learning_rate": 1.684450498068646e-05,
|
|
"loss": 0.4835,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 0.7808455565142364,
|
|
"grad_norm": 0.22938231525634556,
|
|
"learning_rate": 1.6839013616591574e-05,
|
|
"loss": 0.4905,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.7815645671555939,
|
|
"grad_norm": 0.2144686219903849,
|
|
"learning_rate": 1.683351837526887e-05,
|
|
"loss": 0.5035,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 0.7822835777969513,
|
|
"grad_norm": 0.21730574169202727,
|
|
"learning_rate": 1.6828019259833758e-05,
|
|
"loss": 0.4762,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.7830025884383088,
|
|
"grad_norm": 0.22346888724944097,
|
|
"learning_rate": 1.6822516273403832e-05,
|
|
"loss": 0.463,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 0.7837215990796664,
|
|
"grad_norm": 0.22238996145588832,
|
|
"learning_rate": 1.68170094190989e-05,
|
|
"loss": 0.4952,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.7844406097210239,
|
|
"grad_norm": 0.21586108868278092,
|
|
"learning_rate": 1.681149870004094e-05,
|
|
"loss": 0.4896,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 0.7851596203623814,
|
|
"grad_norm": 0.23240105717982454,
|
|
"learning_rate": 1.6805984119354146e-05,
|
|
"loss": 0.4818,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.7858786310037389,
|
|
"grad_norm": 0.22390902508487515,
|
|
"learning_rate": 1.6800465680164875e-05,
|
|
"loss": 0.499,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 0.7865976416450964,
|
|
"grad_norm": 0.2281975071709816,
|
|
"learning_rate": 1.6794943385601688e-05,
|
|
"loss": 0.4914,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.7873166522864539,
|
|
"grad_norm": 0.22488441842585513,
|
|
"learning_rate": 1.6789417238795334e-05,
|
|
"loss": 0.4635,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 0.7880356629278114,
|
|
"grad_norm": 0.22598444041192908,
|
|
"learning_rate": 1.678388724287873e-05,
|
|
"loss": 0.4772,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.7887546735691688,
|
|
"grad_norm": 0.2199128816848034,
|
|
"learning_rate": 1.6778353400986996e-05,
|
|
"loss": 0.4797,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.22362822017432454,
|
|
"learning_rate": 1.6772815716257414e-05,
|
|
"loss": 0.4847,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.7901926948518838,
|
|
"grad_norm": 0.2243910018533074,
|
|
"learning_rate": 1.676727419182945e-05,
|
|
"loss": 0.4868,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 0.7909117054932413,
|
|
"grad_norm": 0.23119935601280228,
|
|
"learning_rate": 1.6761728830844758e-05,
|
|
"loss": 0.4804,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.7916307161345988,
|
|
"grad_norm": 0.22939329313957305,
|
|
"learning_rate": 1.6756179636447153e-05,
|
|
"loss": 0.483,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 0.7923497267759563,
|
|
"grad_norm": 0.223164242974818,
|
|
"learning_rate": 1.6750626611782624e-05,
|
|
"loss": 0.4934,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.7930687374173138,
|
|
"grad_norm": 0.21955055597425524,
|
|
"learning_rate": 1.6745069759999345e-05,
|
|
"loss": 0.4863,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 0.7937877480586712,
|
|
"grad_norm": 0.22108710690359845,
|
|
"learning_rate": 1.673950908424764e-05,
|
|
"loss": 0.4933,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.7945067587000287,
|
|
"grad_norm": 0.21896612206901434,
|
|
"learning_rate": 1.6733944587680024e-05,
|
|
"loss": 0.4842,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 0.7952257693413862,
|
|
"grad_norm": 0.2250445157258839,
|
|
"learning_rate": 1.6728376273451155e-05,
|
|
"loss": 0.4802,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.7959447799827437,
|
|
"grad_norm": 0.22614610498599702,
|
|
"learning_rate": 1.6722804144717866e-05,
|
|
"loss": 0.4867,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 0.7966637906241012,
|
|
"grad_norm": 0.21945982018024862,
|
|
"learning_rate": 1.671722820463916e-05,
|
|
"loss": 0.4837,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.7973828012654587,
|
|
"grad_norm": 0.2235783071481693,
|
|
"learning_rate": 1.6711648456376187e-05,
|
|
"loss": 0.4847,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 0.7981018119068162,
|
|
"grad_norm": 0.22503519784393333,
|
|
"learning_rate": 1.6706064903092265e-05,
|
|
"loss": 0.4824,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.7988208225481737,
|
|
"grad_norm": 0.23765125443432644,
|
|
"learning_rate": 1.670047754795287e-05,
|
|
"loss": 0.502,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 0.7995398331895313,
|
|
"grad_norm": 0.24539570004485733,
|
|
"learning_rate": 1.6694886394125616e-05,
|
|
"loss": 0.4853,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.8002588438308887,
|
|
"grad_norm": 0.22839361425841867,
|
|
"learning_rate": 1.6689291444780296e-05,
|
|
"loss": 0.4843,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 0.8009778544722462,
|
|
"grad_norm": 0.2220318852698405,
|
|
"learning_rate": 1.668369270308884e-05,
|
|
"loss": 0.4761,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.8016968651136037,
|
|
"grad_norm": 0.22278535824455947,
|
|
"learning_rate": 1.6678090172225334e-05,
|
|
"loss": 0.4724,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 0.8024158757549612,
|
|
"grad_norm": 0.22065163329260376,
|
|
"learning_rate": 1.6672483855366003e-05,
|
|
"loss": 0.4823,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.8031348863963187,
|
|
"grad_norm": 0.22705093047998076,
|
|
"learning_rate": 1.6666873755689233e-05,
|
|
"loss": 0.473,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 0.8038538970376762,
|
|
"grad_norm": 0.22183215111313642,
|
|
"learning_rate": 1.6661259876375538e-05,
|
|
"loss": 0.4858,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.8045729076790337,
|
|
"grad_norm": 0.2287693632580429,
|
|
"learning_rate": 1.6655642220607585e-05,
|
|
"loss": 0.4841,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 0.8052919183203912,
|
|
"grad_norm": 0.2160365599554167,
|
|
"learning_rate": 1.665002079157018e-05,
|
|
"loss": 0.4812,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.8060109289617486,
|
|
"grad_norm": 0.23398640973486998,
|
|
"learning_rate": 1.6644395592450275e-05,
|
|
"loss": 0.4978,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 0.8067299396031061,
|
|
"grad_norm": 0.2246444511777012,
|
|
"learning_rate": 1.6638766626436942e-05,
|
|
"loss": 0.4949,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.8074489502444636,
|
|
"grad_norm": 0.23046563410664012,
|
|
"learning_rate": 1.663313389672141e-05,
|
|
"loss": 0.4782,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 0.8081679608858211,
|
|
"grad_norm": 0.2236054981949869,
|
|
"learning_rate": 1.662749740649702e-05,
|
|
"loss": 0.5058,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.8088869715271786,
|
|
"grad_norm": 0.22204436891909438,
|
|
"learning_rate": 1.662185715895926e-05,
|
|
"loss": 0.4795,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 0.8096059821685361,
|
|
"grad_norm": 0.22077332371837027,
|
|
"learning_rate": 1.6616213157305742e-05,
|
|
"loss": 0.4898,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.8103249928098936,
|
|
"grad_norm": 0.23330823438995923,
|
|
"learning_rate": 1.6610565404736216e-05,
|
|
"loss": 0.4825,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 0.811044003451251,
|
|
"grad_norm": 0.2189476109187264,
|
|
"learning_rate": 1.660491390445254e-05,
|
|
"loss": 0.4748,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.8117630140926085,
|
|
"grad_norm": 0.2216225341020065,
|
|
"learning_rate": 1.6599258659658716e-05,
|
|
"loss": 0.4804,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 0.812482024733966,
|
|
"grad_norm": 0.23714374623134352,
|
|
"learning_rate": 1.6593599673560854e-05,
|
|
"loss": 0.5001,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.8132010353753235,
|
|
"grad_norm": 0.23710103986620198,
|
|
"learning_rate": 1.6587936949367195e-05,
|
|
"loss": 0.4703,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 0.813920046016681,
|
|
"grad_norm": 0.2386050339880569,
|
|
"learning_rate": 1.658227049028809e-05,
|
|
"loss": 0.4987,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.8146390566580385,
|
|
"grad_norm": 0.21525085942313,
|
|
"learning_rate": 1.6576600299536024e-05,
|
|
"loss": 0.4944,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 0.8153580672993961,
|
|
"grad_norm": 0.23108927827783027,
|
|
"learning_rate": 1.6570926380325574e-05,
|
|
"loss": 0.4817,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.8160770779407536,
|
|
"grad_norm": 0.2300507584194407,
|
|
"learning_rate": 1.6565248735873452e-05,
|
|
"loss": 0.4992,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 0.8167960885821111,
|
|
"grad_norm": 0.21965071462741484,
|
|
"learning_rate": 1.6559567369398468e-05,
|
|
"loss": 0.4658,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.8175150992234685,
|
|
"grad_norm": 0.2229343886670813,
|
|
"learning_rate": 1.6553882284121554e-05,
|
|
"loss": 0.4758,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 0.818234109864826,
|
|
"grad_norm": 0.23703192379223698,
|
|
"learning_rate": 1.6548193483265737e-05,
|
|
"loss": 0.4663,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.8189531205061835,
|
|
"grad_norm": 0.23582047078488924,
|
|
"learning_rate": 1.6542500970056154e-05,
|
|
"loss": 0.4941,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 0.819672131147541,
|
|
"grad_norm": 0.21828699915638525,
|
|
"learning_rate": 1.653680474772006e-05,
|
|
"loss": 0.4629,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.8203911417888985,
|
|
"grad_norm": 0.24568154916375534,
|
|
"learning_rate": 1.6531104819486795e-05,
|
|
"loss": 0.481,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 0.821110152430256,
|
|
"grad_norm": 0.22389567629138732,
|
|
"learning_rate": 1.6525401188587812e-05,
|
|
"loss": 0.4887,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.8218291630716135,
|
|
"grad_norm": 0.2168816136348257,
|
|
"learning_rate": 1.6519693858256657e-05,
|
|
"loss": 0.5099,
|
|
"step": 5715
|
|
},
|
|
{
|
|
"epoch": 0.822548173712971,
|
|
"grad_norm": 0.23615713732079055,
|
|
"learning_rate": 1.6513982831728975e-05,
|
|
"loss": 0.4799,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.8232671843543284,
|
|
"grad_norm": 0.21988093075155635,
|
|
"learning_rate": 1.6508268112242502e-05,
|
|
"loss": 0.4759,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 0.8239861949956859,
|
|
"grad_norm": 0.22800000584062294,
|
|
"learning_rate": 1.650254970303708e-05,
|
|
"loss": 0.4814,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.8247052056370434,
|
|
"grad_norm": 0.21678615221681993,
|
|
"learning_rate": 1.6496827607354626e-05,
|
|
"loss": 0.4847,
|
|
"step": 5735
|
|
},
|
|
{
|
|
"epoch": 0.8254242162784009,
|
|
"grad_norm": 0.23408400973634177,
|
|
"learning_rate": 1.6491101828439166e-05,
|
|
"loss": 0.4881,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.8261432269197584,
|
|
"grad_norm": 0.2186056862208376,
|
|
"learning_rate": 1.6485372369536795e-05,
|
|
"loss": 0.4924,
|
|
"step": 5745
|
|
},
|
|
{
|
|
"epoch": 0.8268622375611159,
|
|
"grad_norm": 0.2159187104796151,
|
|
"learning_rate": 1.647963923389571e-05,
|
|
"loss": 0.4825,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.8275812482024734,
|
|
"grad_norm": 0.24496266550950746,
|
|
"learning_rate": 1.6473902424766183e-05,
|
|
"loss": 0.494,
|
|
"step": 5755
|
|
},
|
|
{
|
|
"epoch": 0.8283002588438308,
|
|
"grad_norm": 0.2216005575843833,
|
|
"learning_rate": 1.6468161945400563e-05,
|
|
"loss": 0.4986,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.8290192694851883,
|
|
"grad_norm": 0.22620718671453902,
|
|
"learning_rate": 1.6462417799053305e-05,
|
|
"loss": 0.4852,
|
|
"step": 5765
|
|
},
|
|
{
|
|
"epoch": 0.8297382801265458,
|
|
"grad_norm": 0.22835005662918015,
|
|
"learning_rate": 1.6456669988980914e-05,
|
|
"loss": 0.4908,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.8304572907679033,
|
|
"grad_norm": 0.22567869815441383,
|
|
"learning_rate": 1.6450918518441987e-05,
|
|
"loss": 0.4833,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 0.8311763014092609,
|
|
"grad_norm": 0.2315328450963394,
|
|
"learning_rate": 1.6445163390697195e-05,
|
|
"loss": 0.5077,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.8318953120506184,
|
|
"grad_norm": 0.22117832252886435,
|
|
"learning_rate": 1.6439404609009274e-05,
|
|
"loss": 0.4814,
|
|
"step": 5785
|
|
},
|
|
{
|
|
"epoch": 0.8326143226919759,
|
|
"grad_norm": 0.2341708532379826,
|
|
"learning_rate": 1.643364217664305e-05,
|
|
"loss": 0.4788,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.22387820306203415,
|
|
"learning_rate": 1.6427876096865394e-05,
|
|
"loss": 0.4817,
|
|
"step": 5795
|
|
},
|
|
{
|
|
"epoch": 0.8340523439746909,
|
|
"grad_norm": 0.22556092688610593,
|
|
"learning_rate": 1.642210637294527e-05,
|
|
"loss": 0.4717,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.8347713546160483,
|
|
"grad_norm": 0.23744905375208034,
|
|
"learning_rate": 1.6416333008153686e-05,
|
|
"loss": 0.4758,
|
|
"step": 5805
|
|
},
|
|
{
|
|
"epoch": 0.8354903652574058,
|
|
"grad_norm": 0.23711306495500717,
|
|
"learning_rate": 1.6410556005763722e-05,
|
|
"loss": 0.4779,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.8362093758987633,
|
|
"grad_norm": 0.22411311826775682,
|
|
"learning_rate": 1.640477536905053e-05,
|
|
"loss": 0.4859,
|
|
"step": 5815
|
|
},
|
|
{
|
|
"epoch": 0.8369283865401208,
|
|
"grad_norm": 0.2428542391128517,
|
|
"learning_rate": 1.6398991101291316e-05,
|
|
"loss": 0.471,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.8376473971814783,
|
|
"grad_norm": 0.22509448770506563,
|
|
"learning_rate": 1.6393203205765335e-05,
|
|
"loss": 0.4782,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 0.8383664078228358,
|
|
"grad_norm": 0.22559902539830068,
|
|
"learning_rate": 1.6387411685753912e-05,
|
|
"loss": 0.468,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.8390854184641933,
|
|
"grad_norm": 0.24306805583011284,
|
|
"learning_rate": 1.6381616544540415e-05,
|
|
"loss": 0.4905,
|
|
"step": 5835
|
|
},
|
|
{
|
|
"epoch": 0.8398044291055508,
|
|
"grad_norm": 0.23787097633193682,
|
|
"learning_rate": 1.637581778541028e-05,
|
|
"loss": 0.4898,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.8405234397469082,
|
|
"grad_norm": 0.21778768663408346,
|
|
"learning_rate": 1.637001541165098e-05,
|
|
"loss": 0.4726,
|
|
"step": 5845
|
|
},
|
|
{
|
|
"epoch": 0.8412424503882657,
|
|
"grad_norm": 0.23070983780461993,
|
|
"learning_rate": 1.6364209426552046e-05,
|
|
"loss": 0.4758,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.8419614610296232,
|
|
"grad_norm": 0.2835667407348202,
|
|
"learning_rate": 1.6358399833405044e-05,
|
|
"loss": 0.4956,
|
|
"step": 5855
|
|
},
|
|
{
|
|
"epoch": 0.8426804716709807,
|
|
"grad_norm": 0.2381888745289789,
|
|
"learning_rate": 1.6352586635503608e-05,
|
|
"loss": 0.4841,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.8433994823123382,
|
|
"grad_norm": 0.22569894243887517,
|
|
"learning_rate": 1.6346769836143393e-05,
|
|
"loss": 0.4857,
|
|
"step": 5865
|
|
},
|
|
{
|
|
"epoch": 0.8441184929536957,
|
|
"grad_norm": 0.2302954548936336,
|
|
"learning_rate": 1.6340949438622112e-05,
|
|
"loss": 0.5063,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.8448375035950532,
|
|
"grad_norm": 0.21531230746504856,
|
|
"learning_rate": 1.6335125446239505e-05,
|
|
"loss": 0.4786,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 0.8455565142364107,
|
|
"grad_norm": 0.23073398174636456,
|
|
"learning_rate": 1.6329297862297357e-05,
|
|
"loss": 0.4808,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.8462755248777681,
|
|
"grad_norm": 0.22976229717518745,
|
|
"learning_rate": 1.632346669009949e-05,
|
|
"loss": 0.4716,
|
|
"step": 5885
|
|
},
|
|
{
|
|
"epoch": 0.8469945355191257,
|
|
"grad_norm": 0.22232867606276158,
|
|
"learning_rate": 1.6317631932951754e-05,
|
|
"loss": 0.4775,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.8477135461604832,
|
|
"grad_norm": 0.21947980567589506,
|
|
"learning_rate": 1.631179359416204e-05,
|
|
"loss": 0.4893,
|
|
"step": 5895
|
|
},
|
|
{
|
|
"epoch": 0.8484325568018407,
|
|
"grad_norm": 0.2110588984550264,
|
|
"learning_rate": 1.6305951677040267e-05,
|
|
"loss": 0.471,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.8491515674431982,
|
|
"grad_norm": 0.23131721067942174,
|
|
"learning_rate": 1.6300106184898378e-05,
|
|
"loss": 0.4965,
|
|
"step": 5905
|
|
},
|
|
{
|
|
"epoch": 0.8498705780845557,
|
|
"grad_norm": 0.23118088988577612,
|
|
"learning_rate": 1.6294257121050346e-05,
|
|
"loss": 0.4725,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.8505895887259132,
|
|
"grad_norm": 0.22190058206659963,
|
|
"learning_rate": 1.6288404488812166e-05,
|
|
"loss": 0.5111,
|
|
"step": 5915
|
|
},
|
|
{
|
|
"epoch": 0.8513085993672707,
|
|
"grad_norm": 0.21259685560742111,
|
|
"learning_rate": 1.6282548291501862e-05,
|
|
"loss": 0.4737,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.8520276100086281,
|
|
"grad_norm": 0.2205808860614832,
|
|
"learning_rate": 1.6276688532439476e-05,
|
|
"loss": 0.4773,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 0.8527466206499856,
|
|
"grad_norm": 0.22014342140272772,
|
|
"learning_rate": 1.6270825214947067e-05,
|
|
"loss": 0.477,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.8534656312913431,
|
|
"grad_norm": 0.22028914254683596,
|
|
"learning_rate": 1.626495834234872e-05,
|
|
"loss": 0.4976,
|
|
"step": 5935
|
|
},
|
|
{
|
|
"epoch": 0.8541846419327006,
|
|
"grad_norm": 0.22540771119256428,
|
|
"learning_rate": 1.625908791797052e-05,
|
|
"loss": 0.49,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.8549036525740581,
|
|
"grad_norm": 0.2243457513999384,
|
|
"learning_rate": 1.6253213945140577e-05,
|
|
"loss": 0.4708,
|
|
"step": 5945
|
|
},
|
|
{
|
|
"epoch": 0.8556226632154156,
|
|
"grad_norm": 0.22179506056212145,
|
|
"learning_rate": 1.6247336427189013e-05,
|
|
"loss": 0.4612,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.8563416738567731,
|
|
"grad_norm": 0.22499318240500388,
|
|
"learning_rate": 1.6241455367447955e-05,
|
|
"loss": 0.4799,
|
|
"step": 5955
|
|
},
|
|
{
|
|
"epoch": 0.8570606844981306,
|
|
"grad_norm": 0.23238220072024518,
|
|
"learning_rate": 1.623557076925154e-05,
|
|
"loss": 0.4944,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.857779695139488,
|
|
"grad_norm": 0.2328029055532124,
|
|
"learning_rate": 1.6229682635935913e-05,
|
|
"loss": 0.4896,
|
|
"step": 5965
|
|
},
|
|
{
|
|
"epoch": 0.8584987057808455,
|
|
"grad_norm": 0.23127132014501922,
|
|
"learning_rate": 1.6223790970839214e-05,
|
|
"loss": 0.48,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.859217716422203,
|
|
"grad_norm": 0.2231009382922071,
|
|
"learning_rate": 1.6217895777301606e-05,
|
|
"loss": 0.4787,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 0.8599367270635605,
|
|
"grad_norm": 0.2317042007787483,
|
|
"learning_rate": 1.6211997058665226e-05,
|
|
"loss": 0.4766,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.860655737704918,
|
|
"grad_norm": 0.22457281308081492,
|
|
"learning_rate": 1.6206094818274228e-05,
|
|
"loss": 0.5016,
|
|
"step": 5985
|
|
},
|
|
{
|
|
"epoch": 0.8613747483462755,
|
|
"grad_norm": 0.22725088886882755,
|
|
"learning_rate": 1.6200189059474758e-05,
|
|
"loss": 0.4776,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.862093758987633,
|
|
"grad_norm": 0.2357506938091471,
|
|
"learning_rate": 1.6194279785614955e-05,
|
|
"loss": 0.4896,
|
|
"step": 5995
|
|
},
|
|
{
|
|
"epoch": 0.8628127696289906,
|
|
"grad_norm": 0.23057941226085277,
|
|
"learning_rate": 1.618836700004495e-05,
|
|
"loss": 0.487,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.863531780270348,
|
|
"grad_norm": 0.22756225548351794,
|
|
"learning_rate": 1.6182450706116863e-05,
|
|
"loss": 0.4989,
|
|
"step": 6005
|
|
},
|
|
{
|
|
"epoch": 0.8642507909117055,
|
|
"grad_norm": 0.22167997938476994,
|
|
"learning_rate": 1.617653090718481e-05,
|
|
"loss": 0.481,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.864969801553063,
|
|
"grad_norm": 0.2316491758487818,
|
|
"learning_rate": 1.6170607606604895e-05,
|
|
"loss": 0.4638,
|
|
"step": 6015
|
|
},
|
|
{
|
|
"epoch": 0.8656888121944205,
|
|
"grad_norm": 0.23171481513913114,
|
|
"learning_rate": 1.6164680807735192e-05,
|
|
"loss": 0.4881,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.866407822835778,
|
|
"grad_norm": 0.23045109254133472,
|
|
"learning_rate": 1.615875051393578e-05,
|
|
"loss": 0.4797,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 0.8671268334771355,
|
|
"grad_norm": 0.21834412170756476,
|
|
"learning_rate": 1.6152816728568697e-05,
|
|
"loss": 0.5082,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.867845844118493,
|
|
"grad_norm": 0.20756527996156474,
|
|
"learning_rate": 1.614687945499798e-05,
|
|
"loss": 0.4718,
|
|
"step": 6035
|
|
},
|
|
{
|
|
"epoch": 0.8685648547598505,
|
|
"grad_norm": 0.22570883487100146,
|
|
"learning_rate": 1.6140938696589634e-05,
|
|
"loss": 0.4769,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.869283865401208,
|
|
"grad_norm": 0.2242382985650034,
|
|
"learning_rate": 1.6134994456711638e-05,
|
|
"loss": 0.4707,
|
|
"step": 6045
|
|
},
|
|
{
|
|
"epoch": 0.8700028760425654,
|
|
"grad_norm": 0.2199373810957077,
|
|
"learning_rate": 1.6129046738733947e-05,
|
|
"loss": 0.4822,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.8707218866839229,
|
|
"grad_norm": 0.2230809670907246,
|
|
"learning_rate": 1.6123095546028495e-05,
|
|
"loss": 0.4898,
|
|
"step": 6055
|
|
},
|
|
{
|
|
"epoch": 0.8714408973252804,
|
|
"grad_norm": 0.22136148728649427,
|
|
"learning_rate": 1.611714088196917e-05,
|
|
"loss": 0.482,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.8721599079666379,
|
|
"grad_norm": 0.23195219942814263,
|
|
"learning_rate": 1.6111182749931845e-05,
|
|
"loss": 0.4687,
|
|
"step": 6065
|
|
},
|
|
{
|
|
"epoch": 0.8728789186079954,
|
|
"grad_norm": 0.22535271756104616,
|
|
"learning_rate": 1.610522115329435e-05,
|
|
"loss": 0.4797,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.8735979292493529,
|
|
"grad_norm": 0.23272576269146622,
|
|
"learning_rate": 1.6099256095436476e-05,
|
|
"loss": 0.4873,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 0.8743169398907104,
|
|
"grad_norm": 0.23328064687107508,
|
|
"learning_rate": 1.6093287579739983e-05,
|
|
"loss": 0.495,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.8750359505320678,
|
|
"grad_norm": 0.23241446465065863,
|
|
"learning_rate": 1.608731560958859e-05,
|
|
"loss": 0.4958,
|
|
"step": 6085
|
|
},
|
|
{
|
|
"epoch": 0.8757549611734253,
|
|
"grad_norm": 0.2165342273687269,
|
|
"learning_rate": 1.608134018836798e-05,
|
|
"loss": 0.4872,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.8764739718147828,
|
|
"grad_norm": 0.21036060325924805,
|
|
"learning_rate": 1.6075361319465773e-05,
|
|
"loss": 0.4892,
|
|
"step": 6095
|
|
},
|
|
{
|
|
"epoch": 0.8771929824561403,
|
|
"grad_norm": 0.22465055999089578,
|
|
"learning_rate": 1.606937900627157e-05,
|
|
"loss": 0.4693,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.8779119930974978,
|
|
"grad_norm": 0.21896389536444244,
|
|
"learning_rate": 1.60633932521769e-05,
|
|
"loss": 0.4781,
|
|
"step": 6105
|
|
},
|
|
{
|
|
"epoch": 0.8786310037388554,
|
|
"grad_norm": 0.2221483004455103,
|
|
"learning_rate": 1.6057404060575264e-05,
|
|
"loss": 0.4857,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.8793500143802129,
|
|
"grad_norm": 0.22614345726496707,
|
|
"learning_rate": 1.6051411434862094e-05,
|
|
"loss": 0.4763,
|
|
"step": 6115
|
|
},
|
|
{
|
|
"epoch": 0.8800690250215704,
|
|
"grad_norm": 0.22634152148536316,
|
|
"learning_rate": 1.604541537843478e-05,
|
|
"loss": 0.4911,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.8807880356629278,
|
|
"grad_norm": 0.22412631243319087,
|
|
"learning_rate": 1.6039415894692657e-05,
|
|
"loss": 0.4606,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 0.8815070463042853,
|
|
"grad_norm": 0.22735655451709952,
|
|
"learning_rate": 1.6033412987036994e-05,
|
|
"loss": 0.4785,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.8822260569456428,
|
|
"grad_norm": 0.23678519265006817,
|
|
"learning_rate": 1.6027406658871014e-05,
|
|
"loss": 0.4825,
|
|
"step": 6135
|
|
},
|
|
{
|
|
"epoch": 0.8829450675870003,
|
|
"grad_norm": 0.22413195344276224,
|
|
"learning_rate": 1.6021396913599865e-05,
|
|
"loss": 0.4792,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.8836640782283578,
|
|
"grad_norm": 0.23892307294965057,
|
|
"learning_rate": 1.601538375463064e-05,
|
|
"loss": 0.4825,
|
|
"step": 6145
|
|
},
|
|
{
|
|
"epoch": 0.8843830888697153,
|
|
"grad_norm": 0.21516185870148855,
|
|
"learning_rate": 1.6009367185372377e-05,
|
|
"loss": 0.4757,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.8851020995110728,
|
|
"grad_norm": 0.2170681075937456,
|
|
"learning_rate": 1.6003347209236025e-05,
|
|
"loss": 0.4799,
|
|
"step": 6155
|
|
},
|
|
{
|
|
"epoch": 0.8858211101524303,
|
|
"grad_norm": 0.2294809317302986,
|
|
"learning_rate": 1.599732382963448e-05,
|
|
"loss": 0.4611,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.8865401207937877,
|
|
"grad_norm": 0.22283423190183876,
|
|
"learning_rate": 1.599129704998257e-05,
|
|
"loss": 0.4864,
|
|
"step": 6165
|
|
},
|
|
{
|
|
"epoch": 0.8872591314351452,
|
|
"grad_norm": 0.2175656604739115,
|
|
"learning_rate": 1.598526687369703e-05,
|
|
"loss": 0.4869,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.8879781420765027,
|
|
"grad_norm": 0.23105587197199792,
|
|
"learning_rate": 1.5979233304196556e-05,
|
|
"loss": 0.4873,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 0.8886971527178602,
|
|
"grad_norm": 0.21272643771628316,
|
|
"learning_rate": 1.597319634490173e-05,
|
|
"loss": 0.4688,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.8894161633592177,
|
|
"grad_norm": 0.21893173889511963,
|
|
"learning_rate": 1.5967155999235083e-05,
|
|
"loss": 0.4778,
|
|
"step": 6185
|
|
},
|
|
{
|
|
"epoch": 0.8901351740005752,
|
|
"grad_norm": 0.2272209278510632,
|
|
"learning_rate": 1.5961112270621048e-05,
|
|
"loss": 0.4664,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.8908541846419327,
|
|
"grad_norm": 0.22082710241141104,
|
|
"learning_rate": 1.595506516248599e-05,
|
|
"loss": 0.4981,
|
|
"step": 6195
|
|
},
|
|
{
|
|
"epoch": 0.8915731952832902,
|
|
"grad_norm": 0.2217593697200759,
|
|
"learning_rate": 1.594901467825818e-05,
|
|
"loss": 0.4796,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.8922922059246476,
|
|
"grad_norm": 0.22488503585245775,
|
|
"learning_rate": 1.594296082136781e-05,
|
|
"loss": 0.4865,
|
|
"step": 6205
|
|
},
|
|
{
|
|
"epoch": 0.8930112165660051,
|
|
"grad_norm": 0.22575245374790812,
|
|
"learning_rate": 1.5936903595246974e-05,
|
|
"loss": 0.4875,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.8937302272073626,
|
|
"grad_norm": 0.22818628540292302,
|
|
"learning_rate": 1.593084300332969e-05,
|
|
"loss": 0.4985,
|
|
"step": 6215
|
|
},
|
|
{
|
|
"epoch": 0.8944492378487202,
|
|
"grad_norm": 0.2302092002355399,
|
|
"learning_rate": 1.592477904905187e-05,
|
|
"loss": 0.5042,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.8951682484900777,
|
|
"grad_norm": 0.21943538633345835,
|
|
"learning_rate": 1.5918711735851342e-05,
|
|
"loss": 0.4778,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 0.8958872591314352,
|
|
"grad_norm": 0.2242531512662913,
|
|
"learning_rate": 1.591264106716784e-05,
|
|
"loss": 0.4807,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.8966062697727927,
|
|
"grad_norm": 0.21745708652231507,
|
|
"learning_rate": 1.5906567046442987e-05,
|
|
"loss": 0.476,
|
|
"step": 6235
|
|
},
|
|
{
|
|
"epoch": 0.8973252804141502,
|
|
"grad_norm": 0.22501323800463438,
|
|
"learning_rate": 1.5900489677120318e-05,
|
|
"loss": 0.4858,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.8980442910555076,
|
|
"grad_norm": 0.22790024126820935,
|
|
"learning_rate": 1.589440896264527e-05,
|
|
"loss": 0.4697,
|
|
"step": 6245
|
|
},
|
|
{
|
|
"epoch": 0.8987633016968651,
|
|
"grad_norm": 0.21995145936225782,
|
|
"learning_rate": 1.5888324906465164e-05,
|
|
"loss": 0.4641,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.8994823123382226,
|
|
"grad_norm": 0.23120518687662991,
|
|
"learning_rate": 1.5882237512029217e-05,
|
|
"loss": 0.4863,
|
|
"step": 6255
|
|
},
|
|
{
|
|
"epoch": 0.9002013229795801,
|
|
"grad_norm": 0.23010807888424378,
|
|
"learning_rate": 1.5876146782788552e-05,
|
|
"loss": 0.4968,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.9009203336209376,
|
|
"grad_norm": 0.231826121768125,
|
|
"learning_rate": 1.587005272219617e-05,
|
|
"loss": 0.4952,
|
|
"step": 6265
|
|
},
|
|
{
|
|
"epoch": 0.9016393442622951,
|
|
"grad_norm": 0.22390104604646177,
|
|
"learning_rate": 1.586395533370696e-05,
|
|
"loss": 0.4692,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.9023583549036526,
|
|
"grad_norm": 0.2184873369261828,
|
|
"learning_rate": 1.5857854620777705e-05,
|
|
"loss": 0.4874,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 0.9030773655450101,
|
|
"grad_norm": 0.21982204158725013,
|
|
"learning_rate": 1.5851750586867072e-05,
|
|
"loss": 0.4907,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.9037963761863675,
|
|
"grad_norm": 0.23976510415423974,
|
|
"learning_rate": 1.5845643235435603e-05,
|
|
"loss": 0.4985,
|
|
"step": 6285
|
|
},
|
|
{
|
|
"epoch": 0.904515386827725,
|
|
"grad_norm": 0.23374421323926556,
|
|
"learning_rate": 1.5839532569945733e-05,
|
|
"loss": 0.4908,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.9052343974690825,
|
|
"grad_norm": 0.21948693325100235,
|
|
"learning_rate": 1.5833418593861764e-05,
|
|
"loss": 0.4747,
|
|
"step": 6295
|
|
},
|
|
{
|
|
"epoch": 0.90595340811044,
|
|
"grad_norm": 0.22071951471152612,
|
|
"learning_rate": 1.5827301310649882e-05,
|
|
"loss": 0.4778,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.9066724187517975,
|
|
"grad_norm": 0.21646960390367734,
|
|
"learning_rate": 1.582118072377814e-05,
|
|
"loss": 0.4866,
|
|
"step": 6305
|
|
},
|
|
{
|
|
"epoch": 0.907391429393155,
|
|
"grad_norm": 0.21923777159118818,
|
|
"learning_rate": 1.581505683671648e-05,
|
|
"loss": 0.4817,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.9081104400345125,
|
|
"grad_norm": 0.21454771867122877,
|
|
"learning_rate": 1.5808929652936696e-05,
|
|
"loss": 0.4738,
|
|
"step": 6315
|
|
},
|
|
{
|
|
"epoch": 0.90882945067587,
|
|
"grad_norm": 0.22792992248683422,
|
|
"learning_rate": 1.580279917591246e-05,
|
|
"loss": 0.503,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.9095484613172274,
|
|
"grad_norm": 0.2538238320439887,
|
|
"learning_rate": 1.5796665409119314e-05,
|
|
"loss": 0.4775,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 0.910267471958585,
|
|
"grad_norm": 0.22588015715472973,
|
|
"learning_rate": 1.5790528356034664e-05,
|
|
"loss": 0.4903,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.9109864825999425,
|
|
"grad_norm": 0.22418936739523307,
|
|
"learning_rate": 1.578438802013777e-05,
|
|
"loss": 0.4867,
|
|
"step": 6335
|
|
},
|
|
{
|
|
"epoch": 0.9117054932413,
|
|
"grad_norm": 0.22751983453106048,
|
|
"learning_rate": 1.5778244404909766e-05,
|
|
"loss": 0.4754,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.9124245038826575,
|
|
"grad_norm": 0.21899944187479214,
|
|
"learning_rate": 1.5772097513833638e-05,
|
|
"loss": 0.4678,
|
|
"step": 6345
|
|
},
|
|
{
|
|
"epoch": 0.913143514524015,
|
|
"grad_norm": 0.2417862929099873,
|
|
"learning_rate": 1.5765947350394223e-05,
|
|
"loss": 0.4857,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.9138625251653725,
|
|
"grad_norm": 0.24055106261970338,
|
|
"learning_rate": 1.575979391807823e-05,
|
|
"loss": 0.4781,
|
|
"step": 6355
|
|
},
|
|
{
|
|
"epoch": 0.91458153580673,
|
|
"grad_norm": 0.22946156855092792,
|
|
"learning_rate": 1.5753637220374207e-05,
|
|
"loss": 0.4904,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.9153005464480874,
|
|
"grad_norm": 0.22385014546780724,
|
|
"learning_rate": 1.574747726077256e-05,
|
|
"loss": 0.4604,
|
|
"step": 6365
|
|
},
|
|
{
|
|
"epoch": 0.9160195570894449,
|
|
"grad_norm": 0.22505092536829716,
|
|
"learning_rate": 1.5741314042765538e-05,
|
|
"loss": 0.4759,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.9167385677308024,
|
|
"grad_norm": 0.22073380209287308,
|
|
"learning_rate": 1.5735147569847246e-05,
|
|
"loss": 0.4827,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 0.9174575783721599,
|
|
"grad_norm": 0.22423700907272914,
|
|
"learning_rate": 1.572897784551363e-05,
|
|
"loss": 0.4858,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.9181765890135174,
|
|
"grad_norm": 0.21893309238006559,
|
|
"learning_rate": 1.572280487326247e-05,
|
|
"loss": 0.4682,
|
|
"step": 6385
|
|
},
|
|
{
|
|
"epoch": 0.9188955996548749,
|
|
"grad_norm": 0.22147352269071435,
|
|
"learning_rate": 1.571662865659341e-05,
|
|
"loss": 0.4659,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.9196146102962324,
|
|
"grad_norm": 0.21802176576616122,
|
|
"learning_rate": 1.571044919900791e-05,
|
|
"loss": 0.4596,
|
|
"step": 6395
|
|
},
|
|
{
|
|
"epoch": 0.9203336209375899,
|
|
"grad_norm": 0.2158313716245226,
|
|
"learning_rate": 1.570426650400928e-05,
|
|
"loss": 0.4789,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.9210526315789473,
|
|
"grad_norm": 0.24928740250212056,
|
|
"learning_rate": 1.5698080575102662e-05,
|
|
"loss": 0.4854,
|
|
"step": 6405
|
|
},
|
|
{
|
|
"epoch": 0.9217716422203048,
|
|
"grad_norm": 0.22848767277634377,
|
|
"learning_rate": 1.5691891415795036e-05,
|
|
"loss": 0.488,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.9224906528616623,
|
|
"grad_norm": 0.2256690554653882,
|
|
"learning_rate": 1.5685699029595204e-05,
|
|
"loss": 0.4961,
|
|
"step": 6415
|
|
},
|
|
{
|
|
"epoch": 0.9232096635030198,
|
|
"grad_norm": 0.2282178032512667,
|
|
"learning_rate": 1.5679503420013802e-05,
|
|
"loss": 0.4801,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.9239286741443773,
|
|
"grad_norm": 0.2369155203213744,
|
|
"learning_rate": 1.5673304590563296e-05,
|
|
"loss": 0.4826,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 0.9246476847857348,
|
|
"grad_norm": 0.23112462486864008,
|
|
"learning_rate": 1.5667102544757978e-05,
|
|
"loss": 0.5034,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.9253666954270923,
|
|
"grad_norm": 0.22463678559485087,
|
|
"learning_rate": 1.566089728611396e-05,
|
|
"loss": 0.4746,
|
|
"step": 6435
|
|
},
|
|
{
|
|
"epoch": 0.9260857060684499,
|
|
"grad_norm": 0.22740379538540387,
|
|
"learning_rate": 1.5654688818149173e-05,
|
|
"loss": 0.4775,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.9268047167098074,
|
|
"grad_norm": 0.22671726134834985,
|
|
"learning_rate": 1.5648477144383374e-05,
|
|
"loss": 0.4722,
|
|
"step": 6445
|
|
},
|
|
{
|
|
"epoch": 0.9275237273511648,
|
|
"grad_norm": 0.22361203094445511,
|
|
"learning_rate": 1.5642262268338134e-05,
|
|
"loss": 0.4875,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.9282427379925223,
|
|
"grad_norm": 0.22042860812674375,
|
|
"learning_rate": 1.5636044193536838e-05,
|
|
"loss": 0.5021,
|
|
"step": 6455
|
|
},
|
|
{
|
|
"epoch": 0.9289617486338798,
|
|
"grad_norm": 0.21880500542128759,
|
|
"learning_rate": 1.5629822923504692e-05,
|
|
"loss": 0.4901,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.9296807592752373,
|
|
"grad_norm": 0.22161424076526037,
|
|
"learning_rate": 1.56235984617687e-05,
|
|
"loss": 0.476,
|
|
"step": 6465
|
|
},
|
|
{
|
|
"epoch": 0.9303997699165948,
|
|
"grad_norm": 0.2295143312197024,
|
|
"learning_rate": 1.5617370811857683e-05,
|
|
"loss": 0.4692,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.9311187805579523,
|
|
"grad_norm": 0.23064816205367686,
|
|
"learning_rate": 1.5611139977302278e-05,
|
|
"loss": 0.4845,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 0.9318377911993098,
|
|
"grad_norm": 0.2067989481827472,
|
|
"learning_rate": 1.5604905961634913e-05,
|
|
"loss": 0.4955,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.9325568018406672,
|
|
"grad_norm": 0.22781771912016957,
|
|
"learning_rate": 1.5598668768389827e-05,
|
|
"loss": 0.4752,
|
|
"step": 6485
|
|
},
|
|
{
|
|
"epoch": 0.9332758124820247,
|
|
"grad_norm": 0.22430664209549425,
|
|
"learning_rate": 1.5592428401103057e-05,
|
|
"loss": 0.4749,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.9339948231233822,
|
|
"grad_norm": 0.22702362842749063,
|
|
"learning_rate": 1.558618486331245e-05,
|
|
"loss": 0.4735,
|
|
"step": 6495
|
|
},
|
|
{
|
|
"epoch": 0.9347138337647397,
|
|
"grad_norm": 0.23188979467673745,
|
|
"learning_rate": 1.557993815855763e-05,
|
|
"loss": 0.471,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.9354328444060972,
|
|
"grad_norm": 0.2316035087138115,
|
|
"learning_rate": 1.557368829038003e-05,
|
|
"loss": 0.4914,
|
|
"step": 6505
|
|
},
|
|
{
|
|
"epoch": 0.9361518550474547,
|
|
"grad_norm": 0.22225177951148622,
|
|
"learning_rate": 1.5567435262322887e-05,
|
|
"loss": 0.4999,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.9368708656888122,
|
|
"grad_norm": 0.22102749529702834,
|
|
"learning_rate": 1.5561179077931204e-05,
|
|
"loss": 0.473,
|
|
"step": 6515
|
|
},
|
|
{
|
|
"epoch": 0.9375898763301697,
|
|
"grad_norm": 0.22225748257578634,
|
|
"learning_rate": 1.5554919740751794e-05,
|
|
"loss": 0.4871,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.9383088869715271,
|
|
"grad_norm": 0.2263304052281884,
|
|
"learning_rate": 1.554865725433324e-05,
|
|
"loss": 0.4627,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 0.9390278976128846,
|
|
"grad_norm": 0.2255865820044724,
|
|
"learning_rate": 1.5542391622225935e-05,
|
|
"loss": 0.4796,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.9397469082542421,
|
|
"grad_norm": 0.21548022907518974,
|
|
"learning_rate": 1.5536122847982033e-05,
|
|
"loss": 0.4794,
|
|
"step": 6535
|
|
},
|
|
{
|
|
"epoch": 0.9404659188955996,
|
|
"grad_norm": 0.2192863785121772,
|
|
"learning_rate": 1.552985093515548e-05,
|
|
"loss": 0.5112,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.9411849295369571,
|
|
"grad_norm": 0.23321238623231505,
|
|
"learning_rate": 1.552357588730199e-05,
|
|
"loss": 0.4774,
|
|
"step": 6545
|
|
},
|
|
{
|
|
"epoch": 0.9419039401783147,
|
|
"grad_norm": 0.22865602059154158,
|
|
"learning_rate": 1.5517297707979075e-05,
|
|
"loss": 0.4846,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.9426229508196722,
|
|
"grad_norm": 0.2166415062591689,
|
|
"learning_rate": 1.5511016400746e-05,
|
|
"loss": 0.4676,
|
|
"step": 6555
|
|
},
|
|
{
|
|
"epoch": 0.9433419614610297,
|
|
"grad_norm": 0.22574702698545204,
|
|
"learning_rate": 1.5504731969163825e-05,
|
|
"loss": 0.4897,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.9440609721023872,
|
|
"grad_norm": 0.23374801842488893,
|
|
"learning_rate": 1.5498444416795356e-05,
|
|
"loss": 0.4686,
|
|
"step": 6565
|
|
},
|
|
{
|
|
"epoch": 0.9447799827437446,
|
|
"grad_norm": 0.2306137475480127,
|
|
"learning_rate": 1.5492153747205193e-05,
|
|
"loss": 0.4808,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.9454989933851021,
|
|
"grad_norm": 0.21537193186412396,
|
|
"learning_rate": 1.5485859963959687e-05,
|
|
"loss": 0.4882,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 0.9462180040264596,
|
|
"grad_norm": 0.21738851906271006,
|
|
"learning_rate": 1.547956307062696e-05,
|
|
"loss": 0.4789,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.9469370146678171,
|
|
"grad_norm": 0.2207644500562652,
|
|
"learning_rate": 1.5473263070776896e-05,
|
|
"loss": 0.4796,
|
|
"step": 6585
|
|
},
|
|
{
|
|
"epoch": 0.9476560253091746,
|
|
"grad_norm": 0.21677716829427052,
|
|
"learning_rate": 1.5466959967981145e-05,
|
|
"loss": 0.4829,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.9483750359505321,
|
|
"grad_norm": 0.23097534373286382,
|
|
"learning_rate": 1.5460653765813107e-05,
|
|
"loss": 0.4812,
|
|
"step": 6595
|
|
},
|
|
{
|
|
"epoch": 0.9490940465918896,
|
|
"grad_norm": 0.2093786853394847,
|
|
"learning_rate": 1.5454344467847948e-05,
|
|
"loss": 0.4896,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.949813057233247,
|
|
"grad_norm": 0.21999645225575495,
|
|
"learning_rate": 1.5448032077662583e-05,
|
|
"loss": 0.4851,
|
|
"step": 6605
|
|
},
|
|
{
|
|
"epoch": 0.9505320678746045,
|
|
"grad_norm": 0.22477641153328398,
|
|
"learning_rate": 1.5441716598835684e-05,
|
|
"loss": 0.4951,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.951251078515962,
|
|
"grad_norm": 0.23135266269374266,
|
|
"learning_rate": 1.5435398034947667e-05,
|
|
"loss": 0.4702,
|
|
"step": 6615
|
|
},
|
|
{
|
|
"epoch": 0.9519700891573195,
|
|
"grad_norm": 0.22462726289244378,
|
|
"learning_rate": 1.542907638958071e-05,
|
|
"loss": 0.4744,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.952689099798677,
|
|
"grad_norm": 0.22612302893339267,
|
|
"learning_rate": 1.542275166631873e-05,
|
|
"loss": 0.4834,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 0.9534081104400345,
|
|
"grad_norm": 0.22340595977068603,
|
|
"learning_rate": 1.541642386874738e-05,
|
|
"loss": 0.4948,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.954127121081392,
|
|
"grad_norm": 0.21601642477661376,
|
|
"learning_rate": 1.541009300045407e-05,
|
|
"loss": 0.4858,
|
|
"step": 6635
|
|
},
|
|
{
|
|
"epoch": 0.9548461317227495,
|
|
"grad_norm": 0.22499946084889408,
|
|
"learning_rate": 1.5403759065027954e-05,
|
|
"loss": 0.4856,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.955565142364107,
|
|
"grad_norm": 0.2197889154098527,
|
|
"learning_rate": 1.5397422066059906e-05,
|
|
"loss": 0.4725,
|
|
"step": 6645
|
|
},
|
|
{
|
|
"epoch": 0.9562841530054644,
|
|
"grad_norm": 0.23875758339220973,
|
|
"learning_rate": 1.539108200714255e-05,
|
|
"loss": 0.4762,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.9570031636468219,
|
|
"grad_norm": 0.2227905479215225,
|
|
"learning_rate": 1.538473889187025e-05,
|
|
"loss": 0.4739,
|
|
"step": 6655
|
|
},
|
|
{
|
|
"epoch": 0.9577221742881795,
|
|
"grad_norm": 0.23357580858983001,
|
|
"learning_rate": 1.5378392723839086e-05,
|
|
"loss": 0.4796,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.958441184929537,
|
|
"grad_norm": 0.24057115172831314,
|
|
"learning_rate": 1.537204350664688e-05,
|
|
"loss": 0.4808,
|
|
"step": 6665
|
|
},
|
|
{
|
|
"epoch": 0.9591601955708945,
|
|
"grad_norm": 0.22333693699108556,
|
|
"learning_rate": 1.5365691243893186e-05,
|
|
"loss": 0.4797,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.959879206212252,
|
|
"grad_norm": 0.22164298046976957,
|
|
"learning_rate": 1.535933593917927e-05,
|
|
"loss": 0.4775,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 0.9605982168536095,
|
|
"grad_norm": 0.2108983044458471,
|
|
"learning_rate": 1.5352977596108138e-05,
|
|
"loss": 0.4838,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.961317227494967,
|
|
"grad_norm": 0.22778761172616302,
|
|
"learning_rate": 1.5346616218284514e-05,
|
|
"loss": 0.4695,
|
|
"step": 6685
|
|
},
|
|
{
|
|
"epoch": 0.9620362381363244,
|
|
"grad_norm": 0.21909715873778327,
|
|
"learning_rate": 1.5340251809314833e-05,
|
|
"loss": 0.4734,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.9627552487776819,
|
|
"grad_norm": 0.2148002346061404,
|
|
"learning_rate": 1.533388437280727e-05,
|
|
"loss": 0.4568,
|
|
"step": 6695
|
|
},
|
|
{
|
|
"epoch": 0.9634742594190394,
|
|
"grad_norm": 0.22905164253993948,
|
|
"learning_rate": 1.5327513912371684e-05,
|
|
"loss": 0.4878,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.9641932700603969,
|
|
"grad_norm": 0.2185782489667153,
|
|
"learning_rate": 1.532114043161968e-05,
|
|
"loss": 0.4834,
|
|
"step": 6705
|
|
},
|
|
{
|
|
"epoch": 0.9649122807017544,
|
|
"grad_norm": 0.21833720077174126,
|
|
"learning_rate": 1.531476393416456e-05,
|
|
"loss": 0.4749,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.9656312913431119,
|
|
"grad_norm": 0.2279245541018691,
|
|
"learning_rate": 1.530838442362134e-05,
|
|
"loss": 0.4982,
|
|
"step": 6715
|
|
},
|
|
{
|
|
"epoch": 0.9663503019844694,
|
|
"grad_norm": 0.2293060476544168,
|
|
"learning_rate": 1.5302001903606735e-05,
|
|
"loss": 0.4741,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.9670693126258268,
|
|
"grad_norm": 0.22032989655105037,
|
|
"learning_rate": 1.5295616377739178e-05,
|
|
"loss": 0.4726,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 0.9677883232671843,
|
|
"grad_norm": 0.2295613322786625,
|
|
"learning_rate": 1.5289227849638803e-05,
|
|
"loss": 0.4769,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.9685073339085418,
|
|
"grad_norm": 0.2247777045129754,
|
|
"learning_rate": 1.5282836322927446e-05,
|
|
"loss": 0.4835,
|
|
"step": 6735
|
|
},
|
|
{
|
|
"epoch": 0.9692263445498993,
|
|
"grad_norm": 0.225786697144287,
|
|
"learning_rate": 1.527644180122864e-05,
|
|
"loss": 0.4929,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.9699453551912568,
|
|
"grad_norm": 0.21678269771980435,
|
|
"learning_rate": 1.527004428816762e-05,
|
|
"loss": 0.4798,
|
|
"step": 6745
|
|
},
|
|
{
|
|
"epoch": 0.9706643658326143,
|
|
"grad_norm": 0.22366425391870273,
|
|
"learning_rate": 1.5263643787371313e-05,
|
|
"loss": 0.4809,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.9713833764739718,
|
|
"grad_norm": 0.22144782348953934,
|
|
"learning_rate": 1.5257240302468343e-05,
|
|
"loss": 0.4796,
|
|
"step": 6755
|
|
},
|
|
{
|
|
"epoch": 0.9721023871153293,
|
|
"grad_norm": 0.2156443814782539,
|
|
"learning_rate": 1.5250833837089024e-05,
|
|
"loss": 0.4684,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.9728213977566867,
|
|
"grad_norm": 0.22117236753267636,
|
|
"learning_rate": 1.5244424394865359e-05,
|
|
"loss": 0.4832,
|
|
"step": 6765
|
|
},
|
|
{
|
|
"epoch": 0.9735404083980443,
|
|
"grad_norm": 0.22364864691530395,
|
|
"learning_rate": 1.523801197943104e-05,
|
|
"loss": 0.4863,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.9742594190394018,
|
|
"grad_norm": 0.2353866035164657,
|
|
"learning_rate": 1.5231596594421443e-05,
|
|
"loss": 0.463,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 0.9749784296807593,
|
|
"grad_norm": 0.23914032451055825,
|
|
"learning_rate": 1.5225178243473633e-05,
|
|
"loss": 0.4799,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.9756974403221168,
|
|
"grad_norm": 0.22286084045601381,
|
|
"learning_rate": 1.521875693022635e-05,
|
|
"loss": 0.4845,
|
|
"step": 6785
|
|
},
|
|
{
|
|
"epoch": 0.9764164509634743,
|
|
"grad_norm": 0.23539655426452574,
|
|
"learning_rate": 1.5212332658320016e-05,
|
|
"loss": 0.484,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.9771354616048318,
|
|
"grad_norm": 0.21981752748550457,
|
|
"learning_rate": 1.5205905431396728e-05,
|
|
"loss": 0.4751,
|
|
"step": 6795
|
|
},
|
|
{
|
|
"epoch": 0.9778544722461893,
|
|
"grad_norm": 0.2201025870941446,
|
|
"learning_rate": 1.5199475253100264e-05,
|
|
"loss": 0.4721,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.9785734828875468,
|
|
"grad_norm": 0.2185983176885855,
|
|
"learning_rate": 1.5193042127076072e-05,
|
|
"loss": 0.4698,
|
|
"step": 6805
|
|
},
|
|
{
|
|
"epoch": 0.9792924935289042,
|
|
"grad_norm": 0.2209307444365703,
|
|
"learning_rate": 1.518660605697127e-05,
|
|
"loss": 0.4816,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.9800115041702617,
|
|
"grad_norm": 0.23385081037322303,
|
|
"learning_rate": 1.518016704643464e-05,
|
|
"loss": 0.4632,
|
|
"step": 6815
|
|
},
|
|
{
|
|
"epoch": 0.9807305148116192,
|
|
"grad_norm": 0.21479626312740918,
|
|
"learning_rate": 1.5173725099116645e-05,
|
|
"loss": 0.4665,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.9814495254529767,
|
|
"grad_norm": 0.22238392229375717,
|
|
"learning_rate": 1.51672802186694e-05,
|
|
"loss": 0.4719,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 0.9821685360943342,
|
|
"grad_norm": 0.23404902107369124,
|
|
"learning_rate": 1.5160832408746692e-05,
|
|
"loss": 0.5035,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.9828875467356917,
|
|
"grad_norm": 0.21819905193403144,
|
|
"learning_rate": 1.515438167300396e-05,
|
|
"loss": 0.4794,
|
|
"step": 6835
|
|
},
|
|
{
|
|
"epoch": 0.9836065573770492,
|
|
"grad_norm": 0.2227124367940369,
|
|
"learning_rate": 1.5147928015098309e-05,
|
|
"loss": 0.4683,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.9843255680184066,
|
|
"grad_norm": 0.23175024743121764,
|
|
"learning_rate": 1.5141471438688497e-05,
|
|
"loss": 0.5067,
|
|
"step": 6845
|
|
},
|
|
{
|
|
"epoch": 0.9850445786597641,
|
|
"grad_norm": 0.23035042801996858,
|
|
"learning_rate": 1.5135011947434937e-05,
|
|
"loss": 0.4856,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.9857635893011216,
|
|
"grad_norm": 0.22697895961676684,
|
|
"learning_rate": 1.5128549544999694e-05,
|
|
"loss": 0.482,
|
|
"step": 6855
|
|
},
|
|
{
|
|
"epoch": 0.9864825999424791,
|
|
"grad_norm": 0.21609441944646846,
|
|
"learning_rate": 1.512208423504649e-05,
|
|
"loss": 0.4746,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.9872016105838366,
|
|
"grad_norm": 0.22593709410358026,
|
|
"learning_rate": 1.5115616021240685e-05,
|
|
"loss": 0.4933,
|
|
"step": 6865
|
|
},
|
|
{
|
|
"epoch": 0.9879206212251941,
|
|
"grad_norm": 0.22718404114173152,
|
|
"learning_rate": 1.510914490724929e-05,
|
|
"loss": 0.4751,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.9886396318665516,
|
|
"grad_norm": 0.22797193686181583,
|
|
"learning_rate": 1.5102670896740957e-05,
|
|
"loss": 0.4747,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 0.9893586425079092,
|
|
"grad_norm": 0.22768531436338516,
|
|
"learning_rate": 1.509619399338599e-05,
|
|
"loss": 0.4706,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.9900776531492667,
|
|
"grad_norm": 0.2305537352512049,
|
|
"learning_rate": 1.5089714200856325e-05,
|
|
"loss": 0.497,
|
|
"step": 6885
|
|
},
|
|
{
|
|
"epoch": 0.9907966637906241,
|
|
"grad_norm": 0.2366616945336574,
|
|
"learning_rate": 1.5083231522825537e-05,
|
|
"loss": 0.4912,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.9915156744319816,
|
|
"grad_norm": 0.2217756285681072,
|
|
"learning_rate": 1.5076745962968833e-05,
|
|
"loss": 0.4676,
|
|
"step": 6895
|
|
},
|
|
{
|
|
"epoch": 0.9922346850733391,
|
|
"grad_norm": 0.22771473147282423,
|
|
"learning_rate": 1.5070257524963063e-05,
|
|
"loss": 0.4756,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.9929536957146966,
|
|
"grad_norm": 0.23294760309698267,
|
|
"learning_rate": 1.5063766212486704e-05,
|
|
"loss": 0.4928,
|
|
"step": 6905
|
|
},
|
|
{
|
|
"epoch": 0.9936727063560541,
|
|
"grad_norm": 0.21411969868887565,
|
|
"learning_rate": 1.5057272029219857e-05,
|
|
"loss": 0.4753,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.9943917169974116,
|
|
"grad_norm": 0.2212546197891626,
|
|
"learning_rate": 1.5050774978844263e-05,
|
|
"loss": 0.478,
|
|
"step": 6915
|
|
},
|
|
{
|
|
"epoch": 0.9951107276387691,
|
|
"grad_norm": 0.22268275460533818,
|
|
"learning_rate": 1.5044275065043273e-05,
|
|
"loss": 0.4833,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.9958297382801266,
|
|
"grad_norm": 0.2449174453256786,
|
|
"learning_rate": 1.503777229150188e-05,
|
|
"loss": 0.4853,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 0.996548748921484,
|
|
"grad_norm": 0.21985579467908328,
|
|
"learning_rate": 1.5031266661906678e-05,
|
|
"loss": 0.4657,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.9972677595628415,
|
|
"grad_norm": 0.22165507711757962,
|
|
"learning_rate": 1.5024758179945896e-05,
|
|
"loss": 0.4934,
|
|
"step": 6935
|
|
},
|
|
{
|
|
"epoch": 0.997986770204199,
|
|
"grad_norm": 0.21902956888212533,
|
|
"learning_rate": 1.501824684930937e-05,
|
|
"loss": 0.4816,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.9987057808455565,
|
|
"grad_norm": 0.2313696800475175,
|
|
"learning_rate": 1.501173267368856e-05,
|
|
"loss": 0.4866,
|
|
"step": 6945
|
|
},
|
|
{
|
|
"epoch": 0.999424791486914,
|
|
"grad_norm": 0.23660863539279678,
|
|
"learning_rate": 1.5005215656776531e-05,
|
|
"loss": 0.4649,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_loss": 0.45176830887794495,
|
|
"eval_runtime": 0.6251,
|
|
"eval_samples_per_second": 39.991,
|
|
"eval_steps_per_second": 1.6,
|
|
"step": 6954
|
|
},
|
|
{
|
|
"epoch": 1.0001438021282716,
|
|
"grad_norm": 0.2609984891577565,
|
|
"learning_rate": 1.4998695802267965e-05,
|
|
"loss": 0.4361,
|
|
"step": 6955
|
|
},
|
|
{
|
|
"epoch": 1.000862812769629,
|
|
"grad_norm": 0.2516451509161594,
|
|
"learning_rate": 1.4992173113859143e-05,
|
|
"loss": 0.427,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 1.0015818234109866,
|
|
"grad_norm": 0.24733817071518213,
|
|
"learning_rate": 1.4985647595247965e-05,
|
|
"loss": 0.4212,
|
|
"step": 6965
|
|
},
|
|
{
|
|
"epoch": 1.002300834052344,
|
|
"grad_norm": 0.24389227857970142,
|
|
"learning_rate": 1.4979119250133929e-05,
|
|
"loss": 0.4249,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 1.0030198446937015,
|
|
"grad_norm": 0.24506075363170043,
|
|
"learning_rate": 1.4972588082218136e-05,
|
|
"loss": 0.4265,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 1.003738855335059,
|
|
"grad_norm": 0.24593725853045245,
|
|
"learning_rate": 1.4966054095203284e-05,
|
|
"loss": 0.4166,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 1.0044578659764165,
|
|
"grad_norm": 0.23807359079460177,
|
|
"learning_rate": 1.4959517292793677e-05,
|
|
"loss": 0.423,
|
|
"step": 6985
|
|
},
|
|
{
|
|
"epoch": 1.005176876617774,
|
|
"grad_norm": 0.24105601006448701,
|
|
"learning_rate": 1.4952977678695211e-05,
|
|
"loss": 0.4143,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 1.0058958872591315,
|
|
"grad_norm": 0.2447137201531492,
|
|
"learning_rate": 1.4946435256615373e-05,
|
|
"loss": 0.4199,
|
|
"step": 6995
|
|
},
|
|
{
|
|
"epoch": 1.006614897900489,
|
|
"grad_norm": 0.23351901468885872,
|
|
"learning_rate": 1.4939890030263244e-05,
|
|
"loss": 0.4224,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.0073339085418465,
|
|
"grad_norm": 0.24343901453464484,
|
|
"learning_rate": 1.4933342003349502e-05,
|
|
"loss": 0.4256,
|
|
"step": 7005
|
|
},
|
|
{
|
|
"epoch": 1.008052919183204,
|
|
"grad_norm": 0.24632246255177054,
|
|
"learning_rate": 1.49267911795864e-05,
|
|
"loss": 0.4083,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 1.0087719298245614,
|
|
"grad_norm": 0.24576841044321335,
|
|
"learning_rate": 1.4920237562687784e-05,
|
|
"loss": 0.4139,
|
|
"step": 7015
|
|
},
|
|
{
|
|
"epoch": 1.009490940465919,
|
|
"grad_norm": 0.2285242809366901,
|
|
"learning_rate": 1.4913681156369083e-05,
|
|
"loss": 0.4254,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 1.0102099511072764,
|
|
"grad_norm": 0.24698769719760663,
|
|
"learning_rate": 1.490712196434731e-05,
|
|
"loss": 0.4201,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 1.010928961748634,
|
|
"grad_norm": 0.24177562139259248,
|
|
"learning_rate": 1.4900559990341048e-05,
|
|
"loss": 0.405,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 1.0116479723899914,
|
|
"grad_norm": 0.2284256513112865,
|
|
"learning_rate": 1.489399523807047e-05,
|
|
"loss": 0.4153,
|
|
"step": 7035
|
|
},
|
|
{
|
|
"epoch": 1.0123669830313489,
|
|
"grad_norm": 0.260722596094778,
|
|
"learning_rate": 1.488742771125731e-05,
|
|
"loss": 0.4192,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 1.0130859936727064,
|
|
"grad_norm": 0.2318339583881091,
|
|
"learning_rate": 1.4880857413624888e-05,
|
|
"loss": 0.4311,
|
|
"step": 7045
|
|
},
|
|
{
|
|
"epoch": 1.0138050043140638,
|
|
"grad_norm": 0.2556005671407621,
|
|
"learning_rate": 1.4874284348898089e-05,
|
|
"loss": 0.4289,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 1.0145240149554213,
|
|
"grad_norm": 0.23971405460324446,
|
|
"learning_rate": 1.4867708520803366e-05,
|
|
"loss": 0.4112,
|
|
"step": 7055
|
|
},
|
|
{
|
|
"epoch": 1.0152430255967788,
|
|
"grad_norm": 0.2595593731931635,
|
|
"learning_rate": 1.4861129933068738e-05,
|
|
"loss": 0.4248,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 1.0159620362381363,
|
|
"grad_norm": 0.23810363881907462,
|
|
"learning_rate": 1.4854548589423792e-05,
|
|
"loss": 0.4102,
|
|
"step": 7065
|
|
},
|
|
{
|
|
"epoch": 1.0166810468794938,
|
|
"grad_norm": 0.2534738889359647,
|
|
"learning_rate": 1.4847964493599674e-05,
|
|
"loss": 0.4294,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 1.0174000575208513,
|
|
"grad_norm": 0.23848085398163255,
|
|
"learning_rate": 1.4841377649329095e-05,
|
|
"loss": 0.4266,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 1.0181190681622088,
|
|
"grad_norm": 0.2455758594331574,
|
|
"learning_rate": 1.4834788060346315e-05,
|
|
"loss": 0.4158,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 1.0188380788035662,
|
|
"grad_norm": 0.2499471679265644,
|
|
"learning_rate": 1.4828195730387162e-05,
|
|
"loss": 0.4242,
|
|
"step": 7085
|
|
},
|
|
{
|
|
"epoch": 1.0195570894449237,
|
|
"grad_norm": 0.24622402422531225,
|
|
"learning_rate": 1.4821600663189009e-05,
|
|
"loss": 0.4097,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 1.0202761000862812,
|
|
"grad_norm": 0.255688979450824,
|
|
"learning_rate": 1.4815002862490784e-05,
|
|
"loss": 0.4359,
|
|
"step": 7095
|
|
},
|
|
{
|
|
"epoch": 1.0209951107276387,
|
|
"grad_norm": 0.2359064077389048,
|
|
"learning_rate": 1.4808402332032966e-05,
|
|
"loss": 0.4215,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 1.0217141213689962,
|
|
"grad_norm": 0.26117531866755295,
|
|
"learning_rate": 1.4801799075557579e-05,
|
|
"loss": 0.4281,
|
|
"step": 7105
|
|
},
|
|
{
|
|
"epoch": 1.0224331320103537,
|
|
"grad_norm": 0.2351672925246976,
|
|
"learning_rate": 1.4795193096808191e-05,
|
|
"loss": 0.4252,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 1.0231521426517112,
|
|
"grad_norm": 0.24530936090046365,
|
|
"learning_rate": 1.4788584399529919e-05,
|
|
"loss": 0.4129,
|
|
"step": 7115
|
|
},
|
|
{
|
|
"epoch": 1.0238711532930687,
|
|
"grad_norm": 0.23829577092670978,
|
|
"learning_rate": 1.4781972987469421e-05,
|
|
"loss": 0.4134,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 1.0245901639344261,
|
|
"grad_norm": 0.2373278401919916,
|
|
"learning_rate": 1.4775358864374884e-05,
|
|
"loss": 0.4262,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 1.0253091745757836,
|
|
"grad_norm": 0.2386215543659765,
|
|
"learning_rate": 1.4768742033996045e-05,
|
|
"loss": 0.4292,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 1.0260281852171411,
|
|
"grad_norm": 0.245165207310462,
|
|
"learning_rate": 1.4762122500084163e-05,
|
|
"loss": 0.4111,
|
|
"step": 7135
|
|
},
|
|
{
|
|
"epoch": 1.0267471958584986,
|
|
"grad_norm": 0.24704393239415223,
|
|
"learning_rate": 1.4755500266392044e-05,
|
|
"loss": 0.4282,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 1.027466206499856,
|
|
"grad_norm": 0.24281873003447055,
|
|
"learning_rate": 1.4748875336674016e-05,
|
|
"loss": 0.4265,
|
|
"step": 7145
|
|
},
|
|
{
|
|
"epoch": 1.0281852171412136,
|
|
"grad_norm": 0.2469815238178739,
|
|
"learning_rate": 1.474224771468593e-05,
|
|
"loss": 0.4258,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 1.0289042277825713,
|
|
"grad_norm": 0.23933515470857958,
|
|
"learning_rate": 1.4735617404185183e-05,
|
|
"loss": 0.414,
|
|
"step": 7155
|
|
},
|
|
{
|
|
"epoch": 1.0296232384239288,
|
|
"grad_norm": 0.2480531863542624,
|
|
"learning_rate": 1.4728984408930668e-05,
|
|
"loss": 0.4155,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 1.0303422490652863,
|
|
"grad_norm": 0.22928500215568193,
|
|
"learning_rate": 1.4722348732682824e-05,
|
|
"loss": 0.41,
|
|
"step": 7165
|
|
},
|
|
{
|
|
"epoch": 1.0310612597066438,
|
|
"grad_norm": 0.23058079356797653,
|
|
"learning_rate": 1.4715710379203602e-05,
|
|
"loss": 0.4222,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 1.0317802703480012,
|
|
"grad_norm": 0.23989774674583075,
|
|
"learning_rate": 1.4709069352256467e-05,
|
|
"loss": 0.3973,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 1.0324992809893587,
|
|
"grad_norm": 0.23468182850625818,
|
|
"learning_rate": 1.4702425655606403e-05,
|
|
"loss": 0.4261,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 1.0332182916307162,
|
|
"grad_norm": 0.24877747571837835,
|
|
"learning_rate": 1.4695779293019908e-05,
|
|
"loss": 0.4382,
|
|
"step": 7185
|
|
},
|
|
{
|
|
"epoch": 1.0339373022720737,
|
|
"grad_norm": 0.23754062310392648,
|
|
"learning_rate": 1.4689130268264989e-05,
|
|
"loss": 0.4272,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 1.0346563129134312,
|
|
"grad_norm": 0.24175160857979996,
|
|
"learning_rate": 1.4682478585111165e-05,
|
|
"loss": 0.417,
|
|
"step": 7195
|
|
},
|
|
{
|
|
"epoch": 1.0353753235547887,
|
|
"grad_norm": 0.7431584047698181,
|
|
"learning_rate": 1.467582424732946e-05,
|
|
"loss": 0.4253,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 1.0360943341961462,
|
|
"grad_norm": 0.24693534297845374,
|
|
"learning_rate": 1.4669167258692407e-05,
|
|
"loss": 0.4176,
|
|
"step": 7205
|
|
},
|
|
{
|
|
"epoch": 1.0368133448375036,
|
|
"grad_norm": 0.23950832330092756,
|
|
"learning_rate": 1.4662507622974037e-05,
|
|
"loss": 0.4123,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 1.0375323554788611,
|
|
"grad_norm": 0.2442138232459913,
|
|
"learning_rate": 1.4655845343949877e-05,
|
|
"loss": 0.4211,
|
|
"step": 7215
|
|
},
|
|
{
|
|
"epoch": 1.0382513661202186,
|
|
"grad_norm": 0.2470450242567347,
|
|
"learning_rate": 1.4649180425396972e-05,
|
|
"loss": 0.4199,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 1.038970376761576,
|
|
"grad_norm": 0.25475772569037297,
|
|
"learning_rate": 1.4642512871093838e-05,
|
|
"loss": 0.4228,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 1.0396893874029336,
|
|
"grad_norm": 0.23975823704149188,
|
|
"learning_rate": 1.4635842684820506e-05,
|
|
"loss": 0.4335,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 1.040408398044291,
|
|
"grad_norm": 0.2434879248343545,
|
|
"learning_rate": 1.462916987035849e-05,
|
|
"loss": 0.4183,
|
|
"step": 7235
|
|
},
|
|
{
|
|
"epoch": 1.0411274086856486,
|
|
"grad_norm": 0.23917965588817724,
|
|
"learning_rate": 1.462249443149079e-05,
|
|
"loss": 0.4338,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 1.041846419327006,
|
|
"grad_norm": 0.2413379742131455,
|
|
"learning_rate": 1.4615816372001904e-05,
|
|
"loss": 0.4226,
|
|
"step": 7245
|
|
},
|
|
{
|
|
"epoch": 1.0425654299683635,
|
|
"grad_norm": 0.24650543649743945,
|
|
"learning_rate": 1.4609135695677805e-05,
|
|
"loss": 0.4268,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 1.043284440609721,
|
|
"grad_norm": 0.2369181897502444,
|
|
"learning_rate": 1.4602452406305962e-05,
|
|
"loss": 0.4108,
|
|
"step": 7255
|
|
},
|
|
{
|
|
"epoch": 1.0440034512510785,
|
|
"grad_norm": 0.25203791980105866,
|
|
"learning_rate": 1.4595766507675313e-05,
|
|
"loss": 0.4186,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 1.044722461892436,
|
|
"grad_norm": 0.2870260934077793,
|
|
"learning_rate": 1.4589078003576279e-05,
|
|
"loss": 0.4158,
|
|
"step": 7265
|
|
},
|
|
{
|
|
"epoch": 1.0454414725337935,
|
|
"grad_norm": 0.24269081963109623,
|
|
"learning_rate": 1.4582386897800766e-05,
|
|
"loss": 0.4172,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 1.046160483175151,
|
|
"grad_norm": 0.25226442413606864,
|
|
"learning_rate": 1.4575693194142146e-05,
|
|
"loss": 0.429,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 1.0468794938165085,
|
|
"grad_norm": 0.2425898262686411,
|
|
"learning_rate": 1.4568996896395264e-05,
|
|
"loss": 0.4266,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 1.047598504457866,
|
|
"grad_norm": 0.23733252857410786,
|
|
"learning_rate": 1.4562298008356441e-05,
|
|
"loss": 0.4147,
|
|
"step": 7285
|
|
},
|
|
{
|
|
"epoch": 1.0483175150992234,
|
|
"grad_norm": 0.24222783715783344,
|
|
"learning_rate": 1.4555596533823466e-05,
|
|
"loss": 0.4325,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 1.049036525740581,
|
|
"grad_norm": 0.24329552527037612,
|
|
"learning_rate": 1.4548892476595587e-05,
|
|
"loss": 0.4243,
|
|
"step": 7295
|
|
},
|
|
{
|
|
"epoch": 1.0497555363819384,
|
|
"grad_norm": 0.24202286281496768,
|
|
"learning_rate": 1.4542185840473523e-05,
|
|
"loss": 0.4178,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 1.050474547023296,
|
|
"grad_norm": 0.27213465537018944,
|
|
"learning_rate": 1.4535476629259454e-05,
|
|
"loss": 0.4237,
|
|
"step": 7305
|
|
},
|
|
{
|
|
"epoch": 1.0511935576646534,
|
|
"grad_norm": 0.2495004385772135,
|
|
"learning_rate": 1.4528764846757018e-05,
|
|
"loss": 0.423,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 1.0519125683060109,
|
|
"grad_norm": 0.23320958886525425,
|
|
"learning_rate": 1.4522050496771314e-05,
|
|
"loss": 0.4282,
|
|
"step": 7315
|
|
},
|
|
{
|
|
"epoch": 1.0526315789473684,
|
|
"grad_norm": 0.24698409693908785,
|
|
"learning_rate": 1.4515333583108896e-05,
|
|
"loss": 0.4256,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 1.0533505895887258,
|
|
"grad_norm": 0.2517814174177234,
|
|
"learning_rate": 1.4508614109577766e-05,
|
|
"loss": 0.4267,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 1.0540696002300833,
|
|
"grad_norm": 0.2483119203371741,
|
|
"learning_rate": 1.4501892079987378e-05,
|
|
"loss": 0.4152,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 1.0547886108714408,
|
|
"grad_norm": 0.23969019890865684,
|
|
"learning_rate": 1.4495167498148648e-05,
|
|
"loss": 0.4156,
|
|
"step": 7335
|
|
},
|
|
{
|
|
"epoch": 1.0555076215127983,
|
|
"grad_norm": 0.2449967426733617,
|
|
"learning_rate": 1.4488440367873922e-05,
|
|
"loss": 0.4277,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 1.0562266321541558,
|
|
"grad_norm": 0.2387446871052847,
|
|
"learning_rate": 1.4481710692977e-05,
|
|
"loss": 0.4093,
|
|
"step": 7345
|
|
},
|
|
{
|
|
"epoch": 1.0569456427955133,
|
|
"grad_norm": 0.258326074049477,
|
|
"learning_rate": 1.4474978477273124e-05,
|
|
"loss": 0.4226,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 1.0576646534368708,
|
|
"grad_norm": 0.2524216813764755,
|
|
"learning_rate": 1.4468243724578977e-05,
|
|
"loss": 0.4385,
|
|
"step": 7355
|
|
},
|
|
{
|
|
"epoch": 1.0583836640782283,
|
|
"grad_norm": 0.24424360507027318,
|
|
"learning_rate": 1.4461506438712668e-05,
|
|
"loss": 0.4321,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 1.0591026747195857,
|
|
"grad_norm": 0.2547646496981046,
|
|
"learning_rate": 1.4454766623493766e-05,
|
|
"loss": 0.4145,
|
|
"step": 7365
|
|
},
|
|
{
|
|
"epoch": 1.0598216853609435,
|
|
"grad_norm": 0.24839584461899775,
|
|
"learning_rate": 1.4448024282743252e-05,
|
|
"loss": 0.4205,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 1.0605406960023007,
|
|
"grad_norm": 0.26600263394387463,
|
|
"learning_rate": 1.444127942028355e-05,
|
|
"loss": 0.4193,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 1.0612597066436584,
|
|
"grad_norm": 0.24940257938808494,
|
|
"learning_rate": 1.443453203993851e-05,
|
|
"loss": 0.4392,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 1.061978717285016,
|
|
"grad_norm": 0.2335680813250201,
|
|
"learning_rate": 1.4427782145533411e-05,
|
|
"loss": 0.4258,
|
|
"step": 7385
|
|
},
|
|
{
|
|
"epoch": 1.0626977279263734,
|
|
"grad_norm": 0.24488926419935933,
|
|
"learning_rate": 1.4421029740894956e-05,
|
|
"loss": 0.4304,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 1.063416738567731,
|
|
"grad_norm": 0.23669795423595294,
|
|
"learning_rate": 1.4414274829851271e-05,
|
|
"loss": 0.416,
|
|
"step": 7395
|
|
},
|
|
{
|
|
"epoch": 1.0641357492090884,
|
|
"grad_norm": 0.25050783762739626,
|
|
"learning_rate": 1.4407517416231906e-05,
|
|
"loss": 0.4153,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 1.0648547598504459,
|
|
"grad_norm": 0.24802843987771775,
|
|
"learning_rate": 1.4400757503867828e-05,
|
|
"loss": 0.4158,
|
|
"step": 7405
|
|
},
|
|
{
|
|
"epoch": 1.0655737704918034,
|
|
"grad_norm": 0.24647940912208477,
|
|
"learning_rate": 1.4393995096591415e-05,
|
|
"loss": 0.427,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 1.0662927811331608,
|
|
"grad_norm": 0.2520899190375955,
|
|
"learning_rate": 1.4387230198236473e-05,
|
|
"loss": 0.4063,
|
|
"step": 7415
|
|
},
|
|
{
|
|
"epoch": 1.0670117917745183,
|
|
"grad_norm": 0.24659949302618547,
|
|
"learning_rate": 1.4380462812638205e-05,
|
|
"loss": 0.4252,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 1.0677308024158758,
|
|
"grad_norm": 0.23804217507019756,
|
|
"learning_rate": 1.437369294363323e-05,
|
|
"loss": 0.4279,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 1.0684498130572333,
|
|
"grad_norm": 0.24609340691898351,
|
|
"learning_rate": 1.4366920595059584e-05,
|
|
"loss": 0.4413,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 1.0691688236985908,
|
|
"grad_norm": 0.2706893944507149,
|
|
"learning_rate": 1.436014577075669e-05,
|
|
"loss": 0.4235,
|
|
"step": 7435
|
|
},
|
|
{
|
|
"epoch": 1.0698878343399483,
|
|
"grad_norm": 0.2444436725172846,
|
|
"learning_rate": 1.4353368474565392e-05,
|
|
"loss": 0.4264,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 1.0706068449813058,
|
|
"grad_norm": 0.257931501325315,
|
|
"learning_rate": 1.4346588710327926e-05,
|
|
"loss": 0.4154,
|
|
"step": 7445
|
|
},
|
|
{
|
|
"epoch": 1.0713258556226632,
|
|
"grad_norm": 0.23427955361087918,
|
|
"learning_rate": 1.4339806481887934e-05,
|
|
"loss": 0.4118,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 1.0720448662640207,
|
|
"grad_norm": 0.24248707511932324,
|
|
"learning_rate": 1.4333021793090444e-05,
|
|
"loss": 0.4159,
|
|
"step": 7455
|
|
},
|
|
{
|
|
"epoch": 1.0727638769053782,
|
|
"grad_norm": 0.2506867628026418,
|
|
"learning_rate": 1.4326234647781887e-05,
|
|
"loss": 0.4229,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 1.0734828875467357,
|
|
"grad_norm": 0.2397145327599731,
|
|
"learning_rate": 1.4319445049810088e-05,
|
|
"loss": 0.4176,
|
|
"step": 7465
|
|
},
|
|
{
|
|
"epoch": 1.0742018981880932,
|
|
"grad_norm": 0.24258713393340053,
|
|
"learning_rate": 1.431265300302426e-05,
|
|
"loss": 0.4271,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 1.0749209088294507,
|
|
"grad_norm": 0.2551377420219885,
|
|
"learning_rate": 1.4305858511275004e-05,
|
|
"loss": 0.4188,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 1.0756399194708082,
|
|
"grad_norm": 0.24540457752189151,
|
|
"learning_rate": 1.4299061578414303e-05,
|
|
"loss": 0.4244,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 1.0763589301121657,
|
|
"grad_norm": 0.25298614154712434,
|
|
"learning_rate": 1.4292262208295534e-05,
|
|
"loss": 0.4296,
|
|
"step": 7485
|
|
},
|
|
{
|
|
"epoch": 1.0770779407535231,
|
|
"grad_norm": 0.2523828578504143,
|
|
"learning_rate": 1.4285460404773442e-05,
|
|
"loss": 0.4225,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 1.0777969513948806,
|
|
"grad_norm": 0.2579441208781946,
|
|
"learning_rate": 1.4278656171704165e-05,
|
|
"loss": 0.4258,
|
|
"step": 7495
|
|
},
|
|
{
|
|
"epoch": 1.0785159620362381,
|
|
"grad_norm": 0.24706189517949953,
|
|
"learning_rate": 1.4271849512945218e-05,
|
|
"loss": 0.423,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 1.0792349726775956,
|
|
"grad_norm": 0.2410591304489331,
|
|
"learning_rate": 1.426504043235547e-05,
|
|
"loss": 0.4194,
|
|
"step": 7505
|
|
},
|
|
{
|
|
"epoch": 1.079953983318953,
|
|
"grad_norm": 0.2474535942584965,
|
|
"learning_rate": 1.4258228933795194e-05,
|
|
"loss": 0.4322,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 1.0806729939603106,
|
|
"grad_norm": 0.2670023141239513,
|
|
"learning_rate": 1.4251415021126015e-05,
|
|
"loss": 0.4187,
|
|
"step": 7515
|
|
},
|
|
{
|
|
"epoch": 1.081392004601668,
|
|
"grad_norm": 0.24125451781166807,
|
|
"learning_rate": 1.4244598698210927e-05,
|
|
"loss": 0.4195,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 1.0821110152430256,
|
|
"grad_norm": 0.2541324517150731,
|
|
"learning_rate": 1.4237779968914294e-05,
|
|
"loss": 0.43,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 1.082830025884383,
|
|
"grad_norm": 0.24984127697393865,
|
|
"learning_rate": 1.4230958837101847e-05,
|
|
"loss": 0.4303,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 1.0835490365257405,
|
|
"grad_norm": 0.23895121972780423,
|
|
"learning_rate": 1.4224135306640674e-05,
|
|
"loss": 0.4256,
|
|
"step": 7535
|
|
},
|
|
{
|
|
"epoch": 1.084268047167098,
|
|
"grad_norm": 0.22815843129758234,
|
|
"learning_rate": 1.4217309381399227e-05,
|
|
"loss": 0.4165,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 1.0849870578084555,
|
|
"grad_norm": 0.24987427549234392,
|
|
"learning_rate": 1.4210481065247312e-05,
|
|
"loss": 0.4062,
|
|
"step": 7545
|
|
},
|
|
{
|
|
"epoch": 1.085706068449813,
|
|
"grad_norm": 0.2440521322641604,
|
|
"learning_rate": 1.4203650362056094e-05,
|
|
"loss": 0.4218,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 1.0864250790911705,
|
|
"grad_norm": 0.24695188285324185,
|
|
"learning_rate": 1.4196817275698085e-05,
|
|
"loss": 0.4327,
|
|
"step": 7555
|
|
},
|
|
{
|
|
"epoch": 1.087144089732528,
|
|
"grad_norm": 0.24914710534389795,
|
|
"learning_rate": 1.4189981810047155e-05,
|
|
"loss": 0.4136,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 1.0878631003738854,
|
|
"grad_norm": 0.24358389545644626,
|
|
"learning_rate": 1.4183143968978523e-05,
|
|
"loss": 0.4264,
|
|
"step": 7565
|
|
},
|
|
{
|
|
"epoch": 1.088582111015243,
|
|
"grad_norm": 0.2459376500553283,
|
|
"learning_rate": 1.4176303756368753e-05,
|
|
"loss": 0.4148,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 1.0893011216566004,
|
|
"grad_norm": 0.26083867482554024,
|
|
"learning_rate": 1.4169461176095745e-05,
|
|
"loss": 0.4351,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 1.090020132297958,
|
|
"grad_norm": 0.24715188774379035,
|
|
"learning_rate": 1.4162616232038754e-05,
|
|
"loss": 0.4199,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 1.0907391429393154,
|
|
"grad_norm": 0.24491240798280314,
|
|
"learning_rate": 1.4155768928078371e-05,
|
|
"loss": 0.418,
|
|
"step": 7585
|
|
},
|
|
{
|
|
"epoch": 1.0914581535806729,
|
|
"grad_norm": 0.25181985639995874,
|
|
"learning_rate": 1.4148919268096519e-05,
|
|
"loss": 0.4232,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 1.0921771642220306,
|
|
"grad_norm": 0.24005994198180422,
|
|
"learning_rate": 1.4142067255976466e-05,
|
|
"loss": 0.4309,
|
|
"step": 7595
|
|
},
|
|
{
|
|
"epoch": 1.092896174863388,
|
|
"grad_norm": 0.2447397337880326,
|
|
"learning_rate": 1.413521289560281e-05,
|
|
"loss": 0.4143,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 1.0936151855047456,
|
|
"grad_norm": 0.2425734376554363,
|
|
"learning_rate": 1.4128356190861471e-05,
|
|
"loss": 0.4184,
|
|
"step": 7605
|
|
},
|
|
{
|
|
"epoch": 1.094334196146103,
|
|
"grad_norm": 0.2530970688941995,
|
|
"learning_rate": 1.412149714563972e-05,
|
|
"loss": 0.4127,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 1.0950532067874605,
|
|
"grad_norm": 0.2448959328514985,
|
|
"learning_rate": 1.411463576382613e-05,
|
|
"loss": 0.4205,
|
|
"step": 7615
|
|
},
|
|
{
|
|
"epoch": 1.095772217428818,
|
|
"grad_norm": 0.24869817141051245,
|
|
"learning_rate": 1.4107772049310615e-05,
|
|
"loss": 0.4193,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 1.0964912280701755,
|
|
"grad_norm": 0.2717766488293654,
|
|
"learning_rate": 1.4100906005984404e-05,
|
|
"loss": 0.4325,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 1.097210238711533,
|
|
"grad_norm": 0.2539575468369953,
|
|
"learning_rate": 1.4094037637740048e-05,
|
|
"loss": 0.4185,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 1.0979292493528905,
|
|
"grad_norm": 0.25130113289646006,
|
|
"learning_rate": 1.408716694847142e-05,
|
|
"loss": 0.4169,
|
|
"step": 7635
|
|
},
|
|
{
|
|
"epoch": 1.098648259994248,
|
|
"grad_norm": 0.24884985415016497,
|
|
"learning_rate": 1.4080293942073704e-05,
|
|
"loss": 0.42,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 1.0993672706356055,
|
|
"grad_norm": 0.24074102770601502,
|
|
"learning_rate": 1.4073418622443402e-05,
|
|
"loss": 0.4127,
|
|
"step": 7645
|
|
},
|
|
{
|
|
"epoch": 1.100086281276963,
|
|
"grad_norm": 0.25304182275332077,
|
|
"learning_rate": 1.4066540993478321e-05,
|
|
"loss": 0.4241,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 1.1008052919183204,
|
|
"grad_norm": 0.29644282230759933,
|
|
"learning_rate": 1.405966105907758e-05,
|
|
"loss": 0.4305,
|
|
"step": 7655
|
|
},
|
|
{
|
|
"epoch": 1.101524302559678,
|
|
"grad_norm": 0.2436353807120827,
|
|
"learning_rate": 1.4052778823141609e-05,
|
|
"loss": 0.416,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 1.1022433132010354,
|
|
"grad_norm": 0.24484423851073356,
|
|
"learning_rate": 1.4045894289572142e-05,
|
|
"loss": 0.4346,
|
|
"step": 7665
|
|
},
|
|
{
|
|
"epoch": 1.102962323842393,
|
|
"grad_norm": 0.24391951324397565,
|
|
"learning_rate": 1.4039007462272207e-05,
|
|
"loss": 0.4121,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 1.1036813344837504,
|
|
"grad_norm": 0.24224843292543935,
|
|
"learning_rate": 1.4032118345146141e-05,
|
|
"loss": 0.4423,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 1.1044003451251079,
|
|
"grad_norm": 0.24151880011846152,
|
|
"learning_rate": 1.4025226942099579e-05,
|
|
"loss": 0.4315,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 1.1051193557664654,
|
|
"grad_norm": 0.24674435136342923,
|
|
"learning_rate": 1.4018333257039449e-05,
|
|
"loss": 0.4258,
|
|
"step": 7685
|
|
},
|
|
{
|
|
"epoch": 1.1058383664078228,
|
|
"grad_norm": 0.23572907486634379,
|
|
"learning_rate": 1.4011437293873975e-05,
|
|
"loss": 0.4065,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 1.1065573770491803,
|
|
"grad_norm": 0.2563816055424987,
|
|
"learning_rate": 1.4004539056512667e-05,
|
|
"loss": 0.4355,
|
|
"step": 7695
|
|
},
|
|
{
|
|
"epoch": 1.1072763876905378,
|
|
"grad_norm": 0.2566698703429824,
|
|
"learning_rate": 1.399763854886633e-05,
|
|
"loss": 0.4252,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 1.1079953983318953,
|
|
"grad_norm": 0.2439159913441445,
|
|
"learning_rate": 1.3990735774847057e-05,
|
|
"loss": 0.4252,
|
|
"step": 7705
|
|
},
|
|
{
|
|
"epoch": 1.1087144089732528,
|
|
"grad_norm": 0.24823739748302368,
|
|
"learning_rate": 1.398383073836822e-05,
|
|
"loss": 0.4256,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 1.1094334196146103,
|
|
"grad_norm": 0.27016829076384286,
|
|
"learning_rate": 1.3976923443344483e-05,
|
|
"loss": 0.4257,
|
|
"step": 7715
|
|
},
|
|
{
|
|
"epoch": 1.1101524302559678,
|
|
"grad_norm": 0.24301415679475427,
|
|
"learning_rate": 1.3970013893691776e-05,
|
|
"loss": 0.4163,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 1.1108714408973253,
|
|
"grad_norm": 0.25527015767883365,
|
|
"learning_rate": 1.396310209332732e-05,
|
|
"loss": 0.4145,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 1.1115904515386827,
|
|
"grad_norm": 0.23980610885688808,
|
|
"learning_rate": 1.3956188046169607e-05,
|
|
"loss": 0.4145,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 1.1123094621800402,
|
|
"grad_norm": 0.2468743079078507,
|
|
"learning_rate": 1.3949271756138407e-05,
|
|
"loss": 0.4256,
|
|
"step": 7735
|
|
},
|
|
{
|
|
"epoch": 1.1130284728213977,
|
|
"grad_norm": 0.2633663801233096,
|
|
"learning_rate": 1.3942353227154755e-05,
|
|
"loss": 0.4226,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 1.1137474834627552,
|
|
"grad_norm": 0.25326954886349157,
|
|
"learning_rate": 1.3935432463140954e-05,
|
|
"loss": 0.4004,
|
|
"step": 7745
|
|
},
|
|
{
|
|
"epoch": 1.1144664941041127,
|
|
"grad_norm": 0.252813511679432,
|
|
"learning_rate": 1.3928509468020586e-05,
|
|
"loss": 0.4142,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 1.1151855047454702,
|
|
"grad_norm": 0.24948974834562754,
|
|
"learning_rate": 1.3921584245718485e-05,
|
|
"loss": 0.4275,
|
|
"step": 7755
|
|
},
|
|
{
|
|
"epoch": 1.1159045153868277,
|
|
"grad_norm": 0.2490774493997426,
|
|
"learning_rate": 1.3914656800160755e-05,
|
|
"loss": 0.4401,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 1.1166235260281852,
|
|
"grad_norm": 0.24731238961604693,
|
|
"learning_rate": 1.390772713527476e-05,
|
|
"loss": 0.413,
|
|
"step": 7765
|
|
},
|
|
{
|
|
"epoch": 1.1173425366695426,
|
|
"grad_norm": 0.24009966652121423,
|
|
"learning_rate": 1.3900795254989117e-05,
|
|
"loss": 0.4326,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 1.1180615473109001,
|
|
"grad_norm": 0.241251844552185,
|
|
"learning_rate": 1.3893861163233704e-05,
|
|
"loss": 0.4046,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 1.1187805579522576,
|
|
"grad_norm": 0.24711082983975394,
|
|
"learning_rate": 1.388692486393965e-05,
|
|
"loss": 0.4046,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 1.119499568593615,
|
|
"grad_norm": 0.24611081640811172,
|
|
"learning_rate": 1.3879986361039341e-05,
|
|
"loss": 0.4254,
|
|
"step": 7785
|
|
},
|
|
{
|
|
"epoch": 1.1202185792349726,
|
|
"grad_norm": 0.24883474377488835,
|
|
"learning_rate": 1.3873045658466404e-05,
|
|
"loss": 0.4179,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 1.12093758987633,
|
|
"grad_norm": 0.24887871632954492,
|
|
"learning_rate": 1.386610276015572e-05,
|
|
"loss": 0.4092,
|
|
"step": 7795
|
|
},
|
|
{
|
|
"epoch": 1.1216566005176876,
|
|
"grad_norm": 0.2339971482090034,
|
|
"learning_rate": 1.3859157670043409e-05,
|
|
"loss": 0.4139,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 1.122375611159045,
|
|
"grad_norm": 0.25291445076907043,
|
|
"learning_rate": 1.3852210392066837e-05,
|
|
"loss": 0.4435,
|
|
"step": 7805
|
|
},
|
|
{
|
|
"epoch": 1.1230946218004028,
|
|
"grad_norm": 0.23886337976308974,
|
|
"learning_rate": 1.384526093016461e-05,
|
|
"loss": 0.3929,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 1.12381363244176,
|
|
"grad_norm": 0.24867184978452428,
|
|
"learning_rate": 1.3838309288276577e-05,
|
|
"loss": 0.4214,
|
|
"step": 7815
|
|
},
|
|
{
|
|
"epoch": 1.1245326430831177,
|
|
"grad_norm": 0.2416953267844077,
|
|
"learning_rate": 1.383135547034381e-05,
|
|
"loss": 0.4268,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 1.125251653724475,
|
|
"grad_norm": 0.24714780517863277,
|
|
"learning_rate": 1.3824399480308625e-05,
|
|
"loss": 0.4255,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 1.1259706643658327,
|
|
"grad_norm": 0.24697736669814996,
|
|
"learning_rate": 1.3817441322114573e-05,
|
|
"loss": 0.4217,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 1.1266896750071902,
|
|
"grad_norm": 0.2446263674892036,
|
|
"learning_rate": 1.3810480999706424e-05,
|
|
"loss": 0.4333,
|
|
"step": 7835
|
|
},
|
|
{
|
|
"epoch": 1.1274086856485477,
|
|
"grad_norm": 0.24137930683222925,
|
|
"learning_rate": 1.3803518517030175e-05,
|
|
"loss": 0.4387,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 1.1281276962899052,
|
|
"grad_norm": 0.25949236377089124,
|
|
"learning_rate": 1.3796553878033056e-05,
|
|
"loss": 0.4309,
|
|
"step": 7845
|
|
},
|
|
{
|
|
"epoch": 1.1288467069312627,
|
|
"grad_norm": 0.25804771630659423,
|
|
"learning_rate": 1.3789587086663516e-05,
|
|
"loss": 0.4334,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 1.1295657175726201,
|
|
"grad_norm": 0.24467407111888867,
|
|
"learning_rate": 1.3782618146871222e-05,
|
|
"loss": 0.4189,
|
|
"step": 7855
|
|
},
|
|
{
|
|
"epoch": 1.1302847282139776,
|
|
"grad_norm": 0.24540869178112273,
|
|
"learning_rate": 1.3775647062607062e-05,
|
|
"loss": 0.426,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 1.1310037388553351,
|
|
"grad_norm": 0.2581233546593125,
|
|
"learning_rate": 1.3768673837823138e-05,
|
|
"loss": 0.4295,
|
|
"step": 7865
|
|
},
|
|
{
|
|
"epoch": 1.1317227494966926,
|
|
"grad_norm": 0.23733084790193076,
|
|
"learning_rate": 1.3761698476472767e-05,
|
|
"loss": 0.4099,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 1.13244176013805,
|
|
"grad_norm": 0.26599088742069377,
|
|
"learning_rate": 1.375472098251047e-05,
|
|
"loss": 0.4011,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 1.1331607707794076,
|
|
"grad_norm": 0.253357189978077,
|
|
"learning_rate": 1.3747741359891991e-05,
|
|
"loss": 0.4217,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 1.133879781420765,
|
|
"grad_norm": 0.25256913460661307,
|
|
"learning_rate": 1.3740759612574268e-05,
|
|
"loss": 0.4187,
|
|
"step": 7885
|
|
},
|
|
{
|
|
"epoch": 1.1345987920621226,
|
|
"grad_norm": 0.2434106525342183,
|
|
"learning_rate": 1.3733775744515452e-05,
|
|
"loss": 0.4259,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 1.13531780270348,
|
|
"grad_norm": 0.25358779229776013,
|
|
"learning_rate": 1.372678975967489e-05,
|
|
"loss": 0.4361,
|
|
"step": 7895
|
|
},
|
|
{
|
|
"epoch": 1.1360368133448375,
|
|
"grad_norm": 0.24904816611960595,
|
|
"learning_rate": 1.3719801662013133e-05,
|
|
"loss": 0.4202,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 1.136755823986195,
|
|
"grad_norm": 0.24093806308462346,
|
|
"learning_rate": 1.3712811455491927e-05,
|
|
"loss": 0.4176,
|
|
"step": 7905
|
|
},
|
|
{
|
|
"epoch": 1.1374748346275525,
|
|
"grad_norm": 0.24362253736347905,
|
|
"learning_rate": 1.370581914407422e-05,
|
|
"loss": 0.4065,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 1.13819384526891,
|
|
"grad_norm": 0.24153784261120567,
|
|
"learning_rate": 1.3698824731724147e-05,
|
|
"loss": 0.4263,
|
|
"step": 7915
|
|
},
|
|
{
|
|
"epoch": 1.1389128559102675,
|
|
"grad_norm": 0.26957486232644123,
|
|
"learning_rate": 1.3691828222407032e-05,
|
|
"loss": 0.4149,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 1.139631866551625,
|
|
"grad_norm": 0.25905751939209765,
|
|
"learning_rate": 1.3684829620089391e-05,
|
|
"loss": 0.421,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 1.1403508771929824,
|
|
"grad_norm": 0.2510604462676102,
|
|
"learning_rate": 1.3677828928738934e-05,
|
|
"loss": 0.4231,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 1.14106988783434,
|
|
"grad_norm": 0.24835283662485474,
|
|
"learning_rate": 1.3670826152324543e-05,
|
|
"loss": 0.4211,
|
|
"step": 7935
|
|
},
|
|
{
|
|
"epoch": 1.1417888984756974,
|
|
"grad_norm": 0.24748031095299838,
|
|
"learning_rate": 1.3663821294816289e-05,
|
|
"loss": 0.4218,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 1.142507909117055,
|
|
"grad_norm": 0.24418549656367056,
|
|
"learning_rate": 1.3656814360185422e-05,
|
|
"loss": 0.4239,
|
|
"step": 7945
|
|
},
|
|
{
|
|
"epoch": 1.1432269197584124,
|
|
"grad_norm": 0.24667828916811363,
|
|
"learning_rate": 1.3649805352404366e-05,
|
|
"loss": 0.4132,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 1.1439459303997699,
|
|
"grad_norm": 0.24542712661129085,
|
|
"learning_rate": 1.3642794275446728e-05,
|
|
"loss": 0.4138,
|
|
"step": 7955
|
|
},
|
|
{
|
|
"epoch": 1.1446649410411274,
|
|
"grad_norm": 0.25146817243635045,
|
|
"learning_rate": 1.363578113328728e-05,
|
|
"loss": 0.4319,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 1.1453839516824849,
|
|
"grad_norm": 0.25652758801455494,
|
|
"learning_rate": 1.362876592990197e-05,
|
|
"loss": 0.4197,
|
|
"step": 7965
|
|
},
|
|
{
|
|
"epoch": 1.1461029623238423,
|
|
"grad_norm": 0.2527655011860801,
|
|
"learning_rate": 1.3621748669267911e-05,
|
|
"loss": 0.4148,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 1.1468219729651998,
|
|
"grad_norm": 0.2437231962419539,
|
|
"learning_rate": 1.3614729355363382e-05,
|
|
"loss": 0.4087,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 1.1475409836065573,
|
|
"grad_norm": 0.2519247554626112,
|
|
"learning_rate": 1.3607707992167836e-05,
|
|
"loss": 0.4205,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 1.1482599942479148,
|
|
"grad_norm": 0.255776034441485,
|
|
"learning_rate": 1.3600684583661872e-05,
|
|
"loss": 0.415,
|
|
"step": 7985
|
|
},
|
|
{
|
|
"epoch": 1.1489790048892723,
|
|
"grad_norm": 0.25702160420203973,
|
|
"learning_rate": 1.3593659133827258e-05,
|
|
"loss": 0.4285,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 1.1496980155306298,
|
|
"grad_norm": 0.251075718116356,
|
|
"learning_rate": 1.358663164664692e-05,
|
|
"loss": 0.4233,
|
|
"step": 7995
|
|
},
|
|
{
|
|
"epoch": 1.1504170261719873,
|
|
"grad_norm": 0.24391957218362018,
|
|
"learning_rate": 1.3579602126104935e-05,
|
|
"loss": 0.4321,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 1.1511360368133448,
|
|
"grad_norm": 0.2542217038989035,
|
|
"learning_rate": 1.3572570576186535e-05,
|
|
"loss": 0.4246,
|
|
"step": 8005
|
|
},
|
|
{
|
|
"epoch": 1.1518550474547022,
|
|
"grad_norm": 0.24617547563080916,
|
|
"learning_rate": 1.3565537000878102e-05,
|
|
"loss": 0.4195,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 1.1525740580960597,
|
|
"grad_norm": 0.2406743128878967,
|
|
"learning_rate": 1.3558501404167168e-05,
|
|
"loss": 0.4211,
|
|
"step": 8015
|
|
},
|
|
{
|
|
"epoch": 1.1532930687374172,
|
|
"grad_norm": 0.2492803217604934,
|
|
"learning_rate": 1.3551463790042405e-05,
|
|
"loss": 0.4483,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 1.154012079378775,
|
|
"grad_norm": 0.2687431622773391,
|
|
"learning_rate": 1.3544424162493636e-05,
|
|
"loss": 0.4034,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 1.1547310900201322,
|
|
"grad_norm": 0.2474863901410814,
|
|
"learning_rate": 1.3537382525511827e-05,
|
|
"loss": 0.4248,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 1.15545010066149,
|
|
"grad_norm": 0.24277793893103647,
|
|
"learning_rate": 1.3530338883089068e-05,
|
|
"loss": 0.4138,
|
|
"step": 8035
|
|
},
|
|
{
|
|
"epoch": 1.1561691113028472,
|
|
"grad_norm": 0.2376746683514846,
|
|
"learning_rate": 1.3523293239218607e-05,
|
|
"loss": 0.405,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 1.1568881219442049,
|
|
"grad_norm": 0.2586039263754343,
|
|
"learning_rate": 1.3516245597894809e-05,
|
|
"loss": 0.4151,
|
|
"step": 8045
|
|
},
|
|
{
|
|
"epoch": 1.1576071325855624,
|
|
"grad_norm": 0.2573459661290238,
|
|
"learning_rate": 1.3509195963113179e-05,
|
|
"loss": 0.4208,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 1.1583261432269198,
|
|
"grad_norm": 0.26358292190965493,
|
|
"learning_rate": 1.3502144338870358e-05,
|
|
"loss": 0.4281,
|
|
"step": 8055
|
|
},
|
|
{
|
|
"epoch": 1.1590451538682773,
|
|
"grad_norm": 0.25201890612946326,
|
|
"learning_rate": 1.3495090729164103e-05,
|
|
"loss": 0.4108,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 1.1597641645096348,
|
|
"grad_norm": 0.25152231329952435,
|
|
"learning_rate": 1.3488035137993305e-05,
|
|
"loss": 0.4331,
|
|
"step": 8065
|
|
},
|
|
{
|
|
"epoch": 1.1604831751509923,
|
|
"grad_norm": 0.26500500442079494,
|
|
"learning_rate": 1.3480977569357974e-05,
|
|
"loss": 0.4222,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 1.1612021857923498,
|
|
"grad_norm": 0.25151093241524297,
|
|
"learning_rate": 1.3473918027259242e-05,
|
|
"loss": 0.4245,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 1.1619211964337073,
|
|
"grad_norm": 0.24952523717373598,
|
|
"learning_rate": 1.3466856515699367e-05,
|
|
"loss": 0.4213,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 1.1626402070750648,
|
|
"grad_norm": 0.24856168521182317,
|
|
"learning_rate": 1.345979303868171e-05,
|
|
"loss": 0.4099,
|
|
"step": 8085
|
|
},
|
|
{
|
|
"epoch": 1.1633592177164223,
|
|
"grad_norm": 0.26059949588767317,
|
|
"learning_rate": 1.3452727600210755e-05,
|
|
"loss": 0.4207,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 1.1640782283577797,
|
|
"grad_norm": 0.2610866688103912,
|
|
"learning_rate": 1.3445660204292098e-05,
|
|
"loss": 0.4105,
|
|
"step": 8095
|
|
},
|
|
{
|
|
"epoch": 1.1647972389991372,
|
|
"grad_norm": 0.24596872245945312,
|
|
"learning_rate": 1.3438590854932442e-05,
|
|
"loss": 0.427,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 1.1655162496404947,
|
|
"grad_norm": 0.250687613838101,
|
|
"learning_rate": 1.3431519556139599e-05,
|
|
"loss": 0.4031,
|
|
"step": 8105
|
|
},
|
|
{
|
|
"epoch": 1.1662352602818522,
|
|
"grad_norm": 0.24182193223557452,
|
|
"learning_rate": 1.3424446311922486e-05,
|
|
"loss": 0.4363,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 1.1669542709232097,
|
|
"grad_norm": 0.24363623767786466,
|
|
"learning_rate": 1.341737112629112e-05,
|
|
"loss": 0.4205,
|
|
"step": 8115
|
|
},
|
|
{
|
|
"epoch": 1.1676732815645672,
|
|
"grad_norm": 0.23567571845844587,
|
|
"learning_rate": 1.3410294003256623e-05,
|
|
"loss": 0.4273,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 1.1683922922059247,
|
|
"grad_norm": 0.2481386696407478,
|
|
"learning_rate": 1.3403214946831218e-05,
|
|
"loss": 0.4242,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 1.1691113028472822,
|
|
"grad_norm": 0.24984866659117091,
|
|
"learning_rate": 1.3396133961028214e-05,
|
|
"loss": 0.4151,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 1.1698303134886396,
|
|
"grad_norm": 0.2587341281267679,
|
|
"learning_rate": 1.3389051049862024e-05,
|
|
"loss": 0.4324,
|
|
"step": 8135
|
|
},
|
|
{
|
|
"epoch": 1.1705493241299971,
|
|
"grad_norm": 0.24480606706358338,
|
|
"learning_rate": 1.3381966217348143e-05,
|
|
"loss": 0.417,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 1.1712683347713546,
|
|
"grad_norm": 0.2467942565642489,
|
|
"learning_rate": 1.3374879467503163e-05,
|
|
"loss": 0.4279,
|
|
"step": 8145
|
|
},
|
|
{
|
|
"epoch": 1.171987345412712,
|
|
"grad_norm": 0.2554295736313284,
|
|
"learning_rate": 1.3367790804344762e-05,
|
|
"loss": 0.4398,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 1.1727063560540696,
|
|
"grad_norm": 0.24686156456775635,
|
|
"learning_rate": 1.33607002318917e-05,
|
|
"loss": 0.4292,
|
|
"step": 8155
|
|
},
|
|
{
|
|
"epoch": 1.173425366695427,
|
|
"grad_norm": 0.25350234857874493,
|
|
"learning_rate": 1.3353607754163822e-05,
|
|
"loss": 0.4171,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 1.1741443773367846,
|
|
"grad_norm": 0.2675757948436432,
|
|
"learning_rate": 1.3346513375182049e-05,
|
|
"loss": 0.425,
|
|
"step": 8165
|
|
},
|
|
{
|
|
"epoch": 1.174863387978142,
|
|
"grad_norm": 0.24840026964787834,
|
|
"learning_rate": 1.333941709896838e-05,
|
|
"loss": 0.4243,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 1.1755823986194995,
|
|
"grad_norm": 0.24658512060060236,
|
|
"learning_rate": 1.3332318929545898e-05,
|
|
"loss": 0.4429,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 1.176301409260857,
|
|
"grad_norm": 0.24447367376785173,
|
|
"learning_rate": 1.3325218870938751e-05,
|
|
"loss": 0.4117,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 1.1770204199022145,
|
|
"grad_norm": 0.24290491986399293,
|
|
"learning_rate": 1.3318116927172162e-05,
|
|
"loss": 0.4111,
|
|
"step": 8185
|
|
},
|
|
{
|
|
"epoch": 1.177739430543572,
|
|
"grad_norm": 0.25434081056777064,
|
|
"learning_rate": 1.331101310227242e-05,
|
|
"loss": 0.4295,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 1.1784584411849295,
|
|
"grad_norm": 0.24918566963063316,
|
|
"learning_rate": 1.330390740026688e-05,
|
|
"loss": 0.4169,
|
|
"step": 8195
|
|
},
|
|
{
|
|
"epoch": 1.179177451826287,
|
|
"grad_norm": 0.2466669288347175,
|
|
"learning_rate": 1.3296799825183966e-05,
|
|
"loss": 0.4318,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 1.1798964624676445,
|
|
"grad_norm": 0.25137643483645633,
|
|
"learning_rate": 1.328969038105316e-05,
|
|
"loss": 0.4304,
|
|
"step": 8205
|
|
},
|
|
{
|
|
"epoch": 1.180615473109002,
|
|
"grad_norm": 0.25921147868275496,
|
|
"learning_rate": 1.3282579071905004e-05,
|
|
"loss": 0.4238,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 1.1813344837503594,
|
|
"grad_norm": 0.24375862567652218,
|
|
"learning_rate": 1.3275465901771094e-05,
|
|
"loss": 0.4086,
|
|
"step": 8215
|
|
},
|
|
{
|
|
"epoch": 1.182053494391717,
|
|
"grad_norm": 0.24452988663026934,
|
|
"learning_rate": 1.326835087468409e-05,
|
|
"loss": 0.4138,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 1.1827725050330744,
|
|
"grad_norm": 0.25794940392873345,
|
|
"learning_rate": 1.32612339946777e-05,
|
|
"loss": 0.4271,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 1.183491515674432,
|
|
"grad_norm": 0.24798941451520337,
|
|
"learning_rate": 1.3254115265786682e-05,
|
|
"loss": 0.405,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 1.1842105263157894,
|
|
"grad_norm": 0.2475664814793249,
|
|
"learning_rate": 1.3246994692046837e-05,
|
|
"loss": 0.4374,
|
|
"step": 8235
|
|
},
|
|
{
|
|
"epoch": 1.184929536957147,
|
|
"grad_norm": 0.2545691836978971,
|
|
"learning_rate": 1.323987227749502e-05,
|
|
"loss": 0.4209,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 1.1856485475985044,
|
|
"grad_norm": 0.26838769791648476,
|
|
"learning_rate": 1.323274802616913e-05,
|
|
"loss": 0.4113,
|
|
"step": 8245
|
|
},
|
|
{
|
|
"epoch": 1.186367558239862,
|
|
"grad_norm": 0.24614739433809657,
|
|
"learning_rate": 1.3225621942108098e-05,
|
|
"loss": 0.4301,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 1.1870865688812193,
|
|
"grad_norm": 0.2365370918897361,
|
|
"learning_rate": 1.3218494029351903e-05,
|
|
"loss": 0.4308,
|
|
"step": 8255
|
|
},
|
|
{
|
|
"epoch": 1.187805579522577,
|
|
"grad_norm": 0.262541605445906,
|
|
"learning_rate": 1.3211364291941562e-05,
|
|
"loss": 0.418,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 1.1885245901639343,
|
|
"grad_norm": 0.23740679928960856,
|
|
"learning_rate": 1.3204232733919113e-05,
|
|
"loss": 0.4251,
|
|
"step": 8265
|
|
},
|
|
{
|
|
"epoch": 1.189243600805292,
|
|
"grad_norm": 0.24576537383506467,
|
|
"learning_rate": 1.3197099359327643e-05,
|
|
"loss": 0.4216,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 1.1899626114466495,
|
|
"grad_norm": 0.2545486272631958,
|
|
"learning_rate": 1.318996417221126e-05,
|
|
"loss": 0.4294,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 1.190681622088007,
|
|
"grad_norm": 0.25064130034355747,
|
|
"learning_rate": 1.3182827176615098e-05,
|
|
"loss": 0.412,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 1.1914006327293645,
|
|
"grad_norm": 0.23643340314959274,
|
|
"learning_rate": 1.3175688376585323e-05,
|
|
"loss": 0.4399,
|
|
"step": 8285
|
|
},
|
|
{
|
|
"epoch": 1.192119643370722,
|
|
"grad_norm": 0.2537867803665056,
|
|
"learning_rate": 1.3168547776169117e-05,
|
|
"loss": 0.435,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 1.1928386540120794,
|
|
"grad_norm": 0.25676163776653566,
|
|
"learning_rate": 1.3161405379414686e-05,
|
|
"loss": 0.4288,
|
|
"step": 8295
|
|
},
|
|
{
|
|
"epoch": 1.193557664653437,
|
|
"grad_norm": 0.24692052623095706,
|
|
"learning_rate": 1.3154261190371255e-05,
|
|
"loss": 0.4169,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 1.1942766752947944,
|
|
"grad_norm": 0.24644854980084216,
|
|
"learning_rate": 1.3147115213089065e-05,
|
|
"loss": 0.4209,
|
|
"step": 8305
|
|
},
|
|
{
|
|
"epoch": 1.194995685936152,
|
|
"grad_norm": 0.2626283251769723,
|
|
"learning_rate": 1.3139967451619371e-05,
|
|
"loss": 0.4239,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 1.1957146965775094,
|
|
"grad_norm": 0.25140453305076255,
|
|
"learning_rate": 1.3132817910014435e-05,
|
|
"loss": 0.4325,
|
|
"step": 8315
|
|
},
|
|
{
|
|
"epoch": 1.1964337072188669,
|
|
"grad_norm": 0.24253654316024326,
|
|
"learning_rate": 1.3125666592327534e-05,
|
|
"loss": 0.4091,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 1.1971527178602244,
|
|
"grad_norm": 0.2523257882774095,
|
|
"learning_rate": 1.3118513502612951e-05,
|
|
"loss": 0.4269,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 1.1978717285015819,
|
|
"grad_norm": 0.2541110638728842,
|
|
"learning_rate": 1.311135864492597e-05,
|
|
"loss": 0.419,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 1.1985907391429393,
|
|
"grad_norm": 0.25649704405579965,
|
|
"learning_rate": 1.3104202023322879e-05,
|
|
"loss": 0.4154,
|
|
"step": 8335
|
|
},
|
|
{
|
|
"epoch": 1.1993097497842968,
|
|
"grad_norm": 0.2533668272093602,
|
|
"learning_rate": 1.3097043641860965e-05,
|
|
"loss": 0.4337,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 1.2000287604256543,
|
|
"grad_norm": 0.25450629122005425,
|
|
"learning_rate": 1.3089883504598525e-05,
|
|
"loss": 0.42,
|
|
"step": 8345
|
|
},
|
|
{
|
|
"epoch": 1.2007477710670118,
|
|
"grad_norm": 0.2420736074725916,
|
|
"learning_rate": 1.3082721615594828e-05,
|
|
"loss": 0.418,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 1.2014667817083693,
|
|
"grad_norm": 0.25432487846975754,
|
|
"learning_rate": 1.3075557978910156e-05,
|
|
"loss": 0.4233,
|
|
"step": 8355
|
|
},
|
|
{
|
|
"epoch": 1.2021857923497268,
|
|
"grad_norm": 0.24677056731156752,
|
|
"learning_rate": 1.3068392598605775e-05,
|
|
"loss": 0.4086,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 1.2029048029910843,
|
|
"grad_norm": 0.24647318234129603,
|
|
"learning_rate": 1.3061225478743933e-05,
|
|
"loss": 0.4071,
|
|
"step": 8365
|
|
},
|
|
{
|
|
"epoch": 1.2036238136324418,
|
|
"grad_norm": 0.2538408007703241,
|
|
"learning_rate": 1.3054056623387876e-05,
|
|
"loss": 0.4259,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 1.2043428242737992,
|
|
"grad_norm": 0.24974410344574643,
|
|
"learning_rate": 1.3046886036601829e-05,
|
|
"loss": 0.4127,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 1.2050618349151567,
|
|
"grad_norm": 0.24230198275162937,
|
|
"learning_rate": 1.3039713722450995e-05,
|
|
"loss": 0.4125,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 1.2057808455565142,
|
|
"grad_norm": 0.24913475757853068,
|
|
"learning_rate": 1.3032539685001558e-05,
|
|
"loss": 0.423,
|
|
"step": 8385
|
|
},
|
|
{
|
|
"epoch": 1.2064998561978717,
|
|
"grad_norm": 0.27232861147614684,
|
|
"learning_rate": 1.302536392832068e-05,
|
|
"loss": 0.415,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 1.2072188668392292,
|
|
"grad_norm": 0.24370010266072922,
|
|
"learning_rate": 1.3018186456476504e-05,
|
|
"loss": 0.4228,
|
|
"step": 8395
|
|
},
|
|
{
|
|
"epoch": 1.2079378774805867,
|
|
"grad_norm": 0.24506828275307008,
|
|
"learning_rate": 1.3011007273538134e-05,
|
|
"loss": 0.424,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 1.2086568881219442,
|
|
"grad_norm": 0.2628538262349111,
|
|
"learning_rate": 1.300382638357565e-05,
|
|
"loss": 0.4275,
|
|
"step": 8405
|
|
},
|
|
{
|
|
"epoch": 1.2093758987633016,
|
|
"grad_norm": 0.25852077220155995,
|
|
"learning_rate": 1.2996643790660102e-05,
|
|
"loss": 0.42,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 1.2100949094046591,
|
|
"grad_norm": 0.2555973034880567,
|
|
"learning_rate": 1.2989459498863498e-05,
|
|
"loss": 0.4266,
|
|
"step": 8415
|
|
},
|
|
{
|
|
"epoch": 1.2108139200460166,
|
|
"grad_norm": 0.37250123766470045,
|
|
"learning_rate": 1.2982273512258813e-05,
|
|
"loss": 0.3953,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 1.211532930687374,
|
|
"grad_norm": 0.2485536945534741,
|
|
"learning_rate": 1.2975085834919991e-05,
|
|
"loss": 0.4312,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 1.2122519413287316,
|
|
"grad_norm": 0.24032018458794435,
|
|
"learning_rate": 1.2967896470921922e-05,
|
|
"loss": 0.4168,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 1.212970951970089,
|
|
"grad_norm": 0.2526115365005817,
|
|
"learning_rate": 1.2960705424340453e-05,
|
|
"loss": 0.4139,
|
|
"step": 8435
|
|
},
|
|
{
|
|
"epoch": 1.2136899626114466,
|
|
"grad_norm": 0.26723808958337547,
|
|
"learning_rate": 1.2953512699252398e-05,
|
|
"loss": 0.4321,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 1.214408973252804,
|
|
"grad_norm": 0.23980364599778922,
|
|
"learning_rate": 1.2946318299735508e-05,
|
|
"loss": 0.4231,
|
|
"step": 8445
|
|
},
|
|
{
|
|
"epoch": 1.2151279838941615,
|
|
"grad_norm": 0.23168784713537172,
|
|
"learning_rate": 1.2939122229868489e-05,
|
|
"loss": 0.418,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 1.215846994535519,
|
|
"grad_norm": 0.25321859227375604,
|
|
"learning_rate": 1.2931924493730997e-05,
|
|
"loss": 0.431,
|
|
"step": 8455
|
|
},
|
|
{
|
|
"epoch": 1.2165660051768765,
|
|
"grad_norm": 0.25390809268714226,
|
|
"learning_rate": 1.2924725095403625e-05,
|
|
"loss": 0.436,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 1.2172850158182342,
|
|
"grad_norm": 0.25004190514605684,
|
|
"learning_rate": 1.2917524038967919e-05,
|
|
"loss": 0.4098,
|
|
"step": 8465
|
|
},
|
|
{
|
|
"epoch": 1.2180040264595915,
|
|
"grad_norm": 0.24400882190317358,
|
|
"learning_rate": 1.2910321328506355e-05,
|
|
"loss": 0.4175,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 1.2187230371009492,
|
|
"grad_norm": 0.25849678176050406,
|
|
"learning_rate": 1.2903116968102354e-05,
|
|
"loss": 0.4239,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 1.2194420477423065,
|
|
"grad_norm": 0.2511130706595977,
|
|
"learning_rate": 1.2895910961840263e-05,
|
|
"loss": 0.4092,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 1.2201610583836642,
|
|
"grad_norm": 0.2662565814750477,
|
|
"learning_rate": 1.2888703313805375e-05,
|
|
"loss": 0.4321,
|
|
"step": 8485
|
|
},
|
|
{
|
|
"epoch": 1.2208800690250217,
|
|
"grad_norm": 0.2610809882106791,
|
|
"learning_rate": 1.2881494028083901e-05,
|
|
"loss": 0.4385,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 1.2215990796663792,
|
|
"grad_norm": 0.2562432167176942,
|
|
"learning_rate": 1.2874283108762991e-05,
|
|
"loss": 0.4253,
|
|
"step": 8495
|
|
},
|
|
{
|
|
"epoch": 1.2223180903077366,
|
|
"grad_norm": 0.23942443774657562,
|
|
"learning_rate": 1.2867070559930715e-05,
|
|
"loss": 0.4174,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 1.2230371009490941,
|
|
"grad_norm": 0.2504392672198876,
|
|
"learning_rate": 1.2859856385676066e-05,
|
|
"loss": 0.4186,
|
|
"step": 8505
|
|
},
|
|
{
|
|
"epoch": 1.2237561115904516,
|
|
"grad_norm": 0.2413481143352121,
|
|
"learning_rate": 1.2852640590088964e-05,
|
|
"loss": 0.4273,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 1.224475122231809,
|
|
"grad_norm": 0.24855331184524024,
|
|
"learning_rate": 1.2845423177260245e-05,
|
|
"loss": 0.4249,
|
|
"step": 8515
|
|
},
|
|
{
|
|
"epoch": 1.2251941328731666,
|
|
"grad_norm": 0.2539988946799221,
|
|
"learning_rate": 1.2838204151281661e-05,
|
|
"loss": 0.4339,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 1.225913143514524,
|
|
"grad_norm": 0.26062461026941763,
|
|
"learning_rate": 1.2830983516245883e-05,
|
|
"loss": 0.4232,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 1.2266321541558816,
|
|
"grad_norm": 0.2405582501284004,
|
|
"learning_rate": 1.2823761276246483e-05,
|
|
"loss": 0.4208,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 1.227351164797239,
|
|
"grad_norm": 0.2560490797414399,
|
|
"learning_rate": 1.2816537435377953e-05,
|
|
"loss": 0.412,
|
|
"step": 8535
|
|
},
|
|
{
|
|
"epoch": 1.2280701754385965,
|
|
"grad_norm": 0.2490737915145687,
|
|
"learning_rate": 1.2809311997735697e-05,
|
|
"loss": 0.4406,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 1.228789186079954,
|
|
"grad_norm": 0.23857478709388596,
|
|
"learning_rate": 1.280208496741601e-05,
|
|
"loss": 0.4183,
|
|
"step": 8545
|
|
},
|
|
{
|
|
"epoch": 1.2295081967213115,
|
|
"grad_norm": 0.24339980366638034,
|
|
"learning_rate": 1.2794856348516095e-05,
|
|
"loss": 0.423,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 1.230227207362669,
|
|
"grad_norm": 0.23505042392329964,
|
|
"learning_rate": 1.2787626145134066e-05,
|
|
"loss": 0.4199,
|
|
"step": 8555
|
|
},
|
|
{
|
|
"epoch": 1.2309462180040265,
|
|
"grad_norm": 0.24505729095145254,
|
|
"learning_rate": 1.2780394361368923e-05,
|
|
"loss": 0.4306,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 1.231665228645384,
|
|
"grad_norm": 0.24991699271782725,
|
|
"learning_rate": 1.2773161001320568e-05,
|
|
"loss": 0.4174,
|
|
"step": 8565
|
|
},
|
|
{
|
|
"epoch": 1.2323842392867415,
|
|
"grad_norm": 0.2613446094274954,
|
|
"learning_rate": 1.2765926069089796e-05,
|
|
"loss": 0.4085,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 1.233103249928099,
|
|
"grad_norm": 0.24970744977753517,
|
|
"learning_rate": 1.2758689568778286e-05,
|
|
"loss": 0.4203,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 1.2338222605694564,
|
|
"grad_norm": 0.24192710879802426,
|
|
"learning_rate": 1.275145150448862e-05,
|
|
"loss": 0.414,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 1.234541271210814,
|
|
"grad_norm": 0.26464114170276903,
|
|
"learning_rate": 1.2744211880324248e-05,
|
|
"loss": 0.4177,
|
|
"step": 8585
|
|
},
|
|
{
|
|
"epoch": 1.2352602818521714,
|
|
"grad_norm": 0.24058953049530782,
|
|
"learning_rate": 1.2736970700389528e-05,
|
|
"loss": 0.4291,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 1.235979292493529,
|
|
"grad_norm": 0.26170215003131153,
|
|
"learning_rate": 1.2729727968789678e-05,
|
|
"loss": 0.4143,
|
|
"step": 8595
|
|
},
|
|
{
|
|
"epoch": 1.2366983031348864,
|
|
"grad_norm": 0.24147063478240458,
|
|
"learning_rate": 1.272248368963081e-05,
|
|
"loss": 0.4165,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 1.2374173137762439,
|
|
"grad_norm": 0.26028828499534745,
|
|
"learning_rate": 1.2715237867019904e-05,
|
|
"loss": 0.4268,
|
|
"step": 8605
|
|
},
|
|
{
|
|
"epoch": 1.2381363244176014,
|
|
"grad_norm": 0.26999382144154427,
|
|
"learning_rate": 1.270799050506482e-05,
|
|
"loss": 0.4277,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 1.2388553350589588,
|
|
"grad_norm": 0.24594834114587194,
|
|
"learning_rate": 1.2700741607874295e-05,
|
|
"loss": 0.429,
|
|
"step": 8615
|
|
},
|
|
{
|
|
"epoch": 1.2395743457003163,
|
|
"grad_norm": 0.24907555641346754,
|
|
"learning_rate": 1.2693491179557922e-05,
|
|
"loss": 0.4289,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 1.2402933563416738,
|
|
"grad_norm": 0.2527414030085228,
|
|
"learning_rate": 1.2686239224226183e-05,
|
|
"loss": 0.418,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 1.2410123669830313,
|
|
"grad_norm": 0.24952338659009965,
|
|
"learning_rate": 1.2678985745990401e-05,
|
|
"loss": 0.4277,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 1.2417313776243888,
|
|
"grad_norm": 0.2576321199932301,
|
|
"learning_rate": 1.2671730748962785e-05,
|
|
"loss": 0.4309,
|
|
"step": 8635
|
|
},
|
|
{
|
|
"epoch": 1.2424503882657463,
|
|
"grad_norm": 0.25162676433589115,
|
|
"learning_rate": 1.2664474237256394e-05,
|
|
"loss": 0.4221,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 1.2431693989071038,
|
|
"grad_norm": 0.25372190105004244,
|
|
"learning_rate": 1.2657216214985144e-05,
|
|
"loss": 0.4164,
|
|
"step": 8645
|
|
},
|
|
{
|
|
"epoch": 1.2438884095484612,
|
|
"grad_norm": 0.2513779098353839,
|
|
"learning_rate": 1.2649956686263814e-05,
|
|
"loss": 0.4243,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 1.2446074201898187,
|
|
"grad_norm": 0.26709377522559713,
|
|
"learning_rate": 1.2642695655208028e-05,
|
|
"loss": 0.4215,
|
|
"step": 8655
|
|
},
|
|
{
|
|
"epoch": 1.2453264308311762,
|
|
"grad_norm": 0.2701441209301692,
|
|
"learning_rate": 1.2635433125934273e-05,
|
|
"loss": 0.4209,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 1.2460454414725337,
|
|
"grad_norm": 0.24942264858926272,
|
|
"learning_rate": 1.2628169102559878e-05,
|
|
"loss": 0.4115,
|
|
"step": 8665
|
|
},
|
|
{
|
|
"epoch": 1.2467644521138912,
|
|
"grad_norm": 0.24566825676531506,
|
|
"learning_rate": 1.262090358920302e-05,
|
|
"loss": 0.4194,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 1.2474834627552487,
|
|
"grad_norm": 0.2475653256105879,
|
|
"learning_rate": 1.2613636589982723e-05,
|
|
"loss": 0.4166,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 1.2482024733966064,
|
|
"grad_norm": 0.25544820127515167,
|
|
"learning_rate": 1.260636810901885e-05,
|
|
"loss": 0.4039,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 1.2489214840379637,
|
|
"grad_norm": 0.2427882896810874,
|
|
"learning_rate": 1.2599098150432103e-05,
|
|
"loss": 0.4381,
|
|
"step": 8685
|
|
},
|
|
{
|
|
"epoch": 1.2496404946793214,
|
|
"grad_norm": 0.2558819267236487,
|
|
"learning_rate": 1.2591826718344034e-05,
|
|
"loss": 0.4282,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 1.2503595053206786,
|
|
"grad_norm": 0.2632854056788182,
|
|
"learning_rate": 1.2584553816877012e-05,
|
|
"loss": 0.4185,
|
|
"step": 8695
|
|
},
|
|
{
|
|
"epoch": 1.2510785159620363,
|
|
"grad_norm": 0.25350775902291917,
|
|
"learning_rate": 1.257727945015425e-05,
|
|
"loss": 0.4245,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 1.2517975266033936,
|
|
"grad_norm": 0.24278316129715752,
|
|
"learning_rate": 1.2570003622299792e-05,
|
|
"loss": 0.4011,
|
|
"step": 8705
|
|
},
|
|
{
|
|
"epoch": 1.2525165372447513,
|
|
"grad_norm": 0.25336770292287847,
|
|
"learning_rate": 1.2562726337438504e-05,
|
|
"loss": 0.4172,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 1.2532355478861086,
|
|
"grad_norm": 0.25741926259404213,
|
|
"learning_rate": 1.2555447599696086e-05,
|
|
"loss": 0.4356,
|
|
"step": 8715
|
|
},
|
|
{
|
|
"epoch": 1.2539545585274663,
|
|
"grad_norm": 0.24037401587557164,
|
|
"learning_rate": 1.254816741319906e-05,
|
|
"loss": 0.418,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 1.2546735691688238,
|
|
"grad_norm": 0.2383795745878674,
|
|
"learning_rate": 1.2540885782074756e-05,
|
|
"loss": 0.4341,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 1.2553925798101813,
|
|
"grad_norm": 0.28056766382702697,
|
|
"learning_rate": 1.2533602710451345e-05,
|
|
"loss": 0.4255,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 1.2561115904515388,
|
|
"grad_norm": 0.25016980018947965,
|
|
"learning_rate": 1.25263182024578e-05,
|
|
"loss": 0.4309,
|
|
"step": 8735
|
|
},
|
|
{
|
|
"epoch": 1.2568306010928962,
|
|
"grad_norm": 0.25477712843950795,
|
|
"learning_rate": 1.2519032262223913e-05,
|
|
"loss": 0.4081,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 1.2575496117342537,
|
|
"grad_norm": 0.2512275325163774,
|
|
"learning_rate": 1.2511744893880286e-05,
|
|
"loss": 0.4297,
|
|
"step": 8745
|
|
},
|
|
{
|
|
"epoch": 1.2582686223756112,
|
|
"grad_norm": 0.25050196308698536,
|
|
"learning_rate": 1.250445610155833e-05,
|
|
"loss": 0.4396,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 1.2589876330169687,
|
|
"grad_norm": 0.24522131997693686,
|
|
"learning_rate": 1.2497165889390269e-05,
|
|
"loss": 0.4147,
|
|
"step": 8755
|
|
},
|
|
{
|
|
"epoch": 1.2597066436583262,
|
|
"grad_norm": 0.25364950567029165,
|
|
"learning_rate": 1.2489874261509123e-05,
|
|
"loss": 0.4313,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 1.2604256542996837,
|
|
"grad_norm": 0.24474088408856176,
|
|
"learning_rate": 1.2482581222048724e-05,
|
|
"loss": 0.4146,
|
|
"step": 8765
|
|
},
|
|
{
|
|
"epoch": 1.2611446649410412,
|
|
"grad_norm": 0.24892207109833964,
|
|
"learning_rate": 1.2475286775143698e-05,
|
|
"loss": 0.4079,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 1.2618636755823986,
|
|
"grad_norm": 0.24562891750634785,
|
|
"learning_rate": 1.246799092492947e-05,
|
|
"loss": 0.4153,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 1.2625826862237561,
|
|
"grad_norm": 0.2676145885530362,
|
|
"learning_rate": 1.2460693675542257e-05,
|
|
"loss": 0.4134,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 1.2633016968651136,
|
|
"grad_norm": 0.24387436135045176,
|
|
"learning_rate": 1.2453395031119082e-05,
|
|
"loss": 0.4097,
|
|
"step": 8785
|
|
},
|
|
{
|
|
"epoch": 1.264020707506471,
|
|
"grad_norm": 0.2544788927491767,
|
|
"learning_rate": 1.2446094995797748e-05,
|
|
"loss": 0.4206,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 1.2647397181478286,
|
|
"grad_norm": 0.24259194034592435,
|
|
"learning_rate": 1.2438793573716848e-05,
|
|
"loss": 0.4172,
|
|
"step": 8795
|
|
},
|
|
{
|
|
"epoch": 1.265458728789186,
|
|
"grad_norm": 0.2536800428499078,
|
|
"learning_rate": 1.2431490769015757e-05,
|
|
"loss": 0.428,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 1.2661777394305436,
|
|
"grad_norm": 0.2536542632015627,
|
|
"learning_rate": 1.2424186585834646e-05,
|
|
"loss": 0.4207,
|
|
"step": 8805
|
|
},
|
|
{
|
|
"epoch": 1.266896750071901,
|
|
"grad_norm": 0.2535409529020453,
|
|
"learning_rate": 1.2416881028314457e-05,
|
|
"loss": 0.4292,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 1.2676157607132585,
|
|
"grad_norm": 0.2760392008221144,
|
|
"learning_rate": 1.2409574100596917e-05,
|
|
"loss": 0.4266,
|
|
"step": 8815
|
|
},
|
|
{
|
|
"epoch": 1.268334771354616,
|
|
"grad_norm": 0.26927077491976653,
|
|
"learning_rate": 1.2402265806824528e-05,
|
|
"loss": 0.4254,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 1.2690537819959735,
|
|
"grad_norm": 0.24132731029927765,
|
|
"learning_rate": 1.2394956151140558e-05,
|
|
"loss": 0.4287,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 1.269772792637331,
|
|
"grad_norm": 0.2506050738859499,
|
|
"learning_rate": 1.238764513768906e-05,
|
|
"loss": 0.4217,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 1.2704918032786885,
|
|
"grad_norm": 0.24099722920341918,
|
|
"learning_rate": 1.2380332770614856e-05,
|
|
"loss": 0.4156,
|
|
"step": 8835
|
|
},
|
|
{
|
|
"epoch": 1.271210813920046,
|
|
"grad_norm": 0.25846590141325754,
|
|
"learning_rate": 1.2373019054063528e-05,
|
|
"loss": 0.3999,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 1.2719298245614035,
|
|
"grad_norm": 0.2537504710160106,
|
|
"learning_rate": 1.2365703992181425e-05,
|
|
"loss": 0.42,
|
|
"step": 8845
|
|
},
|
|
{
|
|
"epoch": 1.272648835202761,
|
|
"grad_norm": 0.2500198765457716,
|
|
"learning_rate": 1.235838758911566e-05,
|
|
"loss": 0.4135,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 1.2733678458441184,
|
|
"grad_norm": 0.25212979662710866,
|
|
"learning_rate": 1.2351069849014106e-05,
|
|
"loss": 0.4144,
|
|
"step": 8855
|
|
},
|
|
{
|
|
"epoch": 1.274086856485476,
|
|
"grad_norm": 0.24726969745223198,
|
|
"learning_rate": 1.2343750776025396e-05,
|
|
"loss": 0.4327,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 1.2748058671268334,
|
|
"grad_norm": 0.2418920714048062,
|
|
"learning_rate": 1.2336430374298914e-05,
|
|
"loss": 0.4329,
|
|
"step": 8865
|
|
},
|
|
{
|
|
"epoch": 1.275524877768191,
|
|
"grad_norm": 0.25764744072883017,
|
|
"learning_rate": 1.2329108647984805e-05,
|
|
"loss": 0.4427,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 1.2762438884095484,
|
|
"grad_norm": 0.241722982585825,
|
|
"learning_rate": 1.2321785601233956e-05,
|
|
"loss": 0.4207,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 1.2769628990509059,
|
|
"grad_norm": 0.2496516652588619,
|
|
"learning_rate": 1.2314461238198003e-05,
|
|
"loss": 0.4136,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 1.2776819096922634,
|
|
"grad_norm": 0.25612736017110765,
|
|
"learning_rate": 1.2307135563029343e-05,
|
|
"loss": 0.4077,
|
|
"step": 8885
|
|
},
|
|
{
|
|
"epoch": 1.2784009203336208,
|
|
"grad_norm": 0.2587883532969212,
|
|
"learning_rate": 1.2299808579881096e-05,
|
|
"loss": 0.4061,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 1.2791199309749786,
|
|
"grad_norm": 0.26266354817234644,
|
|
"learning_rate": 1.2292480292907139e-05,
|
|
"loss": 0.4194,
|
|
"step": 8895
|
|
},
|
|
{
|
|
"epoch": 1.2798389416163358,
|
|
"grad_norm": 0.26595637586806237,
|
|
"learning_rate": 1.2285150706262079e-05,
|
|
"loss": 0.4165,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 1.2805579522576935,
|
|
"grad_norm": 0.24296353166495893,
|
|
"learning_rate": 1.2277819824101267e-05,
|
|
"loss": 0.4156,
|
|
"step": 8905
|
|
},
|
|
{
|
|
"epoch": 1.2812769628990508,
|
|
"grad_norm": 0.24802595972323246,
|
|
"learning_rate": 1.227048765058078e-05,
|
|
"loss": 0.4358,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 1.2819959735404085,
|
|
"grad_norm": 0.24939904196648624,
|
|
"learning_rate": 1.2263154189857437e-05,
|
|
"loss": 0.4202,
|
|
"step": 8915
|
|
},
|
|
{
|
|
"epoch": 1.2827149841817658,
|
|
"grad_norm": 0.2631457107121235,
|
|
"learning_rate": 1.225581944608878e-05,
|
|
"loss": 0.4221,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 1.2834339948231235,
|
|
"grad_norm": 0.25265520731043345,
|
|
"learning_rate": 1.2248483423433075e-05,
|
|
"loss": 0.4254,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 1.2841530054644807,
|
|
"grad_norm": 0.2571500562558599,
|
|
"learning_rate": 1.2241146126049326e-05,
|
|
"loss": 0.4205,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 1.2848720161058385,
|
|
"grad_norm": 0.261232092036823,
|
|
"learning_rate": 1.2233807558097248e-05,
|
|
"loss": 0.4057,
|
|
"step": 8935
|
|
},
|
|
{
|
|
"epoch": 1.2855910267471957,
|
|
"grad_norm": 0.2701298075039673,
|
|
"learning_rate": 1.2226467723737282e-05,
|
|
"loss": 0.4073,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 1.2863100373885534,
|
|
"grad_norm": 0.23623260788931721,
|
|
"learning_rate": 1.221912662713058e-05,
|
|
"loss": 0.4246,
|
|
"step": 8945
|
|
},
|
|
{
|
|
"epoch": 1.287029048029911,
|
|
"grad_norm": 0.23770205432246938,
|
|
"learning_rate": 1.221178427243902e-05,
|
|
"loss": 0.4206,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 1.2877480586712684,
|
|
"grad_norm": 0.23938848354815995,
|
|
"learning_rate": 1.2204440663825185e-05,
|
|
"loss": 0.4264,
|
|
"step": 8955
|
|
},
|
|
{
|
|
"epoch": 1.288467069312626,
|
|
"grad_norm": 0.24699020765493063,
|
|
"learning_rate": 1.2197095805452374e-05,
|
|
"loss": 0.4169,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 1.2891860799539834,
|
|
"grad_norm": 0.2534254886038366,
|
|
"learning_rate": 1.2189749701484593e-05,
|
|
"loss": 0.4155,
|
|
"step": 8965
|
|
},
|
|
{
|
|
"epoch": 1.2899050905953409,
|
|
"grad_norm": 0.2589522794130376,
|
|
"learning_rate": 1.2182402356086552e-05,
|
|
"loss": 0.4152,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 1.2906241012366984,
|
|
"grad_norm": 0.24692218182129114,
|
|
"learning_rate": 1.2175053773423663e-05,
|
|
"loss": 0.4281,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 1.2913431118780558,
|
|
"grad_norm": 0.24207100979568935,
|
|
"learning_rate": 1.2167703957662047e-05,
|
|
"loss": 0.4139,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 1.2920621225194133,
|
|
"grad_norm": 0.25062807272082144,
|
|
"learning_rate": 1.2160352912968521e-05,
|
|
"loss": 0.4171,
|
|
"step": 8985
|
|
},
|
|
{
|
|
"epoch": 1.2927811331607708,
|
|
"grad_norm": 0.24926449357321184,
|
|
"learning_rate": 1.2153000643510593e-05,
|
|
"loss": 0.4205,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 1.2935001438021283,
|
|
"grad_norm": 0.2453477272661754,
|
|
"learning_rate": 1.214564715345647e-05,
|
|
"loss": 0.4141,
|
|
"step": 8995
|
|
},
|
|
{
|
|
"epoch": 1.2942191544434858,
|
|
"grad_norm": 0.25636505060068915,
|
|
"learning_rate": 1.2138292446975055e-05,
|
|
"loss": 0.4308,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 1.2949381650848433,
|
|
"grad_norm": 0.26972091191831327,
|
|
"learning_rate": 1.2130936528235936e-05,
|
|
"loss": 0.4046,
|
|
"step": 9005
|
|
},
|
|
{
|
|
"epoch": 1.2956571757262008,
|
|
"grad_norm": 0.25918071989917435,
|
|
"learning_rate": 1.2123579401409384e-05,
|
|
"loss": 0.4279,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 1.2963761863675582,
|
|
"grad_norm": 0.2630262093217878,
|
|
"learning_rate": 1.2116221070666365e-05,
|
|
"loss": 0.4175,
|
|
"step": 9015
|
|
},
|
|
{
|
|
"epoch": 1.2970951970089157,
|
|
"grad_norm": 0.24243825902734034,
|
|
"learning_rate": 1.2108861540178523e-05,
|
|
"loss": 0.4122,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 1.2978142076502732,
|
|
"grad_norm": 0.25506959698223824,
|
|
"learning_rate": 1.2101500814118173e-05,
|
|
"loss": 0.4152,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 1.2985332182916307,
|
|
"grad_norm": 0.2631526036818052,
|
|
"learning_rate": 1.2094138896658323e-05,
|
|
"loss": 0.4216,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 1.2992522289329882,
|
|
"grad_norm": 0.24600693980020885,
|
|
"learning_rate": 1.2086775791972652e-05,
|
|
"loss": 0.419,
|
|
"step": 9035
|
|
},
|
|
{
|
|
"epoch": 1.2999712395743457,
|
|
"grad_norm": 0.25090266996787813,
|
|
"learning_rate": 1.2079411504235503e-05,
|
|
"loss": 0.4295,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 1.3006902502157032,
|
|
"grad_norm": 0.24291223275973534,
|
|
"learning_rate": 1.2072046037621898e-05,
|
|
"loss": 0.4222,
|
|
"step": 9045
|
|
},
|
|
{
|
|
"epoch": 1.3014092608570607,
|
|
"grad_norm": 0.23813746309860986,
|
|
"learning_rate": 1.206467939630753e-05,
|
|
"loss": 0.4103,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 1.3021282714984181,
|
|
"grad_norm": 0.24884896644308455,
|
|
"learning_rate": 1.205731158446875e-05,
|
|
"loss": 0.4172,
|
|
"step": 9055
|
|
},
|
|
{
|
|
"epoch": 1.3028472821397756,
|
|
"grad_norm": 0.2555252116292196,
|
|
"learning_rate": 1.2049942606282575e-05,
|
|
"loss": 0.4252,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 1.3035662927811331,
|
|
"grad_norm": 0.24401614065301408,
|
|
"learning_rate": 1.2042572465926687e-05,
|
|
"loss": 0.427,
|
|
"step": 9065
|
|
},
|
|
{
|
|
"epoch": 1.3042853034224906,
|
|
"grad_norm": 0.24417830806495353,
|
|
"learning_rate": 1.2035201167579427e-05,
|
|
"loss": 0.4256,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 1.305004314063848,
|
|
"grad_norm": 0.2715529724400531,
|
|
"learning_rate": 1.2027828715419782e-05,
|
|
"loss": 0.4265,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 1.3057233247052056,
|
|
"grad_norm": 0.25038790481816453,
|
|
"learning_rate": 1.202045511362741e-05,
|
|
"loss": 0.4268,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 1.306442335346563,
|
|
"grad_norm": 0.2634032672442618,
|
|
"learning_rate": 1.2013080366382608e-05,
|
|
"loss": 0.4201,
|
|
"step": 9085
|
|
},
|
|
{
|
|
"epoch": 1.3071613459879206,
|
|
"grad_norm": 0.253186940890943,
|
|
"learning_rate": 1.2005704477866324e-05,
|
|
"loss": 0.4158,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 1.307880356629278,
|
|
"grad_norm": 0.2336901935095103,
|
|
"learning_rate": 1.1998327452260156e-05,
|
|
"loss": 0.4315,
|
|
"step": 9095
|
|
},
|
|
{
|
|
"epoch": 1.3085993672706355,
|
|
"grad_norm": 0.25825144487765334,
|
|
"learning_rate": 1.1990949293746348e-05,
|
|
"loss": 0.4061,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 1.309318377911993,
|
|
"grad_norm": 0.2444218307835328,
|
|
"learning_rate": 1.1983570006507782e-05,
|
|
"loss": 0.4368,
|
|
"step": 9105
|
|
},
|
|
{
|
|
"epoch": 1.3100373885533507,
|
|
"grad_norm": 0.24922025162474865,
|
|
"learning_rate": 1.1976189594727984e-05,
|
|
"loss": 0.4103,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 1.310756399194708,
|
|
"grad_norm": 0.2522836576143024,
|
|
"learning_rate": 1.1968808062591115e-05,
|
|
"loss": 0.4157,
|
|
"step": 9115
|
|
},
|
|
{
|
|
"epoch": 1.3114754098360657,
|
|
"grad_norm": 0.25283861678020636,
|
|
"learning_rate": 1.196142541428197e-05,
|
|
"loss": 0.4139,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 1.312194420477423,
|
|
"grad_norm": 0.24988364344354413,
|
|
"learning_rate": 1.1954041653985982e-05,
|
|
"loss": 0.4301,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 1.3129134311187807,
|
|
"grad_norm": 0.2553856076148972,
|
|
"learning_rate": 1.1946656785889206e-05,
|
|
"loss": 0.418,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 1.313632441760138,
|
|
"grad_norm": 0.2550773085985317,
|
|
"learning_rate": 1.1939270814178337e-05,
|
|
"loss": 0.4185,
|
|
"step": 9135
|
|
},
|
|
{
|
|
"epoch": 1.3143514524014956,
|
|
"grad_norm": 0.25301840877820897,
|
|
"learning_rate": 1.193188374304068e-05,
|
|
"loss": 0.4046,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 1.315070463042853,
|
|
"grad_norm": 0.24418782329920338,
|
|
"learning_rate": 1.1924495576664176e-05,
|
|
"loss": 0.4207,
|
|
"step": 9145
|
|
},
|
|
{
|
|
"epoch": 1.3157894736842106,
|
|
"grad_norm": 0.2570383060224066,
|
|
"learning_rate": 1.1917106319237386e-05,
|
|
"loss": 0.4334,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 1.3165084843255679,
|
|
"grad_norm": 0.2538138864816419,
|
|
"learning_rate": 1.1909715974949481e-05,
|
|
"loss": 0.429,
|
|
"step": 9155
|
|
},
|
|
{
|
|
"epoch": 1.3172274949669256,
|
|
"grad_norm": 0.24631562920038322,
|
|
"learning_rate": 1.1902324547990257e-05,
|
|
"loss": 0.4197,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 1.317946505608283,
|
|
"grad_norm": 0.2596974676338609,
|
|
"learning_rate": 1.189493204255012e-05,
|
|
"loss": 0.4207,
|
|
"step": 9165
|
|
},
|
|
{
|
|
"epoch": 1.3186655162496406,
|
|
"grad_norm": 0.2676532626426566,
|
|
"learning_rate": 1.1887538462820088e-05,
|
|
"loss": 0.4163,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 1.319384526890998,
|
|
"grad_norm": 0.2515849338266774,
|
|
"learning_rate": 1.1880143812991785e-05,
|
|
"loss": 0.4227,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 1.3201035375323555,
|
|
"grad_norm": 0.24876902146122476,
|
|
"learning_rate": 1.1872748097257446e-05,
|
|
"loss": 0.4217,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 1.320822548173713,
|
|
"grad_norm": 0.24178909751029976,
|
|
"learning_rate": 1.1865351319809913e-05,
|
|
"loss": 0.4027,
|
|
"step": 9185
|
|
},
|
|
{
|
|
"epoch": 1.3215415588150705,
|
|
"grad_norm": 0.2743929624580491,
|
|
"learning_rate": 1.185795348484262e-05,
|
|
"loss": 0.4103,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 1.322260569456428,
|
|
"grad_norm": 0.24909239087389742,
|
|
"learning_rate": 1.1850554596549606e-05,
|
|
"loss": 0.4062,
|
|
"step": 9195
|
|
},
|
|
{
|
|
"epoch": 1.3229795800977855,
|
|
"grad_norm": 0.26268882213569555,
|
|
"learning_rate": 1.1843154659125513e-05,
|
|
"loss": 0.4198,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 1.323698590739143,
|
|
"grad_norm": 0.2593561140192512,
|
|
"learning_rate": 1.1835753676765567e-05,
|
|
"loss": 0.4401,
|
|
"step": 9205
|
|
},
|
|
{
|
|
"epoch": 1.3244176013805005,
|
|
"grad_norm": 0.23760104891675088,
|
|
"learning_rate": 1.1828351653665596e-05,
|
|
"loss": 0.4125,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 1.325136612021858,
|
|
"grad_norm": 0.2579797719848589,
|
|
"learning_rate": 1.1820948594022009e-05,
|
|
"loss": 0.4312,
|
|
"step": 9215
|
|
},
|
|
{
|
|
"epoch": 1.3258556226632154,
|
|
"grad_norm": 0.2549569947724984,
|
|
"learning_rate": 1.1813544502031808e-05,
|
|
"loss": 0.4266,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 1.326574633304573,
|
|
"grad_norm": 0.27832142998017334,
|
|
"learning_rate": 1.180613938189258e-05,
|
|
"loss": 0.4066,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 1.3272936439459304,
|
|
"grad_norm": 0.24598302504979594,
|
|
"learning_rate": 1.17987332378025e-05,
|
|
"loss": 0.4133,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 1.328012654587288,
|
|
"grad_norm": 0.23934405812207502,
|
|
"learning_rate": 1.1791326073960313e-05,
|
|
"loss": 0.4147,
|
|
"step": 9235
|
|
},
|
|
{
|
|
"epoch": 1.3287316652286454,
|
|
"grad_norm": 0.2470972899882699,
|
|
"learning_rate": 1.1783917894565344e-05,
|
|
"loss": 0.4194,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 1.3294506758700029,
|
|
"grad_norm": 0.25432228325449185,
|
|
"learning_rate": 1.1776508703817503e-05,
|
|
"loss": 0.4259,
|
|
"step": 9245
|
|
},
|
|
{
|
|
"epoch": 1.3301696865113604,
|
|
"grad_norm": 0.2509491368439348,
|
|
"learning_rate": 1.176909850591726e-05,
|
|
"loss": 0.4315,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 1.3308886971527178,
|
|
"grad_norm": 0.24480475137300584,
|
|
"learning_rate": 1.176168730506567e-05,
|
|
"loss": 0.4211,
|
|
"step": 9255
|
|
},
|
|
{
|
|
"epoch": 1.3316077077940753,
|
|
"grad_norm": 0.2465616123435715,
|
|
"learning_rate": 1.1754275105464349e-05,
|
|
"loss": 0.4132,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 1.3323267184354328,
|
|
"grad_norm": 0.24228887418427103,
|
|
"learning_rate": 1.1746861911315476e-05,
|
|
"loss": 0.4167,
|
|
"step": 9265
|
|
},
|
|
{
|
|
"epoch": 1.3330457290767903,
|
|
"grad_norm": 0.24133922804214428,
|
|
"learning_rate": 1.1739447726821798e-05,
|
|
"loss": 0.4313,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 1.3337647397181478,
|
|
"grad_norm": 0.2577785421881333,
|
|
"learning_rate": 1.1732032556186626e-05,
|
|
"loss": 0.415,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 1.3344837503595053,
|
|
"grad_norm": 0.2628027543411037,
|
|
"learning_rate": 1.1724616403613827e-05,
|
|
"loss": 0.4035,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 1.3352027610008628,
|
|
"grad_norm": 0.23816228620413699,
|
|
"learning_rate": 1.1717199273307826e-05,
|
|
"loss": 0.4208,
|
|
"step": 9285
|
|
},
|
|
{
|
|
"epoch": 1.3359217716422203,
|
|
"grad_norm": 0.263877912817765,
|
|
"learning_rate": 1.1709781169473599e-05,
|
|
"loss": 0.4236,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 1.3366407822835777,
|
|
"grad_norm": 0.2438005305187124,
|
|
"learning_rate": 1.1702362096316675e-05,
|
|
"loss": 0.4227,
|
|
"step": 9295
|
|
},
|
|
{
|
|
"epoch": 1.3373597929249352,
|
|
"grad_norm": 0.2621549663696597,
|
|
"learning_rate": 1.169494205804314e-05,
|
|
"loss": 0.4241,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 1.3380788035662927,
|
|
"grad_norm": 0.24567814302505314,
|
|
"learning_rate": 1.1687521058859612e-05,
|
|
"loss": 0.4281,
|
|
"step": 9305
|
|
},
|
|
{
|
|
"epoch": 1.3387978142076502,
|
|
"grad_norm": 0.26479997475978473,
|
|
"learning_rate": 1.1680099102973271e-05,
|
|
"loss": 0.4254,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 1.3395168248490077,
|
|
"grad_norm": 0.23805591644841448,
|
|
"learning_rate": 1.1672676194591825e-05,
|
|
"loss": 0.4172,
|
|
"step": 9315
|
|
},
|
|
{
|
|
"epoch": 1.3402358354903652,
|
|
"grad_norm": 0.25929710952180146,
|
|
"learning_rate": 1.1665252337923529e-05,
|
|
"loss": 0.42,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 1.3409548461317227,
|
|
"grad_norm": 0.24398070016346668,
|
|
"learning_rate": 1.165782753717718e-05,
|
|
"loss": 0.4106,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 1.3416738567730802,
|
|
"grad_norm": 0.25164634515915246,
|
|
"learning_rate": 1.1650401796562098e-05,
|
|
"loss": 0.4204,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 1.3423928674144379,
|
|
"grad_norm": 0.2594687930418735,
|
|
"learning_rate": 1.1642975120288148e-05,
|
|
"loss": 0.4362,
|
|
"step": 9335
|
|
},
|
|
{
|
|
"epoch": 1.3431118780557951,
|
|
"grad_norm": 0.25432427841688693,
|
|
"learning_rate": 1.1635547512565719e-05,
|
|
"loss": 0.401,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 1.3438308886971528,
|
|
"grad_norm": 0.2658648488496538,
|
|
"learning_rate": 1.1628118977605724e-05,
|
|
"loss": 0.4117,
|
|
"step": 9345
|
|
},
|
|
{
|
|
"epoch": 1.34454989933851,
|
|
"grad_norm": 0.27131429608303353,
|
|
"learning_rate": 1.1620689519619614e-05,
|
|
"loss": 0.4269,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 1.3452689099798678,
|
|
"grad_norm": 0.24804847320340484,
|
|
"learning_rate": 1.1613259142819352e-05,
|
|
"loss": 0.4191,
|
|
"step": 9355
|
|
},
|
|
{
|
|
"epoch": 1.345987920621225,
|
|
"grad_norm": 0.2567138794450249,
|
|
"learning_rate": 1.160582785141743e-05,
|
|
"loss": 0.4088,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 1.3467069312625828,
|
|
"grad_norm": 0.2544226922118282,
|
|
"learning_rate": 1.159839564962685e-05,
|
|
"loss": 0.4294,
|
|
"step": 9365
|
|
},
|
|
{
|
|
"epoch": 1.34742594190394,
|
|
"grad_norm": 0.25368336566165745,
|
|
"learning_rate": 1.159096254166114e-05,
|
|
"loss": 0.4211,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 1.3481449525452978,
|
|
"grad_norm": 0.2533635232451979,
|
|
"learning_rate": 1.158352853173433e-05,
|
|
"loss": 0.4158,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 1.348863963186655,
|
|
"grad_norm": 0.24139131564856997,
|
|
"learning_rate": 1.1576093624060973e-05,
|
|
"loss": 0.4182,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 1.3495829738280127,
|
|
"grad_norm": 0.25838832899405517,
|
|
"learning_rate": 1.1568657822856124e-05,
|
|
"loss": 0.4118,
|
|
"step": 9385
|
|
},
|
|
{
|
|
"epoch": 1.3503019844693702,
|
|
"grad_norm": 0.24150676562955783,
|
|
"learning_rate": 1.1561221132335345e-05,
|
|
"loss": 0.4262,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 1.3510209951107277,
|
|
"grad_norm": 0.24973426047606528,
|
|
"learning_rate": 1.1553783556714705e-05,
|
|
"loss": 0.4327,
|
|
"step": 9395
|
|
},
|
|
{
|
|
"epoch": 1.3517400057520852,
|
|
"grad_norm": 0.2552059385133282,
|
|
"learning_rate": 1.1546345100210774e-05,
|
|
"loss": 0.4231,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 1.3524590163934427,
|
|
"grad_norm": 0.24879979757131104,
|
|
"learning_rate": 1.153890576704062e-05,
|
|
"loss": 0.4212,
|
|
"step": 9405
|
|
},
|
|
{
|
|
"epoch": 1.3531780270348002,
|
|
"grad_norm": 0.2510385449145052,
|
|
"learning_rate": 1.1531465561421808e-05,
|
|
"loss": 0.4266,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 1.3538970376761577,
|
|
"grad_norm": 0.2458257142853843,
|
|
"learning_rate": 1.1524024487572399e-05,
|
|
"loss": 0.4166,
|
|
"step": 9415
|
|
},
|
|
{
|
|
"epoch": 1.3546160483175151,
|
|
"grad_norm": 0.24926383458245444,
|
|
"learning_rate": 1.1516582549710947e-05,
|
|
"loss": 0.4169,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 1.3553350589588726,
|
|
"grad_norm": 0.26251012756652353,
|
|
"learning_rate": 1.1509139752056493e-05,
|
|
"loss": 0.4139,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 1.3560540696002301,
|
|
"grad_norm": 0.25493395903499944,
|
|
"learning_rate": 1.1501696098828568e-05,
|
|
"loss": 0.4397,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 1.3567730802415876,
|
|
"grad_norm": 0.2484301244686449,
|
|
"learning_rate": 1.1494251594247183e-05,
|
|
"loss": 0.4132,
|
|
"step": 9435
|
|
},
|
|
{
|
|
"epoch": 1.357492090882945,
|
|
"grad_norm": 0.25680415561428516,
|
|
"learning_rate": 1.1486806242532839e-05,
|
|
"loss": 0.4157,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 1.3582111015243026,
|
|
"grad_norm": 0.25511888621037243,
|
|
"learning_rate": 1.1479360047906511e-05,
|
|
"loss": 0.4248,
|
|
"step": 9445
|
|
},
|
|
{
|
|
"epoch": 1.35893011216566,
|
|
"grad_norm": 0.24114788267993964,
|
|
"learning_rate": 1.1471913014589665e-05,
|
|
"loss": 0.4089,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 1.3596491228070176,
|
|
"grad_norm": 0.24653045174658883,
|
|
"learning_rate": 1.1464465146804218e-05,
|
|
"loss": 0.4121,
|
|
"step": 9455
|
|
},
|
|
{
|
|
"epoch": 1.360368133448375,
|
|
"grad_norm": 0.2535711076643114,
|
|
"learning_rate": 1.145701644877258e-05,
|
|
"loss": 0.4175,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 1.3610871440897325,
|
|
"grad_norm": 0.23796423442406128,
|
|
"learning_rate": 1.1449566924717627e-05,
|
|
"loss": 0.4115,
|
|
"step": 9465
|
|
},
|
|
{
|
|
"epoch": 1.36180615473109,
|
|
"grad_norm": 0.25045376780440926,
|
|
"learning_rate": 1.1442116578862701e-05,
|
|
"loss": 0.4182,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 1.3625251653724475,
|
|
"grad_norm": 0.24208080670495713,
|
|
"learning_rate": 1.1434665415431614e-05,
|
|
"loss": 0.4127,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 1.363244176013805,
|
|
"grad_norm": 0.2559483678996027,
|
|
"learning_rate": 1.1427213438648636e-05,
|
|
"loss": 0.4128,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 1.3639631866551625,
|
|
"grad_norm": 0.2509038564055273,
|
|
"learning_rate": 1.1419760652738498e-05,
|
|
"loss": 0.4253,
|
|
"step": 9485
|
|
},
|
|
{
|
|
"epoch": 1.36468219729652,
|
|
"grad_norm": 0.26408296167827605,
|
|
"learning_rate": 1.1412307061926396e-05,
|
|
"loss": 0.4242,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 1.3654012079378774,
|
|
"grad_norm": 0.24820214493663295,
|
|
"learning_rate": 1.140485267043798e-05,
|
|
"loss": 0.4198,
|
|
"step": 9495
|
|
},
|
|
{
|
|
"epoch": 1.366120218579235,
|
|
"grad_norm": 0.24623099519803363,
|
|
"learning_rate": 1.1397397482499352e-05,
|
|
"loss": 0.4192,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 1.3668392292205924,
|
|
"grad_norm": 0.2541389919088227,
|
|
"learning_rate": 1.1389941502337063e-05,
|
|
"loss": 0.4114,
|
|
"step": 9505
|
|
},
|
|
{
|
|
"epoch": 1.36755823986195,
|
|
"grad_norm": 0.25804970955333406,
|
|
"learning_rate": 1.138248473417812e-05,
|
|
"loss": 0.4182,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 1.3682772505033074,
|
|
"grad_norm": 0.2417742930782717,
|
|
"learning_rate": 1.1375027182249971e-05,
|
|
"loss": 0.4231,
|
|
"step": 9515
|
|
},
|
|
{
|
|
"epoch": 1.3689962611446649,
|
|
"grad_norm": 0.2690508189851924,
|
|
"learning_rate": 1.1367568850780511e-05,
|
|
"loss": 0.4412,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 1.3697152717860224,
|
|
"grad_norm": 0.26211289218773653,
|
|
"learning_rate": 1.1360109743998075e-05,
|
|
"loss": 0.4319,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 1.3704342824273799,
|
|
"grad_norm": 0.25732312239806404,
|
|
"learning_rate": 1.1352649866131447e-05,
|
|
"loss": 0.4102,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 1.3711532930687373,
|
|
"grad_norm": 0.25182004898998794,
|
|
"learning_rate": 1.1345189221409828e-05,
|
|
"loss": 0.4109,
|
|
"step": 9535
|
|
},
|
|
{
|
|
"epoch": 1.3718723037100948,
|
|
"grad_norm": 0.24814394052739985,
|
|
"learning_rate": 1.133772781406287e-05,
|
|
"loss": 0.4182,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 1.3725913143514523,
|
|
"grad_norm": 0.25001805051958936,
|
|
"learning_rate": 1.133026564832066e-05,
|
|
"loss": 0.4153,
|
|
"step": 9545
|
|
},
|
|
{
|
|
"epoch": 1.37331032499281,
|
|
"grad_norm": 0.24659292069576322,
|
|
"learning_rate": 1.13228027284137e-05,
|
|
"loss": 0.4141,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 1.3740293356341673,
|
|
"grad_norm": 0.24357609150858484,
|
|
"learning_rate": 1.131533905857293e-05,
|
|
"loss": 0.4301,
|
|
"step": 9555
|
|
},
|
|
{
|
|
"epoch": 1.374748346275525,
|
|
"grad_norm": 0.2575243150984694,
|
|
"learning_rate": 1.1307874643029715e-05,
|
|
"loss": 0.4189,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 1.3754673569168823,
|
|
"grad_norm": 0.24769625836215414,
|
|
"learning_rate": 1.1300409486015837e-05,
|
|
"loss": 0.4251,
|
|
"step": 9565
|
|
},
|
|
{
|
|
"epoch": 1.37618636755824,
|
|
"grad_norm": 0.2522589458200581,
|
|
"learning_rate": 1.1292943591763506e-05,
|
|
"loss": 0.4152,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 1.3769053781995972,
|
|
"grad_norm": 0.2533970724785977,
|
|
"learning_rate": 1.1285476964505341e-05,
|
|
"loss": 0.4109,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 1.377624388840955,
|
|
"grad_norm": 0.24809731784067882,
|
|
"learning_rate": 1.1278009608474389e-05,
|
|
"loss": 0.426,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 1.3783433994823122,
|
|
"grad_norm": 0.24430656121325212,
|
|
"learning_rate": 1.1270541527904098e-05,
|
|
"loss": 0.4233,
|
|
"step": 9585
|
|
},
|
|
{
|
|
"epoch": 1.37906241012367,
|
|
"grad_norm": 0.24883279197937416,
|
|
"learning_rate": 1.1263072727028325e-05,
|
|
"loss": 0.4131,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 1.3797814207650272,
|
|
"grad_norm": 0.24854922226819207,
|
|
"learning_rate": 1.1255603210081358e-05,
|
|
"loss": 0.4103,
|
|
"step": 9595
|
|
},
|
|
{
|
|
"epoch": 1.380500431406385,
|
|
"grad_norm": 0.25070734498380887,
|
|
"learning_rate": 1.1248132981297858e-05,
|
|
"loss": 0.4332,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 1.3812194420477424,
|
|
"grad_norm": 0.24205854629117363,
|
|
"learning_rate": 1.1240662044912917e-05,
|
|
"loss": 0.4062,
|
|
"step": 9605
|
|
},
|
|
{
|
|
"epoch": 1.3819384526890999,
|
|
"grad_norm": 0.24975559484913917,
|
|
"learning_rate": 1.1233190405162014e-05,
|
|
"loss": 0.4234,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 1.3826574633304574,
|
|
"grad_norm": 0.255512768099968,
|
|
"learning_rate": 1.1225718066281029e-05,
|
|
"loss": 0.437,
|
|
"step": 9615
|
|
},
|
|
{
|
|
"epoch": 1.3833764739718148,
|
|
"grad_norm": 0.25492180126484526,
|
|
"learning_rate": 1.1218245032506241e-05,
|
|
"loss": 0.4227,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 1.3840954846131723,
|
|
"grad_norm": 0.25878749808087337,
|
|
"learning_rate": 1.1210771308074321e-05,
|
|
"loss": 0.4181,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 1.3848144952545298,
|
|
"grad_norm": 0.2516230476695916,
|
|
"learning_rate": 1.1203296897222335e-05,
|
|
"loss": 0.4273,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 1.3855335058958873,
|
|
"grad_norm": 0.24514289255287167,
|
|
"learning_rate": 1.119582180418773e-05,
|
|
"loss": 0.4333,
|
|
"step": 9635
|
|
},
|
|
{
|
|
"epoch": 1.3862525165372448,
|
|
"grad_norm": 0.2556132386527134,
|
|
"learning_rate": 1.1188346033208349e-05,
|
|
"loss": 0.4116,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 1.3869715271786023,
|
|
"grad_norm": 0.25596622811491904,
|
|
"learning_rate": 1.1180869588522415e-05,
|
|
"loss": 0.4357,
|
|
"step": 9645
|
|
},
|
|
{
|
|
"epoch": 1.3876905378199598,
|
|
"grad_norm": 0.24824004793430757,
|
|
"learning_rate": 1.1173392474368532e-05,
|
|
"loss": 0.4123,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 1.3884095484613173,
|
|
"grad_norm": 0.24390818336880687,
|
|
"learning_rate": 1.1165914694985684e-05,
|
|
"loss": 0.4114,
|
|
"step": 9655
|
|
},
|
|
{
|
|
"epoch": 1.3891285591026747,
|
|
"grad_norm": 0.24472961261740755,
|
|
"learning_rate": 1.1158436254613237e-05,
|
|
"loss": 0.4106,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 1.3898475697440322,
|
|
"grad_norm": 0.2562298705368656,
|
|
"learning_rate": 1.1150957157490922e-05,
|
|
"loss": 0.4194,
|
|
"step": 9665
|
|
},
|
|
{
|
|
"epoch": 1.3905665803853897,
|
|
"grad_norm": 0.25020129785017103,
|
|
"learning_rate": 1.114347740785885e-05,
|
|
"loss": 0.4197,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 1.3912855910267472,
|
|
"grad_norm": 0.2669599095249636,
|
|
"learning_rate": 1.1135997009957504e-05,
|
|
"loss": 0.4206,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 1.3920046016681047,
|
|
"grad_norm": 0.26084835531794537,
|
|
"learning_rate": 1.1128515968027729e-05,
|
|
"loss": 0.4285,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 1.3927236123094622,
|
|
"grad_norm": 0.24661487482958494,
|
|
"learning_rate": 1.112103428631073e-05,
|
|
"loss": 0.4266,
|
|
"step": 9685
|
|
},
|
|
{
|
|
"epoch": 1.3934426229508197,
|
|
"grad_norm": 0.25431214598260804,
|
|
"learning_rate": 1.1113551969048088e-05,
|
|
"loss": 0.4391,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 1.3941616335921772,
|
|
"grad_norm": 0.26744876168382925,
|
|
"learning_rate": 1.1106069020481738e-05,
|
|
"loss": 0.4286,
|
|
"step": 9695
|
|
},
|
|
{
|
|
"epoch": 1.3948806442335346,
|
|
"grad_norm": 0.2536205533902165,
|
|
"learning_rate": 1.1098585444853969e-05,
|
|
"loss": 0.4091,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 1.3955996548748921,
|
|
"grad_norm": 0.2526351650046927,
|
|
"learning_rate": 1.1091101246407431e-05,
|
|
"loss": 0.4234,
|
|
"step": 9705
|
|
},
|
|
{
|
|
"epoch": 1.3963186655162496,
|
|
"grad_norm": 0.24987142557874195,
|
|
"learning_rate": 1.1083616429385125e-05,
|
|
"loss": 0.395,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 1.397037676157607,
|
|
"grad_norm": 0.2500600739779468,
|
|
"learning_rate": 1.1076130998030401e-05,
|
|
"loss": 0.4114,
|
|
"step": 9715
|
|
},
|
|
{
|
|
"epoch": 1.3977566867989646,
|
|
"grad_norm": 0.25971817692692917,
|
|
"learning_rate": 1.106864495658696e-05,
|
|
"loss": 0.4156,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 1.398475697440322,
|
|
"grad_norm": 0.24390586391176858,
|
|
"learning_rate": 1.106115830929885e-05,
|
|
"loss": 0.4215,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 1.3991947080816796,
|
|
"grad_norm": 0.2572365424296446,
|
|
"learning_rate": 1.105367106041046e-05,
|
|
"loss": 0.4264,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 1.399913718723037,
|
|
"grad_norm": 0.2531752093277882,
|
|
"learning_rate": 1.1046183214166515e-05,
|
|
"loss": 0.4308,
|
|
"step": 9735
|
|
},
|
|
{
|
|
"epoch": 1.4006327293643945,
|
|
"grad_norm": 0.310930690806812,
|
|
"learning_rate": 1.1038694774812091e-05,
|
|
"loss": 0.4036,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 1.401351740005752,
|
|
"grad_norm": 0.24453563822550906,
|
|
"learning_rate": 1.1031205746592593e-05,
|
|
"loss": 0.4019,
|
|
"step": 9745
|
|
},
|
|
{
|
|
"epoch": 1.4020707506471095,
|
|
"grad_norm": 0.24514692054203863,
|
|
"learning_rate": 1.1023716133753758e-05,
|
|
"loss": 0.417,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 1.402789761288467,
|
|
"grad_norm": 0.267653217070886,
|
|
"learning_rate": 1.1016225940541654e-05,
|
|
"loss": 0.4208,
|
|
"step": 9755
|
|
},
|
|
{
|
|
"epoch": 1.4035087719298245,
|
|
"grad_norm": 0.24405169547485742,
|
|
"learning_rate": 1.1008735171202685e-05,
|
|
"loss": 0.4348,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 1.404227782571182,
|
|
"grad_norm": 0.2592343425587176,
|
|
"learning_rate": 1.1001243829983575e-05,
|
|
"loss": 0.4284,
|
|
"step": 9765
|
|
},
|
|
{
|
|
"epoch": 1.4049467932125395,
|
|
"grad_norm": 0.26906403680949453,
|
|
"learning_rate": 1.0993751921131375e-05,
|
|
"loss": 0.4178,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 1.4056658038538972,
|
|
"grad_norm": 0.24745255746857836,
|
|
"learning_rate": 1.098625944889346e-05,
|
|
"loss": 0.4232,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 1.4063848144952544,
|
|
"grad_norm": 0.24632817658584125,
|
|
"learning_rate": 1.097876641751752e-05,
|
|
"loss": 0.4149,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 1.4071038251366121,
|
|
"grad_norm": 0.2950557010074856,
|
|
"learning_rate": 1.0971272831251557e-05,
|
|
"loss": 0.4328,
|
|
"step": 9785
|
|
},
|
|
{
|
|
"epoch": 1.4078228357779694,
|
|
"grad_norm": 0.25024781565923276,
|
|
"learning_rate": 1.0963778694343908e-05,
|
|
"loss": 0.4059,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 1.4085418464193271,
|
|
"grad_norm": 0.2597517594562987,
|
|
"learning_rate": 1.0956284011043199e-05,
|
|
"loss": 0.4194,
|
|
"step": 9795
|
|
},
|
|
{
|
|
"epoch": 1.4092608570606844,
|
|
"grad_norm": 0.26603567927424965,
|
|
"learning_rate": 1.094878878559838e-05,
|
|
"loss": 0.4193,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 1.409979867702042,
|
|
"grad_norm": 0.24432102690114063,
|
|
"learning_rate": 1.0941293022258697e-05,
|
|
"loss": 0.397,
|
|
"step": 9805
|
|
},
|
|
{
|
|
"epoch": 1.4106988783433994,
|
|
"grad_norm": 0.25657527895947746,
|
|
"learning_rate": 1.093379672527371e-05,
|
|
"loss": 0.4076,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 1.411417888984757,
|
|
"grad_norm": 0.2604569361478616,
|
|
"learning_rate": 1.0926299898893284e-05,
|
|
"loss": 0.4036,
|
|
"step": 9815
|
|
},
|
|
{
|
|
"epoch": 1.4121368996261143,
|
|
"grad_norm": 0.24733722138877323,
|
|
"learning_rate": 1.0918802547367575e-05,
|
|
"loss": 0.4177,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 1.412855910267472,
|
|
"grad_norm": 0.2443676689862657,
|
|
"learning_rate": 1.0911304674947043e-05,
|
|
"loss": 0.429,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 1.4135749209088295,
|
|
"grad_norm": 0.2498505339062968,
|
|
"learning_rate": 1.0903806285882441e-05,
|
|
"loss": 0.4248,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 1.414293931550187,
|
|
"grad_norm": 0.2673976560861366,
|
|
"learning_rate": 1.089630738442481e-05,
|
|
"loss": 0.414,
|
|
"step": 9835
|
|
},
|
|
{
|
|
"epoch": 1.4150129421915445,
|
|
"grad_norm": 0.24990001920378563,
|
|
"learning_rate": 1.0888807974825496e-05,
|
|
"loss": 0.4138,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 1.415731952832902,
|
|
"grad_norm": 0.2539166543872164,
|
|
"learning_rate": 1.088130806133612e-05,
|
|
"loss": 0.4048,
|
|
"step": 9845
|
|
},
|
|
{
|
|
"epoch": 1.4164509634742595,
|
|
"grad_norm": 0.2440319611051078,
|
|
"learning_rate": 1.0873807648208587e-05,
|
|
"loss": 0.4264,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 1.417169974115617,
|
|
"grad_norm": 0.24993266032512565,
|
|
"learning_rate": 1.0866306739695097e-05,
|
|
"loss": 0.4138,
|
|
"step": 9855
|
|
},
|
|
{
|
|
"epoch": 1.4178889847569744,
|
|
"grad_norm": 0.24901540914858042,
|
|
"learning_rate": 1.0858805340048121e-05,
|
|
"loss": 0.4342,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 1.418607995398332,
|
|
"grad_norm": 0.2476975113243009,
|
|
"learning_rate": 1.0851303453520414e-05,
|
|
"loss": 0.4061,
|
|
"step": 9865
|
|
},
|
|
{
|
|
"epoch": 1.4193270060396894,
|
|
"grad_norm": 0.2581664703794402,
|
|
"learning_rate": 1.0843801084365004e-05,
|
|
"loss": 0.4074,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 1.420046016681047,
|
|
"grad_norm": 0.252494581026853,
|
|
"learning_rate": 1.0836298236835197e-05,
|
|
"loss": 0.4163,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 1.4207650273224044,
|
|
"grad_norm": 0.2508404738384896,
|
|
"learning_rate": 1.0828794915184556e-05,
|
|
"loss": 0.4096,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 1.4214840379637619,
|
|
"grad_norm": 0.24030733519590325,
|
|
"learning_rate": 1.0821291123666939e-05,
|
|
"loss": 0.4192,
|
|
"step": 9885
|
|
},
|
|
{
|
|
"epoch": 1.4222030486051194,
|
|
"grad_norm": 0.2526576940274779,
|
|
"learning_rate": 1.0813786866536445e-05,
|
|
"loss": 0.4081,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 1.4229220592464769,
|
|
"grad_norm": 0.24985206673472138,
|
|
"learning_rate": 1.0806282148047448e-05,
|
|
"loss": 0.4172,
|
|
"step": 9895
|
|
},
|
|
{
|
|
"epoch": 1.4236410698878343,
|
|
"grad_norm": 0.24422182429307604,
|
|
"learning_rate": 1.0798776972454586e-05,
|
|
"loss": 0.4007,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 1.4243600805291918,
|
|
"grad_norm": 0.2570413067331986,
|
|
"learning_rate": 1.0791271344012748e-05,
|
|
"loss": 0.4173,
|
|
"step": 9905
|
|
},
|
|
{
|
|
"epoch": 1.4250790911705493,
|
|
"grad_norm": 0.2507723594504243,
|
|
"learning_rate": 1.0783765266977088e-05,
|
|
"loss": 0.4073,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 1.4257981018119068,
|
|
"grad_norm": 0.2524376425793629,
|
|
"learning_rate": 1.077625874560301e-05,
|
|
"loss": 0.4324,
|
|
"step": 9915
|
|
},
|
|
{
|
|
"epoch": 1.4265171124532643,
|
|
"grad_norm": 0.26484532697084445,
|
|
"learning_rate": 1.076875178414617e-05,
|
|
"loss": 0.4131,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 1.4272361230946218,
|
|
"grad_norm": 0.25445991101378906,
|
|
"learning_rate": 1.0761244386862475e-05,
|
|
"loss": 0.3948,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 1.4279551337359793,
|
|
"grad_norm": 0.2721341341876498,
|
|
"learning_rate": 1.0753736558008074e-05,
|
|
"loss": 0.4077,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 1.4286741443773368,
|
|
"grad_norm": 0.2527863317405902,
|
|
"learning_rate": 1.074622830183937e-05,
|
|
"loss": 0.4266,
|
|
"step": 9935
|
|
},
|
|
{
|
|
"epoch": 1.4293931550186942,
|
|
"grad_norm": 0.25451031488199266,
|
|
"learning_rate": 1.0738719622613e-05,
|
|
"loss": 0.4238,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 1.4301121656600517,
|
|
"grad_norm": 0.2552071960367096,
|
|
"learning_rate": 1.0731210524585852e-05,
|
|
"loss": 0.4155,
|
|
"step": 9945
|
|
},
|
|
{
|
|
"epoch": 1.4308311763014092,
|
|
"grad_norm": 0.24824533306246047,
|
|
"learning_rate": 1.0723701012015032e-05,
|
|
"loss": 0.4094,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 1.4315501869427667,
|
|
"grad_norm": 0.2498459830748846,
|
|
"learning_rate": 1.0716191089157895e-05,
|
|
"loss": 0.4224,
|
|
"step": 9955
|
|
},
|
|
{
|
|
"epoch": 1.4322691975841242,
|
|
"grad_norm": 0.2500368139104141,
|
|
"learning_rate": 1.070868076027203e-05,
|
|
"loss": 0.4138,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 1.4329882082254817,
|
|
"grad_norm": 0.2657392868538405,
|
|
"learning_rate": 1.0701170029615248e-05,
|
|
"loss": 0.4229,
|
|
"step": 9965
|
|
},
|
|
{
|
|
"epoch": 1.4337072188668392,
|
|
"grad_norm": 0.2451600737041558,
|
|
"learning_rate": 1.0693658901445596e-05,
|
|
"loss": 0.4054,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 1.4344262295081966,
|
|
"grad_norm": 0.27267755056042187,
|
|
"learning_rate": 1.0686147380021343e-05,
|
|
"loss": 0.4148,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 1.4351452401495541,
|
|
"grad_norm": 0.2565989948882745,
|
|
"learning_rate": 1.0678635469600974e-05,
|
|
"loss": 0.4042,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 1.4358642507909116,
|
|
"grad_norm": 0.2579820399597762,
|
|
"learning_rate": 1.0671123174443205e-05,
|
|
"loss": 0.4265,
|
|
"step": 9985
|
|
},
|
|
{
|
|
"epoch": 1.4365832614322693,
|
|
"grad_norm": 0.2551052561872372,
|
|
"learning_rate": 1.0663610498806967e-05,
|
|
"loss": 0.4129,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 1.4373022720736266,
|
|
"grad_norm": 0.26506244582250904,
|
|
"learning_rate": 1.0656097446951405e-05,
|
|
"loss": 0.4019,
|
|
"step": 9995
|
|
},
|
|
{
|
|
"epoch": 1.4380212827149843,
|
|
"grad_norm": 0.2563926253213933,
|
|
"learning_rate": 1.0648584023135878e-05,
|
|
"loss": 0.4259,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 1.4387402933563416,
|
|
"grad_norm": 0.23996561192295213,
|
|
"learning_rate": 1.064107023161995e-05,
|
|
"loss": 0.402,
|
|
"step": 10005
|
|
},
|
|
{
|
|
"epoch": 1.4394593039976993,
|
|
"grad_norm": 0.2609602746289666,
|
|
"learning_rate": 1.063355607666341e-05,
|
|
"loss": 0.433,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 1.4401783146390565,
|
|
"grad_norm": 0.2572423732120433,
|
|
"learning_rate": 1.0626041562526232e-05,
|
|
"loss": 0.4144,
|
|
"step": 10015
|
|
},
|
|
{
|
|
"epoch": 1.4408973252804143,
|
|
"grad_norm": 0.2532322139448099,
|
|
"learning_rate": 1.0618526693468611e-05,
|
|
"loss": 0.4104,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 1.4416163359217715,
|
|
"grad_norm": 0.256513588435813,
|
|
"learning_rate": 1.0611011473750932e-05,
|
|
"loss": 0.4284,
|
|
"step": 10025
|
|
},
|
|
{
|
|
"epoch": 1.4423353465631292,
|
|
"grad_norm": 0.2512556503761663,
|
|
"learning_rate": 1.0603495907633785e-05,
|
|
"loss": 0.4167,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 1.4430543572044865,
|
|
"grad_norm": 0.2612955230091824,
|
|
"learning_rate": 1.0595979999377953e-05,
|
|
"loss": 0.4303,
|
|
"step": 10035
|
|
},
|
|
{
|
|
"epoch": 1.4437733678458442,
|
|
"grad_norm": 0.2389982500909746,
|
|
"learning_rate": 1.0588463753244419e-05,
|
|
"loss": 0.4081,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 1.4444923784872017,
|
|
"grad_norm": 0.24034615966848993,
|
|
"learning_rate": 1.0580947173494344e-05,
|
|
"loss": 0.4168,
|
|
"step": 10045
|
|
},
|
|
{
|
|
"epoch": 1.4452113891285592,
|
|
"grad_norm": 0.25396503366625184,
|
|
"learning_rate": 1.0573430264389095e-05,
|
|
"loss": 0.4172,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 1.4459303997699167,
|
|
"grad_norm": 0.25299540098124423,
|
|
"learning_rate": 1.056591303019021e-05,
|
|
"loss": 0.4226,
|
|
"step": 10055
|
|
},
|
|
{
|
|
"epoch": 1.4466494104112742,
|
|
"grad_norm": 0.2538181787663044,
|
|
"learning_rate": 1.0558395475159429e-05,
|
|
"loss": 0.4181,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 1.4473684210526316,
|
|
"grad_norm": 0.2583341815420175,
|
|
"learning_rate": 1.0550877603558656e-05,
|
|
"loss": 0.4178,
|
|
"step": 10065
|
|
},
|
|
{
|
|
"epoch": 1.4480874316939891,
|
|
"grad_norm": 0.24865034683634282,
|
|
"learning_rate": 1.0543359419649986e-05,
|
|
"loss": 0.402,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 1.4488064423353466,
|
|
"grad_norm": 0.266590676113421,
|
|
"learning_rate": 1.0535840927695684e-05,
|
|
"loss": 0.4358,
|
|
"step": 10075
|
|
},
|
|
{
|
|
"epoch": 1.449525452976704,
|
|
"grad_norm": 0.25905004609181675,
|
|
"learning_rate": 1.0528322131958198e-05,
|
|
"loss": 0.4041,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 1.4502444636180616,
|
|
"grad_norm": 0.25026534058827304,
|
|
"learning_rate": 1.0520803036700138e-05,
|
|
"loss": 0.4233,
|
|
"step": 10085
|
|
},
|
|
{
|
|
"epoch": 1.450963474259419,
|
|
"grad_norm": 0.25318916934109487,
|
|
"learning_rate": 1.0513283646184297e-05,
|
|
"loss": 0.4269,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 1.4516824849007766,
|
|
"grad_norm": 0.2644612965612271,
|
|
"learning_rate": 1.0505763964673617e-05,
|
|
"loss": 0.4169,
|
|
"step": 10095
|
|
},
|
|
{
|
|
"epoch": 1.452401495542134,
|
|
"grad_norm": 0.2461679414258703,
|
|
"learning_rate": 1.049824399643122e-05,
|
|
"loss": 0.4077,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 1.4531205061834915,
|
|
"grad_norm": 0.2531626796998085,
|
|
"learning_rate": 1.0490723745720387e-05,
|
|
"loss": 0.4112,
|
|
"step": 10105
|
|
},
|
|
{
|
|
"epoch": 1.453839516824849,
|
|
"grad_norm": 0.2503388208313446,
|
|
"learning_rate": 1.0483203216804562e-05,
|
|
"loss": 0.417,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 1.4545585274662065,
|
|
"grad_norm": 0.24133257517578935,
|
|
"learning_rate": 1.0475682413947337e-05,
|
|
"loss": 0.4283,
|
|
"step": 10115
|
|
},
|
|
{
|
|
"epoch": 1.455277538107564,
|
|
"grad_norm": 0.2447893414574943,
|
|
"learning_rate": 1.0468161341412466e-05,
|
|
"loss": 0.4137,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 1.4559965487489215,
|
|
"grad_norm": 0.2540357859067168,
|
|
"learning_rate": 1.0460640003463855e-05,
|
|
"loss": 0.4349,
|
|
"step": 10125
|
|
},
|
|
{
|
|
"epoch": 1.456715559390279,
|
|
"grad_norm": 0.237597538775147,
|
|
"learning_rate": 1.0453118404365563e-05,
|
|
"loss": 0.4034,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 1.4574345700316365,
|
|
"grad_norm": 0.2496607379914245,
|
|
"learning_rate": 1.0445596548381793e-05,
|
|
"loss": 0.4168,
|
|
"step": 10135
|
|
},
|
|
{
|
|
"epoch": 1.458153580672994,
|
|
"grad_norm": 0.2701412787204999,
|
|
"learning_rate": 1.0438074439776895e-05,
|
|
"loss": 0.4158,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 1.4588725913143514,
|
|
"grad_norm": 0.25006441637973104,
|
|
"learning_rate": 1.0430552082815363e-05,
|
|
"loss": 0.4039,
|
|
"step": 10145
|
|
},
|
|
{
|
|
"epoch": 1.459591601955709,
|
|
"grad_norm": 0.2633613283531685,
|
|
"learning_rate": 1.0423029481761831e-05,
|
|
"loss": 0.4235,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 1.4603106125970664,
|
|
"grad_norm": 0.24826147447761096,
|
|
"learning_rate": 1.0415506640881068e-05,
|
|
"loss": 0.4246,
|
|
"step": 10155
|
|
},
|
|
{
|
|
"epoch": 1.461029623238424,
|
|
"grad_norm": 0.2613503948237542,
|
|
"learning_rate": 1.0407983564437992e-05,
|
|
"loss": 0.4144,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 1.4617486338797814,
|
|
"grad_norm": 0.24539329475298266,
|
|
"learning_rate": 1.0400460256697638e-05,
|
|
"loss": 0.4282,
|
|
"step": 10165
|
|
},
|
|
{
|
|
"epoch": 1.4624676445211389,
|
|
"grad_norm": 0.2505553555477828,
|
|
"learning_rate": 1.0392936721925178e-05,
|
|
"loss": 0.4341,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 1.4631866551624964,
|
|
"grad_norm": 0.2515603381742112,
|
|
"learning_rate": 1.0385412964385916e-05,
|
|
"loss": 0.4321,
|
|
"step": 10175
|
|
},
|
|
{
|
|
"epoch": 1.4639056658038538,
|
|
"grad_norm": 0.2501549781754998,
|
|
"learning_rate": 1.0377888988345283e-05,
|
|
"loss": 0.4056,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 1.4646246764452113,
|
|
"grad_norm": 0.24808066699770787,
|
|
"learning_rate": 1.037036479806883e-05,
|
|
"loss": 0.4278,
|
|
"step": 10185
|
|
},
|
|
{
|
|
"epoch": 1.4653436870865688,
|
|
"grad_norm": 0.2554829719204167,
|
|
"learning_rate": 1.0362840397822228e-05,
|
|
"loss": 0.4249,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 1.4660626977279263,
|
|
"grad_norm": 0.25503395067806245,
|
|
"learning_rate": 1.0355315791871275e-05,
|
|
"loss": 0.425,
|
|
"step": 10195
|
|
},
|
|
{
|
|
"epoch": 1.4667817083692838,
|
|
"grad_norm": 0.2516936218004914,
|
|
"learning_rate": 1.0347790984481868e-05,
|
|
"loss": 0.4165,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 1.4675007190106413,
|
|
"grad_norm": 0.25736666923114426,
|
|
"learning_rate": 1.0340265979920047e-05,
|
|
"loss": 0.4205,
|
|
"step": 10205
|
|
},
|
|
{
|
|
"epoch": 1.4682197296519988,
|
|
"grad_norm": 0.2370907375593564,
|
|
"learning_rate": 1.0332740782451936e-05,
|
|
"loss": 0.3983,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 1.4689387402933565,
|
|
"grad_norm": 0.26807117481941795,
|
|
"learning_rate": 1.0325215396343782e-05,
|
|
"loss": 0.4176,
|
|
"step": 10215
|
|
},
|
|
{
|
|
"epoch": 1.4696577509347137,
|
|
"grad_norm": 0.25537168710013947,
|
|
"learning_rate": 1.031768982586194e-05,
|
|
"loss": 0.4271,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 1.4703767615760714,
|
|
"grad_norm": 0.2498004049578793,
|
|
"learning_rate": 1.031016407527286e-05,
|
|
"loss": 0.416,
|
|
"step": 10225
|
|
},
|
|
{
|
|
"epoch": 1.4710957722174287,
|
|
"grad_norm": 0.2518779383607219,
|
|
"learning_rate": 1.0302638148843105e-05,
|
|
"loss": 0.4288,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 1.4718147828587864,
|
|
"grad_norm": 0.25224022010410896,
|
|
"learning_rate": 1.0295112050839331e-05,
|
|
"loss": 0.4137,
|
|
"step": 10235
|
|
},
|
|
{
|
|
"epoch": 1.4725337935001437,
|
|
"grad_norm": 0.2652334016292149,
|
|
"learning_rate": 1.0287585785528298e-05,
|
|
"loss": 0.4168,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 1.4732528041415014,
|
|
"grad_norm": 0.2592847480057149,
|
|
"learning_rate": 1.0280059357176846e-05,
|
|
"loss": 0.4346,
|
|
"step": 10245
|
|
},
|
|
{
|
|
"epoch": 1.4739718147828587,
|
|
"grad_norm": 0.26403942909123734,
|
|
"learning_rate": 1.0272532770051924e-05,
|
|
"loss": 0.4163,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 1.4746908254242164,
|
|
"grad_norm": 0.2533517614216728,
|
|
"learning_rate": 1.0265006028420565e-05,
|
|
"loss": 0.403,
|
|
"step": 10255
|
|
},
|
|
{
|
|
"epoch": 1.4754098360655736,
|
|
"grad_norm": 0.24750296558877358,
|
|
"learning_rate": 1.0257479136549889e-05,
|
|
"loss": 0.4081,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 1.4761288467069313,
|
|
"grad_norm": 0.244400346489122,
|
|
"learning_rate": 1.0249952098707096e-05,
|
|
"loss": 0.4179,
|
|
"step": 10265
|
|
},
|
|
{
|
|
"epoch": 1.4768478573482888,
|
|
"grad_norm": 0.24817124170891883,
|
|
"learning_rate": 1.024242491915948e-05,
|
|
"loss": 0.3997,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 1.4775668679896463,
|
|
"grad_norm": 0.25013198917785917,
|
|
"learning_rate": 1.0234897602174405e-05,
|
|
"loss": 0.4209,
|
|
"step": 10275
|
|
},
|
|
{
|
|
"epoch": 1.4782858786310038,
|
|
"grad_norm": 0.25576260336241796,
|
|
"learning_rate": 1.022737015201932e-05,
|
|
"loss": 0.4061,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 1.4790048892723613,
|
|
"grad_norm": 0.2584500349591197,
|
|
"learning_rate": 1.0219842572961747e-05,
|
|
"loss": 0.4246,
|
|
"step": 10285
|
|
},
|
|
{
|
|
"epoch": 1.4797238999137188,
|
|
"grad_norm": 0.24933293916123048,
|
|
"learning_rate": 1.0212314869269282e-05,
|
|
"loss": 0.4147,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 1.4804429105550763,
|
|
"grad_norm": 0.24627965296883847,
|
|
"learning_rate": 1.0204787045209583e-05,
|
|
"loss": 0.4077,
|
|
"step": 10295
|
|
},
|
|
{
|
|
"epoch": 1.4811619211964338,
|
|
"grad_norm": 0.2535039331883902,
|
|
"learning_rate": 1.019725910505039e-05,
|
|
"loss": 0.4324,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 1.4818809318377912,
|
|
"grad_norm": 0.2597546595277921,
|
|
"learning_rate": 1.0189731053059504e-05,
|
|
"loss": 0.427,
|
|
"step": 10305
|
|
},
|
|
{
|
|
"epoch": 1.4825999424791487,
|
|
"grad_norm": 0.26682658026370226,
|
|
"learning_rate": 1.0182202893504784e-05,
|
|
"loss": 0.4114,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 1.4833189531205062,
|
|
"grad_norm": 0.25604836937005826,
|
|
"learning_rate": 1.0174674630654156e-05,
|
|
"loss": 0.3984,
|
|
"step": 10315
|
|
},
|
|
{
|
|
"epoch": 1.4840379637618637,
|
|
"grad_norm": 0.2702117005184134,
|
|
"learning_rate": 1.0167146268775601e-05,
|
|
"loss": 0.4182,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 1.4847569744032212,
|
|
"grad_norm": 0.2640341573786068,
|
|
"learning_rate": 1.0159617812137157e-05,
|
|
"loss": 0.414,
|
|
"step": 10325
|
|
},
|
|
{
|
|
"epoch": 1.4854759850445787,
|
|
"grad_norm": 0.26282420196086725,
|
|
"learning_rate": 1.0152089265006916e-05,
|
|
"loss": 0.4285,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 1.4861949956859362,
|
|
"grad_norm": 0.2553042564800194,
|
|
"learning_rate": 1.0144560631653026e-05,
|
|
"loss": 0.4222,
|
|
"step": 10335
|
|
},
|
|
{
|
|
"epoch": 1.4869140063272936,
|
|
"grad_norm": 0.25678271676292025,
|
|
"learning_rate": 1.0137031916343681e-05,
|
|
"loss": 0.422,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 1.4876330169686511,
|
|
"grad_norm": 0.2405527517769915,
|
|
"learning_rate": 1.0129503123347108e-05,
|
|
"loss": 0.4296,
|
|
"step": 10345
|
|
},
|
|
{
|
|
"epoch": 1.4883520276100086,
|
|
"grad_norm": 0.24107070336536038,
|
|
"learning_rate": 1.01219742569316e-05,
|
|
"loss": 0.4115,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 1.489071038251366,
|
|
"grad_norm": 0.26737435669796555,
|
|
"learning_rate": 1.0114445321365483e-05,
|
|
"loss": 0.4293,
|
|
"step": 10355
|
|
},
|
|
{
|
|
"epoch": 1.4897900488927236,
|
|
"grad_norm": 0.2448869318476463,
|
|
"learning_rate": 1.0106916320917113e-05,
|
|
"loss": 0.4269,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 1.490509059534081,
|
|
"grad_norm": 0.2512691007866713,
|
|
"learning_rate": 1.0099387259854897e-05,
|
|
"loss": 0.4234,
|
|
"step": 10365
|
|
},
|
|
{
|
|
"epoch": 1.4912280701754386,
|
|
"grad_norm": 0.25139839806292674,
|
|
"learning_rate": 1.0091858142447266e-05,
|
|
"loss": 0.418,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 1.491947080816796,
|
|
"grad_norm": 0.25818314693975347,
|
|
"learning_rate": 1.008432897296269e-05,
|
|
"loss": 0.4091,
|
|
"step": 10375
|
|
},
|
|
{
|
|
"epoch": 1.4926660914581535,
|
|
"grad_norm": 0.24946522475818825,
|
|
"learning_rate": 1.0076799755669662e-05,
|
|
"loss": 0.4191,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 1.493385102099511,
|
|
"grad_norm": 0.24475456689761105,
|
|
"learning_rate": 1.0069270494836709e-05,
|
|
"loss": 0.4108,
|
|
"step": 10385
|
|
},
|
|
{
|
|
"epoch": 1.4941041127408685,
|
|
"grad_norm": 0.24666501837527802,
|
|
"learning_rate": 1.006174119473238e-05,
|
|
"loss": 0.4177,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 1.494823123382226,
|
|
"grad_norm": 0.24476633350362695,
|
|
"learning_rate": 1.0054211859625238e-05,
|
|
"loss": 0.4188,
|
|
"step": 10395
|
|
},
|
|
{
|
|
"epoch": 1.4955421340235835,
|
|
"grad_norm": 0.24979308411343795,
|
|
"learning_rate": 1.0046682493783881e-05,
|
|
"loss": 0.406,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 1.496261144664941,
|
|
"grad_norm": 0.26279919228449894,
|
|
"learning_rate": 1.0039153101476919e-05,
|
|
"loss": 0.4297,
|
|
"step": 10405
|
|
},
|
|
{
|
|
"epoch": 1.4969801553062985,
|
|
"grad_norm": 0.24398339297174304,
|
|
"learning_rate": 1.0031623686972967e-05,
|
|
"loss": 0.4114,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 1.497699165947656,
|
|
"grad_norm": 0.27099933558193773,
|
|
"learning_rate": 1.0024094254540665e-05,
|
|
"loss": 0.4303,
|
|
"step": 10415
|
|
},
|
|
{
|
|
"epoch": 1.4984181765890134,
|
|
"grad_norm": 0.2601120818746926,
|
|
"learning_rate": 1.0016564808448655e-05,
|
|
"loss": 0.4263,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 1.499137187230371,
|
|
"grad_norm": 0.2576834197103154,
|
|
"learning_rate": 1.0009035352965593e-05,
|
|
"loss": 0.4166,
|
|
"step": 10425
|
|
},
|
|
{
|
|
"epoch": 1.4998561978717286,
|
|
"grad_norm": 0.2434421252623038,
|
|
"learning_rate": 1.0001505892360138e-05,
|
|
"loss": 0.4131,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 1.500575208513086,
|
|
"grad_norm": 0.2427517789901842,
|
|
"learning_rate": 9.993976430900951e-06,
|
|
"loss": 0.4303,
|
|
"step": 10435
|
|
},
|
|
{
|
|
"epoch": 1.5012942191544436,
|
|
"grad_norm": 0.2593244896531974,
|
|
"learning_rate": 9.98644697285669e-06,
|
|
"loss": 0.4238,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 1.5020132297958009,
|
|
"grad_norm": 0.25119219381398794,
|
|
"learning_rate": 9.978917522496021e-06,
|
|
"loss": 0.4257,
|
|
"step": 10445
|
|
},
|
|
{
|
|
"epoch": 1.5027322404371586,
|
|
"grad_norm": 0.25106940031364017,
|
|
"learning_rate": 9.9713880840876e-06,
|
|
"loss": 0.4167,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 1.5034512510785158,
|
|
"grad_norm": 0.26566863198644425,
|
|
"learning_rate": 9.96385866190007e-06,
|
|
"loss": 0.4269,
|
|
"step": 10455
|
|
},
|
|
{
|
|
"epoch": 1.5041702617198736,
|
|
"grad_norm": 0.25510783732412645,
|
|
"learning_rate": 9.956329260202076e-06,
|
|
"loss": 0.425,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 1.5048892723612308,
|
|
"grad_norm": 0.24346425747421713,
|
|
"learning_rate": 9.948799883262241e-06,
|
|
"loss": 0.4273,
|
|
"step": 10465
|
|
},
|
|
{
|
|
"epoch": 1.5056082830025885,
|
|
"grad_norm": 0.24811318518506437,
|
|
"learning_rate": 9.941270535349184e-06,
|
|
"loss": 0.4271,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 1.5063272936439458,
|
|
"grad_norm": 0.24991918010004216,
|
|
"learning_rate": 9.9337412207315e-06,
|
|
"loss": 0.412,
|
|
"step": 10475
|
|
},
|
|
{
|
|
"epoch": 1.5070463042853035,
|
|
"grad_norm": 0.2505908963935783,
|
|
"learning_rate": 9.926211943677772e-06,
|
|
"loss": 0.404,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 1.5077653149266608,
|
|
"grad_norm": 0.253336551462878,
|
|
"learning_rate": 9.918682708456547e-06,
|
|
"loss": 0.3912,
|
|
"step": 10485
|
|
},
|
|
{
|
|
"epoch": 1.5084843255680185,
|
|
"grad_norm": 0.25830329953524983,
|
|
"learning_rate": 9.911153519336372e-06,
|
|
"loss": 0.4183,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 1.5092033362093757,
|
|
"grad_norm": 0.30529482298154637,
|
|
"learning_rate": 9.903624380585744e-06,
|
|
"loss": 0.4076,
|
|
"step": 10495
|
|
},
|
|
{
|
|
"epoch": 1.5099223468507335,
|
|
"grad_norm": 0.2583172817729028,
|
|
"learning_rate": 9.896095296473146e-06,
|
|
"loss": 0.4211,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 1.5106413574920907,
|
|
"grad_norm": 0.2379268205746049,
|
|
"learning_rate": 9.888566271267029e-06,
|
|
"loss": 0.4076,
|
|
"step": 10505
|
|
},
|
|
{
|
|
"epoch": 1.5113603681334484,
|
|
"grad_norm": 0.25206595841422097,
|
|
"learning_rate": 9.881037309235802e-06,
|
|
"loss": 0.4195,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 1.512079378774806,
|
|
"grad_norm": 0.25510472525985534,
|
|
"learning_rate": 9.87350841464785e-06,
|
|
"loss": 0.42,
|
|
"step": 10515
|
|
},
|
|
{
|
|
"epoch": 1.5127983894161634,
|
|
"grad_norm": 0.24854147718329755,
|
|
"learning_rate": 9.86597959177151e-06,
|
|
"loss": 0.4211,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 1.513517400057521,
|
|
"grad_norm": 0.25067813066623357,
|
|
"learning_rate": 9.858450844875077e-06,
|
|
"loss": 0.435,
|
|
"step": 10525
|
|
},
|
|
{
|
|
"epoch": 1.5142364106988784,
|
|
"grad_norm": 0.24763221009061093,
|
|
"learning_rate": 9.850922178226819e-06,
|
|
"loss": 0.406,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 1.5149554213402359,
|
|
"grad_norm": 0.2517774937888186,
|
|
"learning_rate": 9.843393596094943e-06,
|
|
"loss": 0.398,
|
|
"step": 10535
|
|
},
|
|
{
|
|
"epoch": 1.5156744319815934,
|
|
"grad_norm": 0.2581762643671444,
|
|
"learning_rate": 9.835865102747605e-06,
|
|
"loss": 0.4389,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 1.5163934426229508,
|
|
"grad_norm": 0.26229897737197194,
|
|
"learning_rate": 9.828336702452926e-06,
|
|
"loss": 0.4245,
|
|
"step": 10545
|
|
},
|
|
{
|
|
"epoch": 1.5171124532643083,
|
|
"grad_norm": 0.2478466974122416,
|
|
"learning_rate": 9.820808399478969e-06,
|
|
"loss": 0.4413,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 1.5178314639056658,
|
|
"grad_norm": 0.2613714789547285,
|
|
"learning_rate": 9.813280198093727e-06,
|
|
"loss": 0.4103,
|
|
"step": 10555
|
|
},
|
|
{
|
|
"epoch": 1.5185504745470233,
|
|
"grad_norm": 0.245430331715001,
|
|
"learning_rate": 9.805752102565162e-06,
|
|
"loss": 0.4106,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 1.5192694851883808,
|
|
"grad_norm": 0.251148558174525,
|
|
"learning_rate": 9.798224117161153e-06,
|
|
"loss": 0.4189,
|
|
"step": 10565
|
|
},
|
|
{
|
|
"epoch": 1.5199884958297383,
|
|
"grad_norm": 0.25693318875295806,
|
|
"learning_rate": 9.790696246149524e-06,
|
|
"loss": 0.4209,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 1.5207075064710958,
|
|
"grad_norm": 0.2591621988908957,
|
|
"learning_rate": 9.783168493798044e-06,
|
|
"loss": 0.4231,
|
|
"step": 10575
|
|
},
|
|
{
|
|
"epoch": 1.5214265171124532,
|
|
"grad_norm": 0.25604656716274005,
|
|
"learning_rate": 9.775640864374398e-06,
|
|
"loss": 0.4026,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 1.5221455277538107,
|
|
"grad_norm": 0.24739468808956666,
|
|
"learning_rate": 9.768113362146209e-06,
|
|
"loss": 0.4154,
|
|
"step": 10585
|
|
},
|
|
{
|
|
"epoch": 1.5228645383951682,
|
|
"grad_norm": 0.2593488710273111,
|
|
"learning_rate": 9.760585991381033e-06,
|
|
"loss": 0.4176,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 1.5235835490365257,
|
|
"grad_norm": 0.23504431821814106,
|
|
"learning_rate": 9.753058756346346e-06,
|
|
"loss": 0.4181,
|
|
"step": 10595
|
|
},
|
|
{
|
|
"epoch": 1.5243025596778832,
|
|
"grad_norm": 0.245640721324355,
|
|
"learning_rate": 9.745531661309544e-06,
|
|
"loss": 0.4423,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 1.5250215703192407,
|
|
"grad_norm": 0.25215427075046754,
|
|
"learning_rate": 9.738004710537953e-06,
|
|
"loss": 0.4388,
|
|
"step": 10605
|
|
},
|
|
{
|
|
"epoch": 1.5257405809605982,
|
|
"grad_norm": 0.24913812385524794,
|
|
"learning_rate": 9.730477908298806e-06,
|
|
"loss": 0.4136,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 1.5264595916019557,
|
|
"grad_norm": 0.26318609212029886,
|
|
"learning_rate": 9.722951258859261e-06,
|
|
"loss": 0.4229,
|
|
"step": 10615
|
|
},
|
|
{
|
|
"epoch": 1.5271786022433131,
|
|
"grad_norm": 0.24957565565496473,
|
|
"learning_rate": 9.715424766486385e-06,
|
|
"loss": 0.4183,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 1.5278976128846709,
|
|
"grad_norm": 0.2674208605810236,
|
|
"learning_rate": 9.707898435447153e-06,
|
|
"loss": 0.4159,
|
|
"step": 10625
|
|
},
|
|
{
|
|
"epoch": 1.5286166235260281,
|
|
"grad_norm": 0.26280131334403317,
|
|
"learning_rate": 9.70037227000846e-06,
|
|
"loss": 0.4257,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 1.5293356341673858,
|
|
"grad_norm": 0.24938256962028485,
|
|
"learning_rate": 9.692846274437095e-06,
|
|
"loss": 0.4181,
|
|
"step": 10635
|
|
},
|
|
{
|
|
"epoch": 1.530054644808743,
|
|
"grad_norm": 0.25764954483156466,
|
|
"learning_rate": 9.68532045299975e-06,
|
|
"loss": 0.4291,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 1.5307736554501008,
|
|
"grad_norm": 0.2578862842527871,
|
|
"learning_rate": 9.677794809963034e-06,
|
|
"loss": 0.4169,
|
|
"step": 10645
|
|
},
|
|
{
|
|
"epoch": 1.531492666091458,
|
|
"grad_norm": 0.2476802687012682,
|
|
"learning_rate": 9.670269349593438e-06,
|
|
"loss": 0.4151,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 1.5322116767328158,
|
|
"grad_norm": 0.250049738063665,
|
|
"learning_rate": 9.662744076157353e-06,
|
|
"loss": 0.418,
|
|
"step": 10655
|
|
},
|
|
{
|
|
"epoch": 1.532930687374173,
|
|
"grad_norm": 0.24953016919389576,
|
|
"learning_rate": 9.655218993921072e-06,
|
|
"loss": 0.4181,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 1.5336496980155307,
|
|
"grad_norm": 0.25677559512208753,
|
|
"learning_rate": 9.647694107150773e-06,
|
|
"loss": 0.4138,
|
|
"step": 10665
|
|
},
|
|
{
|
|
"epoch": 1.534368708656888,
|
|
"grad_norm": 0.2518384392576241,
|
|
"learning_rate": 9.64016942011252e-06,
|
|
"loss": 0.4064,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 1.5350877192982457,
|
|
"grad_norm": 0.2626226969249233,
|
|
"learning_rate": 9.632644937072277e-06,
|
|
"loss": 0.417,
|
|
"step": 10675
|
|
},
|
|
{
|
|
"epoch": 1.535806729939603,
|
|
"grad_norm": 0.25414243414168186,
|
|
"learning_rate": 9.625120662295878e-06,
|
|
"loss": 0.4221,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 1.5365257405809607,
|
|
"grad_norm": 0.2598807696270767,
|
|
"learning_rate": 9.617596600049041e-06,
|
|
"loss": 0.4364,
|
|
"step": 10685
|
|
},
|
|
{
|
|
"epoch": 1.537244751222318,
|
|
"grad_norm": 0.25844805472902665,
|
|
"learning_rate": 9.610072754597373e-06,
|
|
"loss": 0.4351,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 1.5379637618636757,
|
|
"grad_norm": 0.2524970535348807,
|
|
"learning_rate": 9.602549130206353e-06,
|
|
"loss": 0.4059,
|
|
"step": 10695
|
|
},
|
|
{
|
|
"epoch": 1.538682772505033,
|
|
"grad_norm": 0.26275208611273376,
|
|
"learning_rate": 9.595025731141326e-06,
|
|
"loss": 0.4408,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 1.5394017831463906,
|
|
"grad_norm": 0.25292419698311214,
|
|
"learning_rate": 9.587502561667525e-06,
|
|
"loss": 0.4088,
|
|
"step": 10705
|
|
},
|
|
{
|
|
"epoch": 1.540120793787748,
|
|
"grad_norm": 0.24708841066490864,
|
|
"learning_rate": 9.579979626050043e-06,
|
|
"loss": 0.4069,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 1.5408398044291056,
|
|
"grad_norm": 0.2487555695805377,
|
|
"learning_rate": 9.572456928553836e-06,
|
|
"loss": 0.4065,
|
|
"step": 10715
|
|
},
|
|
{
|
|
"epoch": 1.5415588150704629,
|
|
"grad_norm": 0.2617493095764514,
|
|
"learning_rate": 9.564934473443742e-06,
|
|
"loss": 0.4093,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 1.5422778257118206,
|
|
"grad_norm": 0.24836706110965545,
|
|
"learning_rate": 9.557412264984444e-06,
|
|
"loss": 0.4065,
|
|
"step": 10725
|
|
},
|
|
{
|
|
"epoch": 1.542996836353178,
|
|
"grad_norm": 0.24844848518424625,
|
|
"learning_rate": 9.54989030744049e-06,
|
|
"loss": 0.4263,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 1.5437158469945356,
|
|
"grad_norm": 0.2525521654434998,
|
|
"learning_rate": 9.542368605076296e-06,
|
|
"loss": 0.4075,
|
|
"step": 10735
|
|
},
|
|
{
|
|
"epoch": 1.544434857635893,
|
|
"grad_norm": 0.24642249580899397,
|
|
"learning_rate": 9.534847162156115e-06,
|
|
"loss": 0.3918,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 1.5451538682772505,
|
|
"grad_norm": 0.24486119995221942,
|
|
"learning_rate": 9.52732598294407e-06,
|
|
"loss": 0.4073,
|
|
"step": 10745
|
|
},
|
|
{
|
|
"epoch": 1.545872878918608,
|
|
"grad_norm": 0.2549947650456555,
|
|
"learning_rate": 9.519805071704131e-06,
|
|
"loss": 0.4091,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 1.5465918895599655,
|
|
"grad_norm": 0.2502840945830212,
|
|
"learning_rate": 9.512284432700101e-06,
|
|
"loss": 0.4066,
|
|
"step": 10755
|
|
},
|
|
{
|
|
"epoch": 1.547310900201323,
|
|
"grad_norm": 0.25620127482812816,
|
|
"learning_rate": 9.504764070195652e-06,
|
|
"loss": 0.4026,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 1.5480299108426805,
|
|
"grad_norm": 0.2548526285927597,
|
|
"learning_rate": 9.49724398845428e-06,
|
|
"loss": 0.416,
|
|
"step": 10765
|
|
},
|
|
{
|
|
"epoch": 1.548748921484038,
|
|
"grad_norm": 0.25445359889395053,
|
|
"learning_rate": 9.489724191739329e-06,
|
|
"loss": 0.4165,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 1.5494679321253955,
|
|
"grad_norm": 0.2511165733452332,
|
|
"learning_rate": 9.48220468431399e-06,
|
|
"loss": 0.4067,
|
|
"step": 10775
|
|
},
|
|
{
|
|
"epoch": 1.550186942766753,
|
|
"grad_norm": 0.2464055911624285,
|
|
"learning_rate": 9.474685470441274e-06,
|
|
"loss": 0.4088,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 1.5509059534081104,
|
|
"grad_norm": 0.24343283618036587,
|
|
"learning_rate": 9.467166554384033e-06,
|
|
"loss": 0.417,
|
|
"step": 10785
|
|
},
|
|
{
|
|
"epoch": 1.551624964049468,
|
|
"grad_norm": 0.2929003871990144,
|
|
"learning_rate": 9.459647940404955e-06,
|
|
"loss": 0.4051,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 1.5523439746908254,
|
|
"grad_norm": 0.26335455862311163,
|
|
"learning_rate": 9.452129632766553e-06,
|
|
"loss": 0.4133,
|
|
"step": 10795
|
|
},
|
|
{
|
|
"epoch": 1.553062985332183,
|
|
"grad_norm": 0.26867986196228943,
|
|
"learning_rate": 9.444611635731157e-06,
|
|
"loss": 0.4039,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 1.5537819959735404,
|
|
"grad_norm": 0.2527610281819871,
|
|
"learning_rate": 9.437093953560941e-06,
|
|
"loss": 0.4369,
|
|
"step": 10805
|
|
},
|
|
{
|
|
"epoch": 1.5545010066148979,
|
|
"grad_norm": 0.26359228700844456,
|
|
"learning_rate": 9.429576590517879e-06,
|
|
"loss": 0.4075,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 1.5552200172562554,
|
|
"grad_norm": 0.24934557309261163,
|
|
"learning_rate": 9.42205955086378e-06,
|
|
"loss": 0.4181,
|
|
"step": 10815
|
|
},
|
|
{
|
|
"epoch": 1.5559390278976128,
|
|
"grad_norm": 0.28417752458187967,
|
|
"learning_rate": 9.414542838860263e-06,
|
|
"loss": 0.4101,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 1.5566580385389703,
|
|
"grad_norm": 0.2687027110590275,
|
|
"learning_rate": 9.407026458768763e-06,
|
|
"loss": 0.4275,
|
|
"step": 10825
|
|
},
|
|
{
|
|
"epoch": 1.5573770491803278,
|
|
"grad_norm": 0.24624048707976195,
|
|
"learning_rate": 9.399510414850518e-06,
|
|
"loss": 0.412,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 1.5580960598216853,
|
|
"grad_norm": 0.2591235754039305,
|
|
"learning_rate": 9.391994711366592e-06,
|
|
"loss": 0.4276,
|
|
"step": 10835
|
|
},
|
|
{
|
|
"epoch": 1.558815070463043,
|
|
"grad_norm": 0.2531149607445782,
|
|
"learning_rate": 9.384479352577844e-06,
|
|
"loss": 0.4055,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 1.5595340811044003,
|
|
"grad_norm": 0.25099823671722576,
|
|
"learning_rate": 9.376964342744942e-06,
|
|
"loss": 0.4126,
|
|
"step": 10845
|
|
},
|
|
{
|
|
"epoch": 1.560253091745758,
|
|
"grad_norm": 0.2621789703583561,
|
|
"learning_rate": 9.369449686128356e-06,
|
|
"loss": 0.4204,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 1.5609721023871153,
|
|
"grad_norm": 0.24146616536060794,
|
|
"learning_rate": 9.361935386988347e-06,
|
|
"loss": 0.4246,
|
|
"step": 10855
|
|
},
|
|
{
|
|
"epoch": 1.561691113028473,
|
|
"grad_norm": 0.25053774015064967,
|
|
"learning_rate": 9.354421449584992e-06,
|
|
"loss": 0.4083,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 1.5624101236698302,
|
|
"grad_norm": 0.24254978561460552,
|
|
"learning_rate": 9.346907878178145e-06,
|
|
"loss": 0.4195,
|
|
"step": 10865
|
|
},
|
|
{
|
|
"epoch": 1.563129134311188,
|
|
"grad_norm": 0.2573324547012047,
|
|
"learning_rate": 9.339394677027457e-06,
|
|
"loss": 0.4288,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 1.5638481449525452,
|
|
"grad_norm": 0.25421395718926176,
|
|
"learning_rate": 9.331881850392382e-06,
|
|
"loss": 0.413,
|
|
"step": 10875
|
|
},
|
|
{
|
|
"epoch": 1.564567155593903,
|
|
"grad_norm": 0.24639179255443322,
|
|
"learning_rate": 9.324369402532146e-06,
|
|
"loss": 0.4064,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 1.5652861662352602,
|
|
"grad_norm": 0.2490898649657323,
|
|
"learning_rate": 9.316857337705757e-06,
|
|
"loss": 0.4018,
|
|
"step": 10885
|
|
},
|
|
{
|
|
"epoch": 1.5660051768766179,
|
|
"grad_norm": 0.3612706971331954,
|
|
"learning_rate": 9.309345660172025e-06,
|
|
"loss": 0.4214,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 1.5667241875179752,
|
|
"grad_norm": 0.26504703274984404,
|
|
"learning_rate": 9.30183437418953e-06,
|
|
"loss": 0.4239,
|
|
"step": 10895
|
|
},
|
|
{
|
|
"epoch": 1.5674431981593329,
|
|
"grad_norm": 0.244324764458801,
|
|
"learning_rate": 9.294323484016621e-06,
|
|
"loss": 0.3935,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 1.5681622088006901,
|
|
"grad_norm": 0.2528967431828394,
|
|
"learning_rate": 9.28681299391144e-06,
|
|
"loss": 0.418,
|
|
"step": 10905
|
|
},
|
|
{
|
|
"epoch": 1.5688812194420478,
|
|
"grad_norm": 0.244293237365086,
|
|
"learning_rate": 9.27930290813189e-06,
|
|
"loss": 0.4123,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 1.569600230083405,
|
|
"grad_norm": 0.26940252175040885,
|
|
"learning_rate": 9.271793230935646e-06,
|
|
"loss": 0.4166,
|
|
"step": 10915
|
|
},
|
|
{
|
|
"epoch": 1.5703192407247628,
|
|
"grad_norm": 0.24979780787360428,
|
|
"learning_rate": 9.264283966580161e-06,
|
|
"loss": 0.4292,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 1.57103825136612,
|
|
"grad_norm": 0.26004584564041544,
|
|
"learning_rate": 9.256775119322642e-06,
|
|
"loss": 0.4252,
|
|
"step": 10925
|
|
},
|
|
{
|
|
"epoch": 1.5717572620074778,
|
|
"grad_norm": 0.2535980461972811,
|
|
"learning_rate": 9.24926669342006e-06,
|
|
"loss": 0.4037,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 1.572476272648835,
|
|
"grad_norm": 0.2597973742778775,
|
|
"learning_rate": 9.241758693129157e-06,
|
|
"loss": 0.3816,
|
|
"step": 10935
|
|
},
|
|
{
|
|
"epoch": 1.5731952832901928,
|
|
"grad_norm": 0.26000746055154517,
|
|
"learning_rate": 9.234251122706429e-06,
|
|
"loss": 0.4076,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 1.57391429393155,
|
|
"grad_norm": 0.26088456069640015,
|
|
"learning_rate": 9.226743986408123e-06,
|
|
"loss": 0.416,
|
|
"step": 10945
|
|
},
|
|
{
|
|
"epoch": 1.5746333045729077,
|
|
"grad_norm": 0.24785788574271747,
|
|
"learning_rate": 9.219237288490248e-06,
|
|
"loss": 0.4222,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 1.5753523152142652,
|
|
"grad_norm": 0.2578127016481043,
|
|
"learning_rate": 9.211731033208555e-06,
|
|
"loss": 0.414,
|
|
"step": 10955
|
|
},
|
|
{
|
|
"epoch": 1.5760713258556227,
|
|
"grad_norm": 0.25311249447487005,
|
|
"learning_rate": 9.204225224818556e-06,
|
|
"loss": 0.4179,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 1.5767903364969802,
|
|
"grad_norm": 0.2522348815673237,
|
|
"learning_rate": 9.196719867575504e-06,
|
|
"loss": 0.4071,
|
|
"step": 10965
|
|
},
|
|
{
|
|
"epoch": 1.5775093471383377,
|
|
"grad_norm": 0.24787911038300595,
|
|
"learning_rate": 9.189214965734388e-06,
|
|
"loss": 0.4014,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 1.5782283577796952,
|
|
"grad_norm": 0.2560334016849389,
|
|
"learning_rate": 9.181710523549956e-06,
|
|
"loss": 0.4409,
|
|
"step": 10975
|
|
},
|
|
{
|
|
"epoch": 1.5789473684210527,
|
|
"grad_norm": 0.2583820636877086,
|
|
"learning_rate": 9.174206545276678e-06,
|
|
"loss": 0.4184,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 1.5796663790624101,
|
|
"grad_norm": 0.26098999460444455,
|
|
"learning_rate": 9.166703035168772e-06,
|
|
"loss": 0.4192,
|
|
"step": 10985
|
|
},
|
|
{
|
|
"epoch": 1.5803853897037676,
|
|
"grad_norm": 0.2536017389758038,
|
|
"learning_rate": 9.159199997480187e-06,
|
|
"loss": 0.4179,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 1.5811044003451251,
|
|
"grad_norm": 0.24919709422780187,
|
|
"learning_rate": 9.151697436464608e-06,
|
|
"loss": 0.4135,
|
|
"step": 10995
|
|
},
|
|
{
|
|
"epoch": 1.5818234109864826,
|
|
"grad_norm": 0.359916355739022,
|
|
"learning_rate": 9.144195356375439e-06,
|
|
"loss": 0.4179,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 1.58254242162784,
|
|
"grad_norm": 0.25759112404931467,
|
|
"learning_rate": 9.136693761465827e-06,
|
|
"loss": 0.4165,
|
|
"step": 11005
|
|
},
|
|
{
|
|
"epoch": 1.5832614322691976,
|
|
"grad_norm": 0.24704507666893488,
|
|
"learning_rate": 9.12919265598863e-06,
|
|
"loss": 0.4115,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 1.583980442910555,
|
|
"grad_norm": 0.26040498404898454,
|
|
"learning_rate": 9.121692044196433e-06,
|
|
"loss": 0.403,
|
|
"step": 11015
|
|
},
|
|
{
|
|
"epoch": 1.5846994535519126,
|
|
"grad_norm": 0.2572213722815523,
|
|
"learning_rate": 9.11419193034155e-06,
|
|
"loss": 0.4265,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 1.58541846419327,
|
|
"grad_norm": 0.26562420815981674,
|
|
"learning_rate": 9.106692318676e-06,
|
|
"loss": 0.4163,
|
|
"step": 11025
|
|
},
|
|
{
|
|
"epoch": 1.5861374748346275,
|
|
"grad_norm": 0.2545731156371461,
|
|
"learning_rate": 9.099193213451518e-06,
|
|
"loss": 0.418,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 1.586856485475985,
|
|
"grad_norm": 0.25251780679210656,
|
|
"learning_rate": 9.091694618919563e-06,
|
|
"loss": 0.4177,
|
|
"step": 11035
|
|
},
|
|
{
|
|
"epoch": 1.5875754961173425,
|
|
"grad_norm": 0.25318760884805,
|
|
"learning_rate": 9.084196539331298e-06,
|
|
"loss": 0.416,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 1.5882945067587,
|
|
"grad_norm": 0.26462756901637724,
|
|
"learning_rate": 9.076698978937585e-06,
|
|
"loss": 0.412,
|
|
"step": 11045
|
|
},
|
|
{
|
|
"epoch": 1.5890135174000575,
|
|
"grad_norm": 0.25503799261850707,
|
|
"learning_rate": 9.069201941989012e-06,
|
|
"loss": 0.4233,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 1.589732528041415,
|
|
"grad_norm": 0.26225264720562946,
|
|
"learning_rate": 9.061705432735852e-06,
|
|
"loss": 0.4253,
|
|
"step": 11055
|
|
},
|
|
{
|
|
"epoch": 1.5904515386827724,
|
|
"grad_norm": 0.24447655868276502,
|
|
"learning_rate": 9.054209455428083e-06,
|
|
"loss": 0.4164,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 1.5911705493241302,
|
|
"grad_norm": 0.2547140130287968,
|
|
"learning_rate": 9.046714014315391e-06,
|
|
"loss": 0.4249,
|
|
"step": 11065
|
|
},
|
|
{
|
|
"epoch": 1.5918895599654874,
|
|
"grad_norm": 0.26102607906573094,
|
|
"learning_rate": 9.039219113647144e-06,
|
|
"loss": 0.4304,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 1.5926085706068451,
|
|
"grad_norm": 0.25631456582076034,
|
|
"learning_rate": 9.031724757672417e-06,
|
|
"loss": 0.4072,
|
|
"step": 11075
|
|
},
|
|
{
|
|
"epoch": 1.5933275812482024,
|
|
"grad_norm": 0.2638138418681577,
|
|
"learning_rate": 9.024230950639965e-06,
|
|
"loss": 0.4306,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 1.59404659188956,
|
|
"grad_norm": 0.25661768187519934,
|
|
"learning_rate": 9.016737696798236e-06,
|
|
"loss": 0.4124,
|
|
"step": 11085
|
|
},
|
|
{
|
|
"epoch": 1.5947656025309174,
|
|
"grad_norm": 0.24843426147942013,
|
|
"learning_rate": 9.009245000395371e-06,
|
|
"loss": 0.429,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 1.595484613172275,
|
|
"grad_norm": 0.2606728470226574,
|
|
"learning_rate": 9.001752865679184e-06,
|
|
"loss": 0.4037,
|
|
"step": 11095
|
|
},
|
|
{
|
|
"epoch": 1.5962036238136323,
|
|
"grad_norm": 0.25306716381995065,
|
|
"learning_rate": 8.994261296897174e-06,
|
|
"loss": 0.4072,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 1.59692263445499,
|
|
"grad_norm": 0.251463255993275,
|
|
"learning_rate": 8.986770298296521e-06,
|
|
"loss": 0.4295,
|
|
"step": 11105
|
|
},
|
|
{
|
|
"epoch": 1.5976416450963473,
|
|
"grad_norm": 0.24957252123112794,
|
|
"learning_rate": 8.979279874124088e-06,
|
|
"loss": 0.4135,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 1.598360655737705,
|
|
"grad_norm": 0.25487316069456933,
|
|
"learning_rate": 8.971790028626395e-06,
|
|
"loss": 0.4236,
|
|
"step": 11115
|
|
},
|
|
{
|
|
"epoch": 1.5990796663790623,
|
|
"grad_norm": 0.25062693498899585,
|
|
"learning_rate": 8.964300766049657e-06,
|
|
"loss": 0.4158,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 1.59979867702042,
|
|
"grad_norm": 0.25586841864722226,
|
|
"learning_rate": 8.956812090639733e-06,
|
|
"loss": 0.4192,
|
|
"step": 11125
|
|
},
|
|
{
|
|
"epoch": 1.6005176876617773,
|
|
"grad_norm": 0.2523516405591824,
|
|
"learning_rate": 8.949324006642171e-06,
|
|
"loss": 0.4163,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 1.601236698303135,
|
|
"grad_norm": 0.2547821939348679,
|
|
"learning_rate": 8.941836518302172e-06,
|
|
"loss": 0.4057,
|
|
"step": 11135
|
|
},
|
|
{
|
|
"epoch": 1.6019557089444922,
|
|
"grad_norm": 0.25350007716673045,
|
|
"learning_rate": 8.934349629864605e-06,
|
|
"loss": 0.4075,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 1.60267471958585,
|
|
"grad_norm": 0.2545362292573287,
|
|
"learning_rate": 8.92686334557399e-06,
|
|
"loss": 0.41,
|
|
"step": 11145
|
|
},
|
|
{
|
|
"epoch": 1.6033937302272072,
|
|
"grad_norm": 0.2500310953377668,
|
|
"learning_rate": 8.91937766967452e-06,
|
|
"loss": 0.408,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 1.604112740868565,
|
|
"grad_norm": 0.26014949108035895,
|
|
"learning_rate": 8.911892606410025e-06,
|
|
"loss": 0.4183,
|
|
"step": 11155
|
|
},
|
|
{
|
|
"epoch": 1.6048317515099222,
|
|
"grad_norm": 0.2579606481931132,
|
|
"learning_rate": 8.904408160023995e-06,
|
|
"loss": 0.4096,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 1.60555076215128,
|
|
"grad_norm": 0.2512133091754519,
|
|
"learning_rate": 8.896924334759584e-06,
|
|
"loss": 0.4082,
|
|
"step": 11165
|
|
},
|
|
{
|
|
"epoch": 1.6062697727926374,
|
|
"grad_norm": 0.26226985742683234,
|
|
"learning_rate": 8.889441134859569e-06,
|
|
"loss": 0.4228,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 1.6069887834339949,
|
|
"grad_norm": 0.2570180142160302,
|
|
"learning_rate": 8.881958564566391e-06,
|
|
"loss": 0.4275,
|
|
"step": 11175
|
|
},
|
|
{
|
|
"epoch": 1.6077077940753524,
|
|
"grad_norm": 0.2561715585777985,
|
|
"learning_rate": 8.874476628122128e-06,
|
|
"loss": 0.4238,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 1.6084268047167098,
|
|
"grad_norm": 0.2550125475833642,
|
|
"learning_rate": 8.866995329768495e-06,
|
|
"loss": 0.4192,
|
|
"step": 11185
|
|
},
|
|
{
|
|
"epoch": 1.6091458153580673,
|
|
"grad_norm": 0.2699936703979793,
|
|
"learning_rate": 8.859514673746856e-06,
|
|
"loss": 0.4196,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 1.6098648259994248,
|
|
"grad_norm": 0.2505338294862466,
|
|
"learning_rate": 8.852034664298198e-06,
|
|
"loss": 0.4153,
|
|
"step": 11195
|
|
},
|
|
{
|
|
"epoch": 1.6105838366407823,
|
|
"grad_norm": 0.26502315769999407,
|
|
"learning_rate": 8.844555305663145e-06,
|
|
"loss": 0.4209,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 1.6113028472821398,
|
|
"grad_norm": 0.2500014115346305,
|
|
"learning_rate": 8.83707660208196e-06,
|
|
"loss": 0.4151,
|
|
"step": 11205
|
|
},
|
|
{
|
|
"epoch": 1.6120218579234973,
|
|
"grad_norm": 0.25469064057360297,
|
|
"learning_rate": 8.82959855779453e-06,
|
|
"loss": 0.4202,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 1.6127408685648548,
|
|
"grad_norm": 0.25981083449373843,
|
|
"learning_rate": 8.822121177040361e-06,
|
|
"loss": 0.402,
|
|
"step": 11215
|
|
},
|
|
{
|
|
"epoch": 1.6134598792062123,
|
|
"grad_norm": 0.2646924764535613,
|
|
"learning_rate": 8.814644464058593e-06,
|
|
"loss": 0.4172,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 1.6141788898475697,
|
|
"grad_norm": 0.25471272472143974,
|
|
"learning_rate": 8.807168423087983e-06,
|
|
"loss": 0.4239,
|
|
"step": 11225
|
|
},
|
|
{
|
|
"epoch": 1.6148979004889272,
|
|
"grad_norm": 0.2601487142133797,
|
|
"learning_rate": 8.799693058366907e-06,
|
|
"loss": 0.3952,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 1.6156169111302847,
|
|
"grad_norm": 0.2613849714941667,
|
|
"learning_rate": 8.792218374133356e-06,
|
|
"loss": 0.3974,
|
|
"step": 11235
|
|
},
|
|
{
|
|
"epoch": 1.6163359217716422,
|
|
"grad_norm": 0.2593664689454349,
|
|
"learning_rate": 8.784744374624942e-06,
|
|
"loss": 0.3999,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 1.6170549324129997,
|
|
"grad_norm": 0.25484361159747676,
|
|
"learning_rate": 8.777271064078876e-06,
|
|
"loss": 0.4157,
|
|
"step": 11245
|
|
},
|
|
{
|
|
"epoch": 1.6177739430543572,
|
|
"grad_norm": 0.2523734311430229,
|
|
"learning_rate": 8.769798446731998e-06,
|
|
"loss": 0.3991,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 1.6184929536957147,
|
|
"grad_norm": 0.26994175615265537,
|
|
"learning_rate": 8.762326526820732e-06,
|
|
"loss": 0.4286,
|
|
"step": 11255
|
|
},
|
|
{
|
|
"epoch": 1.6192119643370722,
|
|
"grad_norm": 0.2632910638122419,
|
|
"learning_rate": 8.754855308581125e-06,
|
|
"loss": 0.4229,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 1.6199309749784296,
|
|
"grad_norm": 0.25915962081022337,
|
|
"learning_rate": 8.747384796248819e-06,
|
|
"loss": 0.4139,
|
|
"step": 11265
|
|
},
|
|
{
|
|
"epoch": 1.6206499856197871,
|
|
"grad_norm": 0.25097847169720805,
|
|
"learning_rate": 8.739914994059055e-06,
|
|
"loss": 0.4272,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 1.6213689962611446,
|
|
"grad_norm": 0.24985927157515658,
|
|
"learning_rate": 8.732445906246667e-06,
|
|
"loss": 0.4112,
|
|
"step": 11275
|
|
},
|
|
{
|
|
"epoch": 1.6220880069025023,
|
|
"grad_norm": 0.256598628829339,
|
|
"learning_rate": 8.724977537046098e-06,
|
|
"loss": 0.4083,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 1.6228070175438596,
|
|
"grad_norm": 0.2450641360853421,
|
|
"learning_rate": 8.717509890691369e-06,
|
|
"loss": 0.4311,
|
|
"step": 11285
|
|
},
|
|
{
|
|
"epoch": 1.6235260281852173,
|
|
"grad_norm": 0.2671795024941469,
|
|
"learning_rate": 8.710042971416103e-06,
|
|
"loss": 0.4121,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 1.6242450388265746,
|
|
"grad_norm": 0.2587107726625021,
|
|
"learning_rate": 8.702576783453502e-06,
|
|
"loss": 0.4135,
|
|
"step": 11295
|
|
},
|
|
{
|
|
"epoch": 1.6249640494679323,
|
|
"grad_norm": 0.2622689960782433,
|
|
"learning_rate": 8.695111331036355e-06,
|
|
"loss": 0.4201,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 1.6256830601092895,
|
|
"grad_norm": 0.2513570762063957,
|
|
"learning_rate": 8.687646618397036e-06,
|
|
"loss": 0.416,
|
|
"step": 11305
|
|
},
|
|
{
|
|
"epoch": 1.6264020707506472,
|
|
"grad_norm": 0.24671811263841703,
|
|
"learning_rate": 8.680182649767503e-06,
|
|
"loss": 0.4045,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 1.6271210813920045,
|
|
"grad_norm": 0.2623165897224748,
|
|
"learning_rate": 8.672719429379281e-06,
|
|
"loss": 0.4088,
|
|
"step": 11315
|
|
},
|
|
{
|
|
"epoch": 1.6278400920333622,
|
|
"grad_norm": 0.26592779005926837,
|
|
"learning_rate": 8.665256961463484e-06,
|
|
"loss": 0.4234,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 1.6285591026747195,
|
|
"grad_norm": 0.25144319821086275,
|
|
"learning_rate": 8.657795250250794e-06,
|
|
"loss": 0.4378,
|
|
"step": 11325
|
|
},
|
|
{
|
|
"epoch": 1.6292781133160772,
|
|
"grad_norm": 0.253085995785813,
|
|
"learning_rate": 8.650334299971455e-06,
|
|
"loss": 0.418,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 1.6299971239574345,
|
|
"grad_norm": 0.252589440886837,
|
|
"learning_rate": 8.642874114855301e-06,
|
|
"loss": 0.4168,
|
|
"step": 11335
|
|
},
|
|
{
|
|
"epoch": 1.6307161345987922,
|
|
"grad_norm": 0.2547259874503826,
|
|
"learning_rate": 8.635414699131712e-06,
|
|
"loss": 0.4214,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 1.6314351452401494,
|
|
"grad_norm": 0.24450373380712287,
|
|
"learning_rate": 8.627956057029635e-06,
|
|
"loss": 0.4123,
|
|
"step": 11345
|
|
},
|
|
{
|
|
"epoch": 1.6321541558815071,
|
|
"grad_norm": 0.24479091576118742,
|
|
"learning_rate": 8.62049819277759e-06,
|
|
"loss": 0.4095,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 1.6328731665228644,
|
|
"grad_norm": 0.2636491979983794,
|
|
"learning_rate": 8.613041110603647e-06,
|
|
"loss": 0.4156,
|
|
"step": 11355
|
|
},
|
|
{
|
|
"epoch": 1.6335921771642221,
|
|
"grad_norm": 0.2590163047833479,
|
|
"learning_rate": 8.605584814735427e-06,
|
|
"loss": 0.4384,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 1.6343111878055794,
|
|
"grad_norm": 0.25448186761597574,
|
|
"learning_rate": 8.598129309400127e-06,
|
|
"loss": 0.4151,
|
|
"step": 11365
|
|
},
|
|
{
|
|
"epoch": 1.635030198446937,
|
|
"grad_norm": 0.252721234303155,
|
|
"learning_rate": 8.590674598824466e-06,
|
|
"loss": 0.4155,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 1.6357492090882944,
|
|
"grad_norm": 0.37533618952671777,
|
|
"learning_rate": 8.583220687234736e-06,
|
|
"loss": 0.42,
|
|
"step": 11375
|
|
},
|
|
{
|
|
"epoch": 1.636468219729652,
|
|
"grad_norm": 0.2449704487827158,
|
|
"learning_rate": 8.575767578856765e-06,
|
|
"loss": 0.3945,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 1.6371872303710093,
|
|
"grad_norm": 0.2702485491460619,
|
|
"learning_rate": 8.568315277915931e-06,
|
|
"loss": 0.4058,
|
|
"step": 11385
|
|
},
|
|
{
|
|
"epoch": 1.637906241012367,
|
|
"grad_norm": 0.26175005707515614,
|
|
"learning_rate": 8.560863788637144e-06,
|
|
"loss": 0.4115,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 1.6386252516537245,
|
|
"grad_norm": 0.24693775467819265,
|
|
"learning_rate": 8.553413115244873e-06,
|
|
"loss": 0.3991,
|
|
"step": 11395
|
|
},
|
|
{
|
|
"epoch": 1.639344262295082,
|
|
"grad_norm": 0.25829277088746205,
|
|
"learning_rate": 8.545963261963102e-06,
|
|
"loss": 0.4201,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 1.6400632729364395,
|
|
"grad_norm": 0.2535140416799969,
|
|
"learning_rate": 8.538514233015367e-06,
|
|
"loss": 0.4217,
|
|
"step": 11405
|
|
},
|
|
{
|
|
"epoch": 1.640782283577797,
|
|
"grad_norm": 0.25550572212281636,
|
|
"learning_rate": 8.531066032624732e-06,
|
|
"loss": 0.4111,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 1.6415012942191545,
|
|
"grad_norm": 0.2586339585288042,
|
|
"learning_rate": 8.523618665013782e-06,
|
|
"loss": 0.4289,
|
|
"step": 11415
|
|
},
|
|
{
|
|
"epoch": 1.642220304860512,
|
|
"grad_norm": 0.25344775465833025,
|
|
"learning_rate": 8.516172134404647e-06,
|
|
"loss": 0.4272,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 1.6429393155018694,
|
|
"grad_norm": 0.25587785003191293,
|
|
"learning_rate": 8.508726445018967e-06,
|
|
"loss": 0.42,
|
|
"step": 11425
|
|
},
|
|
{
|
|
"epoch": 1.643658326143227,
|
|
"grad_norm": 0.2716660468365486,
|
|
"learning_rate": 8.50128160107791e-06,
|
|
"loss": 0.4277,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 1.6443773367845844,
|
|
"grad_norm": 0.2574628146609621,
|
|
"learning_rate": 8.493837606802173e-06,
|
|
"loss": 0.4096,
|
|
"step": 11435
|
|
},
|
|
{
|
|
"epoch": 1.645096347425942,
|
|
"grad_norm": 0.251306677542674,
|
|
"learning_rate": 8.486394466411963e-06,
|
|
"loss": 0.4173,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 1.6458153580672994,
|
|
"grad_norm": 0.2623028187732833,
|
|
"learning_rate": 8.478952184126994e-06,
|
|
"loss": 0.4132,
|
|
"step": 11445
|
|
},
|
|
{
|
|
"epoch": 1.6465343687086569,
|
|
"grad_norm": 0.2424419146128029,
|
|
"learning_rate": 8.471510764166514e-06,
|
|
"loss": 0.4139,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 1.6472533793500144,
|
|
"grad_norm": 0.2531680948215564,
|
|
"learning_rate": 8.464070210749272e-06,
|
|
"loss": 0.4108,
|
|
"step": 11455
|
|
},
|
|
{
|
|
"epoch": 1.6479723899913719,
|
|
"grad_norm": 0.24696367691684704,
|
|
"learning_rate": 8.456630528093516e-06,
|
|
"loss": 0.3996,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 1.6486914006327293,
|
|
"grad_norm": 0.24706444705139494,
|
|
"learning_rate": 8.449191720417021e-06,
|
|
"loss": 0.4093,
|
|
"step": 11465
|
|
},
|
|
{
|
|
"epoch": 1.6494104112740868,
|
|
"grad_norm": 0.24554094568955817,
|
|
"learning_rate": 8.441753791937048e-06,
|
|
"loss": 0.4091,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 1.6501294219154443,
|
|
"grad_norm": 0.25029044608014944,
|
|
"learning_rate": 8.434316746870366e-06,
|
|
"loss": 0.4209,
|
|
"step": 11475
|
|
},
|
|
{
|
|
"epoch": 1.6508484325568018,
|
|
"grad_norm": 0.264859220220412,
|
|
"learning_rate": 8.426880589433251e-06,
|
|
"loss": 0.3988,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 1.6515674431981593,
|
|
"grad_norm": 0.24597444882032374,
|
|
"learning_rate": 8.419445323841464e-06,
|
|
"loss": 0.4182,
|
|
"step": 11485
|
|
},
|
|
{
|
|
"epoch": 1.6522864538395168,
|
|
"grad_norm": 0.2588401531342179,
|
|
"learning_rate": 8.412010954310259e-06,
|
|
"loss": 0.3916,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 1.6530054644808743,
|
|
"grad_norm": 0.25570847615940656,
|
|
"learning_rate": 8.404577485054394e-06,
|
|
"loss": 0.4031,
|
|
"step": 11495
|
|
},
|
|
{
|
|
"epoch": 1.6537244751222318,
|
|
"grad_norm": 0.2579101209032299,
|
|
"learning_rate": 8.39714492028811e-06,
|
|
"loss": 0.4161,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 1.6544434857635895,
|
|
"grad_norm": 0.2447172516290584,
|
|
"learning_rate": 8.389713264225134e-06,
|
|
"loss": 0.4217,
|
|
"step": 11505
|
|
},
|
|
{
|
|
"epoch": 1.6551624964049467,
|
|
"grad_norm": 0.24872364074589265,
|
|
"learning_rate": 8.382282521078682e-06,
|
|
"loss": 0.4129,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 1.6558815070463044,
|
|
"grad_norm": 0.2718399601622027,
|
|
"learning_rate": 8.374852695061444e-06,
|
|
"loss": 0.416,
|
|
"step": 11515
|
|
},
|
|
{
|
|
"epoch": 1.6566005176876617,
|
|
"grad_norm": 0.2683049774924081,
|
|
"learning_rate": 8.367423790385605e-06,
|
|
"loss": 0.4034,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 1.6573195283290194,
|
|
"grad_norm": 0.2573042976025013,
|
|
"learning_rate": 8.35999581126281e-06,
|
|
"loss": 0.4144,
|
|
"step": 11525
|
|
},
|
|
{
|
|
"epoch": 1.6580385389703767,
|
|
"grad_norm": 0.2582272922447818,
|
|
"learning_rate": 8.352568761904187e-06,
|
|
"loss": 0.4143,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 1.6587575496117344,
|
|
"grad_norm": 0.25288542470974607,
|
|
"learning_rate": 8.345142646520347e-06,
|
|
"loss": 0.4215,
|
|
"step": 11535
|
|
},
|
|
{
|
|
"epoch": 1.6594765602530916,
|
|
"grad_norm": 0.2827850044449669,
|
|
"learning_rate": 8.337717469321359e-06,
|
|
"loss": 0.418,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 1.6601955708944494,
|
|
"grad_norm": 0.2469343296900172,
|
|
"learning_rate": 8.330293234516753e-06,
|
|
"loss": 0.4245,
|
|
"step": 11545
|
|
},
|
|
{
|
|
"epoch": 1.6609145815358066,
|
|
"grad_norm": 0.24968847563271604,
|
|
"learning_rate": 8.322869946315549e-06,
|
|
"loss": 0.4147,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 1.6616335921771643,
|
|
"grad_norm": 0.25565355098064496,
|
|
"learning_rate": 8.315447608926211e-06,
|
|
"loss": 0.4174,
|
|
"step": 11555
|
|
},
|
|
{
|
|
"epoch": 1.6623526028185216,
|
|
"grad_norm": 0.25859426197349605,
|
|
"learning_rate": 8.308026226556665e-06,
|
|
"loss": 0.4029,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 1.6630716134598793,
|
|
"grad_norm": 0.2681683238557652,
|
|
"learning_rate": 8.300605803414308e-06,
|
|
"loss": 0.4045,
|
|
"step": 11565
|
|
},
|
|
{
|
|
"epoch": 1.6637906241012366,
|
|
"grad_norm": 0.2633484725649643,
|
|
"learning_rate": 8.293186343705979e-06,
|
|
"loss": 0.4057,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 1.6645096347425943,
|
|
"grad_norm": 0.2638725208834779,
|
|
"learning_rate": 8.285767851637977e-06,
|
|
"loss": 0.4159,
|
|
"step": 11575
|
|
},
|
|
{
|
|
"epoch": 1.6652286453839515,
|
|
"grad_norm": 0.2553500904157079,
|
|
"learning_rate": 8.278350331416057e-06,
|
|
"loss": 0.4241,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 1.6659476560253093,
|
|
"grad_norm": 0.25723705364445204,
|
|
"learning_rate": 8.270933787245417e-06,
|
|
"loss": 0.4017,
|
|
"step": 11585
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.2727500616792483,
|
|
"learning_rate": 8.263518223330698e-06,
|
|
"loss": 0.4053,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 1.6673856773080242,
|
|
"grad_norm": 0.26941061846366393,
|
|
"learning_rate": 8.256103643875995e-06,
|
|
"loss": 0.4165,
|
|
"step": 11595
|
|
},
|
|
{
|
|
"epoch": 1.6681046879493815,
|
|
"grad_norm": 0.2444646366791917,
|
|
"learning_rate": 8.248690053084841e-06,
|
|
"loss": 0.4072,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 1.6688236985907392,
|
|
"grad_norm": 0.25602855863749197,
|
|
"learning_rate": 8.241277455160202e-06,
|
|
"loss": 0.4113,
|
|
"step": 11605
|
|
},
|
|
{
|
|
"epoch": 1.6695427092320967,
|
|
"grad_norm": 0.25080661137297516,
|
|
"learning_rate": 8.233865854304497e-06,
|
|
"loss": 0.4107,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 1.6702617198734542,
|
|
"grad_norm": 0.2601363858469565,
|
|
"learning_rate": 8.226455254719555e-06,
|
|
"loss": 0.432,
|
|
"step": 11615
|
|
},
|
|
{
|
|
"epoch": 1.6709807305148117,
|
|
"grad_norm": 0.28065750563168684,
|
|
"learning_rate": 8.219045660606664e-06,
|
|
"loss": 0.4159,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 1.6716997411561692,
|
|
"grad_norm": 0.2548133076568602,
|
|
"learning_rate": 8.211637076166528e-06,
|
|
"loss": 0.4208,
|
|
"step": 11625
|
|
},
|
|
{
|
|
"epoch": 1.6724187517975266,
|
|
"grad_norm": 0.25883680184634433,
|
|
"learning_rate": 8.204229505599273e-06,
|
|
"loss": 0.4372,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 1.6731377624388841,
|
|
"grad_norm": 0.2535910290217827,
|
|
"learning_rate": 8.196822953104467e-06,
|
|
"loss": 0.4242,
|
|
"step": 11635
|
|
},
|
|
{
|
|
"epoch": 1.6738567730802416,
|
|
"grad_norm": 0.256481935859069,
|
|
"learning_rate": 8.189417422881089e-06,
|
|
"loss": 0.4179,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 1.674575783721599,
|
|
"grad_norm": 0.25562450892249666,
|
|
"learning_rate": 8.182012919127533e-06,
|
|
"loss": 0.4157,
|
|
"step": 11645
|
|
},
|
|
{
|
|
"epoch": 1.6752947943629566,
|
|
"grad_norm": 0.25399721604854614,
|
|
"learning_rate": 8.174609446041629e-06,
|
|
"loss": 0.4128,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 1.676013805004314,
|
|
"grad_norm": 0.2573526645625574,
|
|
"learning_rate": 8.167207007820609e-06,
|
|
"loss": 0.3922,
|
|
"step": 11655
|
|
},
|
|
{
|
|
"epoch": 1.6767328156456716,
|
|
"grad_norm": 0.2546319232775869,
|
|
"learning_rate": 8.159805608661118e-06,
|
|
"loss": 0.3997,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 1.677451826287029,
|
|
"grad_norm": 0.24976033067638184,
|
|
"learning_rate": 8.152405252759224e-06,
|
|
"loss": 0.4041,
|
|
"step": 11665
|
|
},
|
|
{
|
|
"epoch": 1.6781708369283865,
|
|
"grad_norm": 0.246033218802702,
|
|
"learning_rate": 8.14500594431039e-06,
|
|
"loss": 0.4219,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 1.678889847569744,
|
|
"grad_norm": 0.25489234934871835,
|
|
"learning_rate": 8.137607687509488e-06,
|
|
"loss": 0.4253,
|
|
"step": 11675
|
|
},
|
|
{
|
|
"epoch": 1.6796088582111015,
|
|
"grad_norm": 0.2599339826732556,
|
|
"learning_rate": 8.130210486550805e-06,
|
|
"loss": 0.4092,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 1.680327868852459,
|
|
"grad_norm": 0.24877726767549682,
|
|
"learning_rate": 8.122814345628016e-06,
|
|
"loss": 0.3958,
|
|
"step": 11685
|
|
},
|
|
{
|
|
"epoch": 1.6810468794938165,
|
|
"grad_norm": 0.2571497954450008,
|
|
"learning_rate": 8.115419268934196e-06,
|
|
"loss": 0.4288,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 1.681765890135174,
|
|
"grad_norm": 0.26365956839645893,
|
|
"learning_rate": 8.108025260661826e-06,
|
|
"loss": 0.414,
|
|
"step": 11695
|
|
},
|
|
{
|
|
"epoch": 1.6824849007765315,
|
|
"grad_norm": 0.2583046571555305,
|
|
"learning_rate": 8.100632325002775e-06,
|
|
"loss": 0.4095,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 1.683203911417889,
|
|
"grad_norm": 0.25333167054802613,
|
|
"learning_rate": 8.0932404661483e-06,
|
|
"loss": 0.4184,
|
|
"step": 11705
|
|
},
|
|
{
|
|
"epoch": 1.6839229220592464,
|
|
"grad_norm": 0.2545174291939654,
|
|
"learning_rate": 8.08584968828906e-06,
|
|
"loss": 0.4286,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 1.684641932700604,
|
|
"grad_norm": 0.24189809389835804,
|
|
"learning_rate": 8.07845999561509e-06,
|
|
"loss": 0.4244,
|
|
"step": 11715
|
|
},
|
|
{
|
|
"epoch": 1.6853609433419616,
|
|
"grad_norm": 0.25475398318370074,
|
|
"learning_rate": 8.071071392315807e-06,
|
|
"loss": 0.4025,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 1.686079953983319,
|
|
"grad_norm": 0.24862476692330737,
|
|
"learning_rate": 8.063683882580027e-06,
|
|
"loss": 0.4017,
|
|
"step": 11725
|
|
},
|
|
{
|
|
"epoch": 1.6867989646246766,
|
|
"grad_norm": 0.2514570552491363,
|
|
"learning_rate": 8.056297470595926e-06,
|
|
"loss": 0.4033,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 1.6875179752660339,
|
|
"grad_norm": 0.2570073890835067,
|
|
"learning_rate": 8.048912160551076e-06,
|
|
"loss": 0.4174,
|
|
"step": 11735
|
|
},
|
|
{
|
|
"epoch": 1.6882369859073916,
|
|
"grad_norm": 0.25748440151309115,
|
|
"learning_rate": 8.041527956632412e-06,
|
|
"loss": 0.4304,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 1.6889559965487488,
|
|
"grad_norm": 0.25199098359637623,
|
|
"learning_rate": 8.03414486302624e-06,
|
|
"loss": 0.4077,
|
|
"step": 11745
|
|
},
|
|
{
|
|
"epoch": 1.6896750071901065,
|
|
"grad_norm": 0.25570662131789734,
|
|
"learning_rate": 8.02676288391825e-06,
|
|
"loss": 0.4037,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 1.6903940178314638,
|
|
"grad_norm": 0.24959302860918878,
|
|
"learning_rate": 8.019382023493491e-06,
|
|
"loss": 0.4185,
|
|
"step": 11755
|
|
},
|
|
{
|
|
"epoch": 1.6911130284728215,
|
|
"grad_norm": 0.2585340435554818,
|
|
"learning_rate": 8.012002285936372e-06,
|
|
"loss": 0.4192,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 1.6918320391141788,
|
|
"grad_norm": 0.2512367989560759,
|
|
"learning_rate": 8.00462367543068e-06,
|
|
"loss": 0.4029,
|
|
"step": 11765
|
|
},
|
|
{
|
|
"epoch": 1.6925510497555365,
|
|
"grad_norm": 0.2557612581821425,
|
|
"learning_rate": 7.997246196159552e-06,
|
|
"loss": 0.4161,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 1.6932700603968938,
|
|
"grad_norm": 0.25333856532359916,
|
|
"learning_rate": 7.989869852305485e-06,
|
|
"loss": 0.4188,
|
|
"step": 11775
|
|
},
|
|
{
|
|
"epoch": 1.6939890710382515,
|
|
"grad_norm": 0.26739129697120595,
|
|
"learning_rate": 7.982494648050341e-06,
|
|
"loss": 0.4093,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 1.6947080816796087,
|
|
"grad_norm": 0.25048961243751644,
|
|
"learning_rate": 7.975120587575325e-06,
|
|
"loss": 0.4203,
|
|
"step": 11785
|
|
},
|
|
{
|
|
"epoch": 1.6954270923209664,
|
|
"grad_norm": 0.2613477012494576,
|
|
"learning_rate": 7.967747675060993e-06,
|
|
"loss": 0.431,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 1.6961461029623237,
|
|
"grad_norm": 0.27191929486403327,
|
|
"learning_rate": 7.960375914687264e-06,
|
|
"loss": 0.4323,
|
|
"step": 11795
|
|
},
|
|
{
|
|
"epoch": 1.6968651136036814,
|
|
"grad_norm": 0.24900545939168423,
|
|
"learning_rate": 7.95300531063339e-06,
|
|
"loss": 0.4048,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 1.6975841242450387,
|
|
"grad_norm": 0.2618696749267013,
|
|
"learning_rate": 7.945635867077971e-06,
|
|
"loss": 0.405,
|
|
"step": 11805
|
|
},
|
|
{
|
|
"epoch": 1.6983031348863964,
|
|
"grad_norm": 0.258639069476796,
|
|
"learning_rate": 7.938267588198955e-06,
|
|
"loss": 0.4081,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 1.6990221455277537,
|
|
"grad_norm": 0.26244337690348685,
|
|
"learning_rate": 7.930900478173621e-06,
|
|
"loss": 0.4121,
|
|
"step": 11815
|
|
},
|
|
{
|
|
"epoch": 1.6997411561691114,
|
|
"grad_norm": 0.2504237314387894,
|
|
"learning_rate": 7.92353454117859e-06,
|
|
"loss": 0.417,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 1.7004601668104686,
|
|
"grad_norm": 0.24874053964898726,
|
|
"learning_rate": 7.91616978138982e-06,
|
|
"loss": 0.4058,
|
|
"step": 11825
|
|
},
|
|
{
|
|
"epoch": 1.7011791774518263,
|
|
"grad_norm": 0.24678653994922276,
|
|
"learning_rate": 7.908806202982595e-06,
|
|
"loss": 0.4127,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 1.7018981880931838,
|
|
"grad_norm": 0.2543663868155727,
|
|
"learning_rate": 7.90144381013154e-06,
|
|
"loss": 0.4246,
|
|
"step": 11835
|
|
},
|
|
{
|
|
"epoch": 1.7026171987345413,
|
|
"grad_norm": 0.2493487529962913,
|
|
"learning_rate": 7.894082607010593e-06,
|
|
"loss": 0.411,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 1.7033362093758988,
|
|
"grad_norm": 0.25970051800487864,
|
|
"learning_rate": 7.886722597793029e-06,
|
|
"loss": 0.4186,
|
|
"step": 11845
|
|
},
|
|
{
|
|
"epoch": 1.7040552200172563,
|
|
"grad_norm": 0.25806108757156404,
|
|
"learning_rate": 7.879363786651445e-06,
|
|
"loss": 0.4187,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 1.7047742306586138,
|
|
"grad_norm": 0.26438051545829483,
|
|
"learning_rate": 7.872006177757757e-06,
|
|
"loss": 0.3951,
|
|
"step": 11855
|
|
},
|
|
{
|
|
"epoch": 1.7054932412999713,
|
|
"grad_norm": 0.2729109214158478,
|
|
"learning_rate": 7.86464977528319e-06,
|
|
"loss": 0.4217,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 1.7062122519413288,
|
|
"grad_norm": 0.24814552432716896,
|
|
"learning_rate": 7.857294583398303e-06,
|
|
"loss": 0.4196,
|
|
"step": 11865
|
|
},
|
|
{
|
|
"epoch": 1.7069312625826862,
|
|
"grad_norm": 0.25027211561214646,
|
|
"learning_rate": 7.849940606272962e-06,
|
|
"loss": 0.4087,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 1.7076502732240437,
|
|
"grad_norm": 0.2566008836069061,
|
|
"learning_rate": 7.842587848076329e-06,
|
|
"loss": 0.4077,
|
|
"step": 11875
|
|
},
|
|
{
|
|
"epoch": 1.7083692838654012,
|
|
"grad_norm": 0.2601929542751101,
|
|
"learning_rate": 7.835236312976903e-06,
|
|
"loss": 0.4126,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 1.7090882945067587,
|
|
"grad_norm": 0.2580914302200825,
|
|
"learning_rate": 7.827886005142466e-06,
|
|
"loss": 0.4194,
|
|
"step": 11885
|
|
},
|
|
{
|
|
"epoch": 1.7098073051481162,
|
|
"grad_norm": 0.24538322336023752,
|
|
"learning_rate": 7.820536928740113e-06,
|
|
"loss": 0.4136,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 1.7105263157894737,
|
|
"grad_norm": 0.2564642684564163,
|
|
"learning_rate": 7.813189087936243e-06,
|
|
"loss": 0.413,
|
|
"step": 11895
|
|
},
|
|
{
|
|
"epoch": 1.7112453264308312,
|
|
"grad_norm": 0.24768429163281047,
|
|
"learning_rate": 7.805842486896553e-06,
|
|
"loss": 0.4135,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 1.7119643370721886,
|
|
"grad_norm": 0.2557952248722331,
|
|
"learning_rate": 7.79849712978603e-06,
|
|
"loss": 0.4206,
|
|
"step": 11905
|
|
},
|
|
{
|
|
"epoch": 1.7126833477135461,
|
|
"grad_norm": 0.2549016847496704,
|
|
"learning_rate": 7.791153020768974e-06,
|
|
"loss": 0.415,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 1.7134023583549036,
|
|
"grad_norm": 0.24628783851295163,
|
|
"learning_rate": 7.783810164008954e-06,
|
|
"loss": 0.4039,
|
|
"step": 11915
|
|
},
|
|
{
|
|
"epoch": 1.714121368996261,
|
|
"grad_norm": 0.27296354639667286,
|
|
"learning_rate": 7.776468563668842e-06,
|
|
"loss": 0.4066,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 1.7148403796376186,
|
|
"grad_norm": 0.2601042254906317,
|
|
"learning_rate": 7.769128223910805e-06,
|
|
"loss": 0.4246,
|
|
"step": 11925
|
|
},
|
|
{
|
|
"epoch": 1.715559390278976,
|
|
"grad_norm": 0.24466200646047612,
|
|
"learning_rate": 7.761789148896279e-06,
|
|
"loss": 0.3994,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 1.7162784009203336,
|
|
"grad_norm": 0.2508836356329874,
|
|
"learning_rate": 7.75445134278599e-06,
|
|
"loss": 0.4112,
|
|
"step": 11935
|
|
},
|
|
{
|
|
"epoch": 1.716997411561691,
|
|
"grad_norm": 0.2500487508873629,
|
|
"learning_rate": 7.747114809739949e-06,
|
|
"loss": 0.4105,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 1.7177164222030488,
|
|
"grad_norm": 0.25512578741818087,
|
|
"learning_rate": 7.739779553917437e-06,
|
|
"loss": 0.4133,
|
|
"step": 11945
|
|
},
|
|
{
|
|
"epoch": 1.718435432844406,
|
|
"grad_norm": 0.2506313982709355,
|
|
"learning_rate": 7.732445579477022e-06,
|
|
"loss": 0.4169,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 1.7191544434857637,
|
|
"grad_norm": 0.25695235121598614,
|
|
"learning_rate": 7.725112890576537e-06,
|
|
"loss": 0.409,
|
|
"step": 11955
|
|
},
|
|
{
|
|
"epoch": 1.719873454127121,
|
|
"grad_norm": 0.2673624331694823,
|
|
"learning_rate": 7.717781491373082e-06,
|
|
"loss": 0.4036,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 1.7205924647684787,
|
|
"grad_norm": 0.25013590803678853,
|
|
"learning_rate": 7.710451386023037e-06,
|
|
"loss": 0.4097,
|
|
"step": 11965
|
|
},
|
|
{
|
|
"epoch": 1.721311475409836,
|
|
"grad_norm": 0.25866063365324493,
|
|
"learning_rate": 7.703122578682047e-06,
|
|
"loss": 0.4118,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 1.7220304860511937,
|
|
"grad_norm": 0.2433560125133353,
|
|
"learning_rate": 7.695795073505007e-06,
|
|
"loss": 0.4116,
|
|
"step": 11975
|
|
},
|
|
{
|
|
"epoch": 1.722749496692551,
|
|
"grad_norm": 0.2572523400000175,
|
|
"learning_rate": 7.688468874646096e-06,
|
|
"loss": 0.409,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 1.7234685073339087,
|
|
"grad_norm": 0.254914406228328,
|
|
"learning_rate": 7.681143986258734e-06,
|
|
"loss": 0.4055,
|
|
"step": 11985
|
|
},
|
|
{
|
|
"epoch": 1.724187517975266,
|
|
"grad_norm": 0.25364391121639074,
|
|
"learning_rate": 7.673820412495603e-06,
|
|
"loss": 0.4232,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 1.7249065286166236,
|
|
"grad_norm": 0.2646047450444079,
|
|
"learning_rate": 7.666498157508651e-06,
|
|
"loss": 0.4396,
|
|
"step": 11995
|
|
},
|
|
{
|
|
"epoch": 1.725625539257981,
|
|
"grad_norm": 0.25353357082340133,
|
|
"learning_rate": 7.65917722544906e-06,
|
|
"loss": 0.4138,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 1.7263445498993386,
|
|
"grad_norm": 0.2530485936086203,
|
|
"learning_rate": 7.65185762046727e-06,
|
|
"loss": 0.4064,
|
|
"step": 12005
|
|
},
|
|
{
|
|
"epoch": 1.7270635605406959,
|
|
"grad_norm": 0.2610894328689711,
|
|
"learning_rate": 7.644539346712975e-06,
|
|
"loss": 0.4091,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 1.7277825711820536,
|
|
"grad_norm": 0.2531467817667373,
|
|
"learning_rate": 7.63722240833511e-06,
|
|
"loss": 0.4184,
|
|
"step": 12015
|
|
},
|
|
{
|
|
"epoch": 1.7285015818234108,
|
|
"grad_norm": 0.24719229058431075,
|
|
"learning_rate": 7.629906809481843e-06,
|
|
"loss": 0.4289,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 1.7292205924647686,
|
|
"grad_norm": 0.2561062301934565,
|
|
"learning_rate": 7.6225925543006005e-06,
|
|
"loss": 0.4188,
|
|
"step": 12025
|
|
},
|
|
{
|
|
"epoch": 1.7299396031061258,
|
|
"grad_norm": 0.2568015067914993,
|
|
"learning_rate": 7.6152796469380354e-06,
|
|
"loss": 0.4148,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 1.7306586137474835,
|
|
"grad_norm": 0.26593094171482756,
|
|
"learning_rate": 7.607968091540032e-06,
|
|
"loss": 0.4022,
|
|
"step": 12035
|
|
},
|
|
{
|
|
"epoch": 1.7313776243888408,
|
|
"grad_norm": 0.2623399010130668,
|
|
"learning_rate": 7.600657892251725e-06,
|
|
"loss": 0.4152,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 1.7320966350301985,
|
|
"grad_norm": 0.2550964119530657,
|
|
"learning_rate": 7.593349053217468e-06,
|
|
"loss": 0.4099,
|
|
"step": 12045
|
|
},
|
|
{
|
|
"epoch": 1.732815645671556,
|
|
"grad_norm": 0.25329097455045146,
|
|
"learning_rate": 7.586041578580841e-06,
|
|
"loss": 0.4087,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 1.7335346563129135,
|
|
"grad_norm": 0.25088733417566467,
|
|
"learning_rate": 7.578735472484663e-06,
|
|
"loss": 0.4135,
|
|
"step": 12055
|
|
},
|
|
{
|
|
"epoch": 1.734253666954271,
|
|
"grad_norm": 0.26380431521754316,
|
|
"learning_rate": 7.571430739070962e-06,
|
|
"loss": 0.4145,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 1.7349726775956285,
|
|
"grad_norm": 0.25931018794731403,
|
|
"learning_rate": 7.564127382481e-06,
|
|
"loss": 0.4185,
|
|
"step": 12065
|
|
},
|
|
{
|
|
"epoch": 1.735691688236986,
|
|
"grad_norm": 0.25817381474927426,
|
|
"learning_rate": 7.556825406855256e-06,
|
|
"loss": 0.4144,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 1.7364106988783434,
|
|
"grad_norm": 0.25889864425988607,
|
|
"learning_rate": 7.549524816333416e-06,
|
|
"loss": 0.4348,
|
|
"step": 12075
|
|
},
|
|
{
|
|
"epoch": 1.737129709519701,
|
|
"grad_norm": 0.25930933779186754,
|
|
"learning_rate": 7.542225615054397e-06,
|
|
"loss": 0.4058,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 1.7378487201610584,
|
|
"grad_norm": 0.2504175659441515,
|
|
"learning_rate": 7.534927807156316e-06,
|
|
"loss": 0.4047,
|
|
"step": 12085
|
|
},
|
|
{
|
|
"epoch": 1.738567730802416,
|
|
"grad_norm": 0.26844272991946744,
|
|
"learning_rate": 7.527631396776503e-06,
|
|
"loss": 0.4173,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 1.7392867414437734,
|
|
"grad_norm": 0.2445372194665716,
|
|
"learning_rate": 7.5203363880515005e-06,
|
|
"loss": 0.4035,
|
|
"step": 12095
|
|
},
|
|
{
|
|
"epoch": 1.7400057520851309,
|
|
"grad_norm": 0.26059488273974246,
|
|
"learning_rate": 7.513042785117052e-06,
|
|
"loss": 0.4278,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 1.7407247627264884,
|
|
"grad_norm": 0.2543288007640774,
|
|
"learning_rate": 7.505750592108099e-06,
|
|
"loss": 0.4237,
|
|
"step": 12105
|
|
},
|
|
{
|
|
"epoch": 1.7414437733678458,
|
|
"grad_norm": 0.2528374529354086,
|
|
"learning_rate": 7.498459813158795e-06,
|
|
"loss": 0.4122,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 1.7421627840092033,
|
|
"grad_norm": 0.24673863195302134,
|
|
"learning_rate": 7.4911704524024875e-06,
|
|
"loss": 0.3958,
|
|
"step": 12115
|
|
},
|
|
{
|
|
"epoch": 1.7428817946505608,
|
|
"grad_norm": 0.26716661370758454,
|
|
"learning_rate": 7.483882513971712e-06,
|
|
"loss": 0.4197,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 1.7436008052919183,
|
|
"grad_norm": 0.26342608501436565,
|
|
"learning_rate": 7.476596001998212e-06,
|
|
"loss": 0.4071,
|
|
"step": 12125
|
|
},
|
|
{
|
|
"epoch": 1.7443198159332758,
|
|
"grad_norm": 0.24654088237466748,
|
|
"learning_rate": 7.469310920612909e-06,
|
|
"loss": 0.3981,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 1.7450388265746333,
|
|
"grad_norm": 0.24286792380956676,
|
|
"learning_rate": 7.462027273945922e-06,
|
|
"loss": 0.4047,
|
|
"step": 12135
|
|
},
|
|
{
|
|
"epoch": 1.7457578372159908,
|
|
"grad_norm": 0.26085705339506143,
|
|
"learning_rate": 7.4547450661265516e-06,
|
|
"loss": 0.4265,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 1.7464768478573482,
|
|
"grad_norm": 0.24945305437436543,
|
|
"learning_rate": 7.44746430128329e-06,
|
|
"loss": 0.4044,
|
|
"step": 12145
|
|
},
|
|
{
|
|
"epoch": 1.7471958584987057,
|
|
"grad_norm": 0.24713572243066181,
|
|
"learning_rate": 7.440184983543797e-06,
|
|
"loss": 0.3991,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 1.7479148691400632,
|
|
"grad_norm": 0.25381336439566454,
|
|
"learning_rate": 7.43290711703493e-06,
|
|
"loss": 0.401,
|
|
"step": 12155
|
|
},
|
|
{
|
|
"epoch": 1.748633879781421,
|
|
"grad_norm": 0.2578191736856875,
|
|
"learning_rate": 7.425630705882707e-06,
|
|
"loss": 0.4056,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 1.7493528904227782,
|
|
"grad_norm": 0.26202925689193507,
|
|
"learning_rate": 7.4183557542123344e-06,
|
|
"loss": 0.3927,
|
|
"step": 12165
|
|
},
|
|
{
|
|
"epoch": 1.750071901064136,
|
|
"grad_norm": 0.26943617048489577,
|
|
"learning_rate": 7.4110822661481875e-06,
|
|
"loss": 0.443,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 1.7507909117054932,
|
|
"grad_norm": 0.2561860768336747,
|
|
"learning_rate": 7.4038102458138e-06,
|
|
"loss": 0.4143,
|
|
"step": 12175
|
|
},
|
|
{
|
|
"epoch": 1.7515099223468509,
|
|
"grad_norm": 0.25232020768058827,
|
|
"learning_rate": 7.396539697331895e-06,
|
|
"loss": 0.4213,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 1.7522289329882081,
|
|
"grad_norm": 0.25036255007581687,
|
|
"learning_rate": 7.389270624824342e-06,
|
|
"loss": 0.4313,
|
|
"step": 12185
|
|
},
|
|
{
|
|
"epoch": 1.7529479436295659,
|
|
"grad_norm": 0.2522764405837337,
|
|
"learning_rate": 7.3820030324121796e-06,
|
|
"loss": 0.4085,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 1.7536669542709231,
|
|
"grad_norm": 0.32132127532992866,
|
|
"learning_rate": 7.374736924215618e-06,
|
|
"loss": 0.4203,
|
|
"step": 12195
|
|
},
|
|
{
|
|
"epoch": 1.7543859649122808,
|
|
"grad_norm": 0.2611388242956791,
|
|
"learning_rate": 7.367472304354011e-06,
|
|
"loss": 0.4051,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 1.755104975553638,
|
|
"grad_norm": 0.2643832666228025,
|
|
"learning_rate": 7.3602091769458695e-06,
|
|
"loss": 0.4204,
|
|
"step": 12205
|
|
},
|
|
{
|
|
"epoch": 1.7558239861949958,
|
|
"grad_norm": 0.2617194604985132,
|
|
"learning_rate": 7.352947546108873e-06,
|
|
"loss": 0.4099,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 1.756542996836353,
|
|
"grad_norm": 0.256218391720951,
|
|
"learning_rate": 7.345687415959839e-06,
|
|
"loss": 0.431,
|
|
"step": 12215
|
|
},
|
|
{
|
|
"epoch": 1.7572620074777108,
|
|
"grad_norm": 0.26285248982787435,
|
|
"learning_rate": 7.338428790614732e-06,
|
|
"loss": 0.4197,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 1.757981018119068,
|
|
"grad_norm": 0.25901669856983645,
|
|
"learning_rate": 7.3311716741886806e-06,
|
|
"loss": 0.417,
|
|
"step": 12225
|
|
},
|
|
{
|
|
"epoch": 1.7587000287604257,
|
|
"grad_norm": 0.2513594012736961,
|
|
"learning_rate": 7.323916070795939e-06,
|
|
"loss": 0.4025,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 1.759419039401783,
|
|
"grad_norm": 0.25998980539805544,
|
|
"learning_rate": 7.316661984549911e-06,
|
|
"loss": 0.4233,
|
|
"step": 12235
|
|
},
|
|
{
|
|
"epoch": 1.7601380500431407,
|
|
"grad_norm": 0.2591071066572429,
|
|
"learning_rate": 7.309409419563147e-06,
|
|
"loss": 0.4014,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 1.760857060684498,
|
|
"grad_norm": 0.2558272045675344,
|
|
"learning_rate": 7.302158379947325e-06,
|
|
"loss": 0.4089,
|
|
"step": 12245
|
|
},
|
|
{
|
|
"epoch": 1.7615760713258557,
|
|
"grad_norm": 0.25425164833366926,
|
|
"learning_rate": 7.294908869813258e-06,
|
|
"loss": 0.3968,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 1.762295081967213,
|
|
"grad_norm": 0.2506035785957442,
|
|
"learning_rate": 7.287660893270901e-06,
|
|
"loss": 0.4223,
|
|
"step": 12255
|
|
},
|
|
{
|
|
"epoch": 1.7630140926085707,
|
|
"grad_norm": 0.258303350313874,
|
|
"learning_rate": 7.280414454429335e-06,
|
|
"loss": 0.4134,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 1.763733103249928,
|
|
"grad_norm": 0.24989677588921191,
|
|
"learning_rate": 7.27316955739676e-06,
|
|
"loss": 0.4064,
|
|
"step": 12265
|
|
},
|
|
{
|
|
"epoch": 1.7644521138912856,
|
|
"grad_norm": 0.24880185248679065,
|
|
"learning_rate": 7.265926206280523e-06,
|
|
"loss": 0.4064,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 1.7651711245326431,
|
|
"grad_norm": 0.24900137931539176,
|
|
"learning_rate": 7.258684405187071e-06,
|
|
"loss": 0.4228,
|
|
"step": 12275
|
|
},
|
|
{
|
|
"epoch": 1.7658901351740006,
|
|
"grad_norm": 0.25847035549259967,
|
|
"learning_rate": 7.251444158221992e-06,
|
|
"loss": 0.4308,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 1.766609145815358,
|
|
"grad_norm": 0.2537305545308351,
|
|
"learning_rate": 7.244205469489979e-06,
|
|
"loss": 0.4046,
|
|
"step": 12285
|
|
},
|
|
{
|
|
"epoch": 1.7673281564567156,
|
|
"grad_norm": 0.26388046085735983,
|
|
"learning_rate": 7.236968343094846e-06,
|
|
"loss": 0.4141,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 1.768047167098073,
|
|
"grad_norm": 0.26114087770670197,
|
|
"learning_rate": 7.229732783139527e-06,
|
|
"loss": 0.4033,
|
|
"step": 12295
|
|
},
|
|
{
|
|
"epoch": 1.7687661777394306,
|
|
"grad_norm": 0.24486941677588542,
|
|
"learning_rate": 7.222498793726061e-06,
|
|
"loss": 0.414,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 1.769485188380788,
|
|
"grad_norm": 0.2589314180901157,
|
|
"learning_rate": 7.215266378955592e-06,
|
|
"loss": 0.4209,
|
|
"step": 12305
|
|
},
|
|
{
|
|
"epoch": 1.7702041990221455,
|
|
"grad_norm": 0.26416909532177263,
|
|
"learning_rate": 7.208035542928388e-06,
|
|
"loss": 0.4019,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 1.770923209663503,
|
|
"grad_norm": 0.2615146702373038,
|
|
"learning_rate": 7.2008062897438084e-06,
|
|
"loss": 0.4177,
|
|
"step": 12315
|
|
},
|
|
{
|
|
"epoch": 1.7716422203048605,
|
|
"grad_norm": 0.2558786053060417,
|
|
"learning_rate": 7.193578623500314e-06,
|
|
"loss": 0.3994,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 1.772361230946218,
|
|
"grad_norm": 0.2591167653155897,
|
|
"learning_rate": 7.186352548295479e-06,
|
|
"loss": 0.4176,
|
|
"step": 12325
|
|
},
|
|
{
|
|
"epoch": 1.7730802415875755,
|
|
"grad_norm": 0.2565385002090668,
|
|
"learning_rate": 7.179128068225959e-06,
|
|
"loss": 0.417,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 1.773799252228933,
|
|
"grad_norm": 0.2513995475419433,
|
|
"learning_rate": 7.171905187387517e-06,
|
|
"loss": 0.4261,
|
|
"step": 12335
|
|
},
|
|
{
|
|
"epoch": 1.7745182628702905,
|
|
"grad_norm": 0.2540054014535628,
|
|
"learning_rate": 7.16468390987501e-06,
|
|
"loss": 0.4056,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 1.775237273511648,
|
|
"grad_norm": 0.26390233707389416,
|
|
"learning_rate": 7.1574642397823764e-06,
|
|
"loss": 0.4284,
|
|
"step": 12345
|
|
},
|
|
{
|
|
"epoch": 1.7759562841530054,
|
|
"grad_norm": 0.251247535422529,
|
|
"learning_rate": 7.150246181202648e-06,
|
|
"loss": 0.4165,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 1.776675294794363,
|
|
"grad_norm": 0.25335251899602307,
|
|
"learning_rate": 7.143029738227948e-06,
|
|
"loss": 0.3999,
|
|
"step": 12355
|
|
},
|
|
{
|
|
"epoch": 1.7773943054357204,
|
|
"grad_norm": 0.254603817611765,
|
|
"learning_rate": 7.135814914949479e-06,
|
|
"loss": 0.4183,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 1.778113316077078,
|
|
"grad_norm": 0.2627901777993984,
|
|
"learning_rate": 7.128601715457522e-06,
|
|
"loss": 0.4123,
|
|
"step": 12365
|
|
},
|
|
{
|
|
"epoch": 1.7788323267184354,
|
|
"grad_norm": 0.267622124641138,
|
|
"learning_rate": 7.1213901438414455e-06,
|
|
"loss": 0.4159,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 1.7795513373597929,
|
|
"grad_norm": 0.25978798992318286,
|
|
"learning_rate": 7.114180204189689e-06,
|
|
"loss": 0.4229,
|
|
"step": 12375
|
|
},
|
|
{
|
|
"epoch": 1.7802703480011504,
|
|
"grad_norm": 0.24647432047225887,
|
|
"learning_rate": 7.106971900589765e-06,
|
|
"loss": 0.4039,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 1.780989358642508,
|
|
"grad_norm": 0.2570864161270665,
|
|
"learning_rate": 7.099765237128271e-06,
|
|
"loss": 0.4201,
|
|
"step": 12385
|
|
},
|
|
{
|
|
"epoch": 1.7817083692838653,
|
|
"grad_norm": 0.24494223052253658,
|
|
"learning_rate": 7.0925602178908555e-06,
|
|
"loss": 0.4069,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 1.782427379925223,
|
|
"grad_norm": 0.24786662275056098,
|
|
"learning_rate": 7.085356846962256e-06,
|
|
"loss": 0.4088,
|
|
"step": 12395
|
|
},
|
|
{
|
|
"epoch": 1.7831463905665803,
|
|
"grad_norm": 0.27211048051117026,
|
|
"learning_rate": 7.078155128426256e-06,
|
|
"loss": 0.4086,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 1.783865401207938,
|
|
"grad_norm": 0.24763847807165335,
|
|
"learning_rate": 7.070955066365714e-06,
|
|
"loss": 0.4066,
|
|
"step": 12405
|
|
},
|
|
{
|
|
"epoch": 1.7845844118492953,
|
|
"grad_norm": 0.28744376769244007,
|
|
"learning_rate": 7.063756664862546e-06,
|
|
"loss": 0.4223,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 1.785303422490653,
|
|
"grad_norm": 0.25607430562276395,
|
|
"learning_rate": 7.056559927997728e-06,
|
|
"loss": 0.4165,
|
|
"step": 12415
|
|
},
|
|
{
|
|
"epoch": 1.7860224331320103,
|
|
"grad_norm": 0.25376526296389273,
|
|
"learning_rate": 7.049364859851286e-06,
|
|
"loss": 0.3973,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 1.786741443773368,
|
|
"grad_norm": 0.25481894320851795,
|
|
"learning_rate": 7.042171464502314e-06,
|
|
"loss": 0.4037,
|
|
"step": 12425
|
|
},
|
|
{
|
|
"epoch": 1.7874604544147252,
|
|
"grad_norm": 0.24849470990176253,
|
|
"learning_rate": 7.034979746028942e-06,
|
|
"loss": 0.4206,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 1.788179465056083,
|
|
"grad_norm": 0.2500859118547924,
|
|
"learning_rate": 7.027789708508355e-06,
|
|
"loss": 0.4141,
|
|
"step": 12435
|
|
},
|
|
{
|
|
"epoch": 1.7888984756974402,
|
|
"grad_norm": 0.2442725442196124,
|
|
"learning_rate": 7.020601356016793e-06,
|
|
"loss": 0.4161,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 1.789617486338798,
|
|
"grad_norm": 0.2689372064593602,
|
|
"learning_rate": 7.01341469262953e-06,
|
|
"loss": 0.412,
|
|
"step": 12445
|
|
},
|
|
{
|
|
"epoch": 1.7903364969801552,
|
|
"grad_norm": 0.2652584184287822,
|
|
"learning_rate": 7.0062297224208805e-06,
|
|
"loss": 0.4188,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 1.7910555076215129,
|
|
"grad_norm": 0.2548079457132161,
|
|
"learning_rate": 6.999046449464214e-06,
|
|
"loss": 0.4087,
|
|
"step": 12455
|
|
},
|
|
{
|
|
"epoch": 1.7917745182628702,
|
|
"grad_norm": 0.24320376487368286,
|
|
"learning_rate": 6.9918648778319264e-06,
|
|
"loss": 0.412,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 1.7924935289042279,
|
|
"grad_norm": 0.24889708442612957,
|
|
"learning_rate": 6.984685011595445e-06,
|
|
"loss": 0.4282,
|
|
"step": 12465
|
|
},
|
|
{
|
|
"epoch": 1.7932125395455851,
|
|
"grad_norm": 0.2533004872723184,
|
|
"learning_rate": 6.977506854825244e-06,
|
|
"loss": 0.4197,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 1.7939315501869428,
|
|
"grad_norm": 0.2480345197472715,
|
|
"learning_rate": 6.970330411590818e-06,
|
|
"loss": 0.4078,
|
|
"step": 12475
|
|
},
|
|
{
|
|
"epoch": 1.7946505608283,
|
|
"grad_norm": 0.2641405088587578,
|
|
"learning_rate": 6.963155685960689e-06,
|
|
"loss": 0.4037,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 1.7953695714696578,
|
|
"grad_norm": 0.27067691777708486,
|
|
"learning_rate": 6.955982682002419e-06,
|
|
"loss": 0.4337,
|
|
"step": 12485
|
|
},
|
|
{
|
|
"epoch": 1.7960885821110153,
|
|
"grad_norm": 0.25504037126841456,
|
|
"learning_rate": 6.948811403782574e-06,
|
|
"loss": 0.4285,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 1.7968075927523728,
|
|
"grad_norm": 0.2626109496981662,
|
|
"learning_rate": 6.941641855366761e-06,
|
|
"loss": 0.4136,
|
|
"step": 12495
|
|
},
|
|
{
|
|
"epoch": 1.7975266033937303,
|
|
"grad_norm": 0.2597384672585987,
|
|
"learning_rate": 6.93447404081959e-06,
|
|
"loss": 0.4144,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 1.7982456140350878,
|
|
"grad_norm": 0.25519689958709324,
|
|
"learning_rate": 6.927307964204695e-06,
|
|
"loss": 0.42,
|
|
"step": 12505
|
|
},
|
|
{
|
|
"epoch": 1.7989646246764452,
|
|
"grad_norm": 0.24724204883741235,
|
|
"learning_rate": 6.920143629584734e-06,
|
|
"loss": 0.4168,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 1.7996836353178027,
|
|
"grad_norm": 0.2617317305762162,
|
|
"learning_rate": 6.91298104102136e-06,
|
|
"loss": 0.415,
|
|
"step": 12515
|
|
},
|
|
{
|
|
"epoch": 1.8004026459591602,
|
|
"grad_norm": 0.2874116408502503,
|
|
"learning_rate": 6.905820202575245e-06,
|
|
"loss": 0.4172,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 1.8011216566005177,
|
|
"grad_norm": 0.2373847142610924,
|
|
"learning_rate": 6.898661118306074e-06,
|
|
"loss": 0.4065,
|
|
"step": 12525
|
|
},
|
|
{
|
|
"epoch": 1.8018406672418752,
|
|
"grad_norm": 0.24865915584559958,
|
|
"learning_rate": 6.891503792272525e-06,
|
|
"loss": 0.4202,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 1.8025596778832327,
|
|
"grad_norm": 0.24895704753808048,
|
|
"learning_rate": 6.884348228532287e-06,
|
|
"loss": 0.4181,
|
|
"step": 12535
|
|
},
|
|
{
|
|
"epoch": 1.8032786885245902,
|
|
"grad_norm": 0.26065825694480377,
|
|
"learning_rate": 6.877194431142055e-06,
|
|
"loss": 0.4141,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 1.8039976991659477,
|
|
"grad_norm": 0.26054346482482493,
|
|
"learning_rate": 6.870042404157513e-06,
|
|
"loss": 0.4122,
|
|
"step": 12545
|
|
},
|
|
{
|
|
"epoch": 1.8047167098073051,
|
|
"grad_norm": 0.2601813629726882,
|
|
"learning_rate": 6.862892151633339e-06,
|
|
"loss": 0.4271,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 1.8054357204486626,
|
|
"grad_norm": 0.25007485946893376,
|
|
"learning_rate": 6.855743677623219e-06,
|
|
"loss": 0.3967,
|
|
"step": 12555
|
|
},
|
|
{
|
|
"epoch": 1.8061547310900201,
|
|
"grad_norm": 0.26287949884764095,
|
|
"learning_rate": 6.848596986179821e-06,
|
|
"loss": 0.4113,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 1.8068737417313776,
|
|
"grad_norm": 0.2580383417830257,
|
|
"learning_rate": 6.841452081354799e-06,
|
|
"loss": 0.4142,
|
|
"step": 12565
|
|
},
|
|
{
|
|
"epoch": 1.807592752372735,
|
|
"grad_norm": 0.2544987361957738,
|
|
"learning_rate": 6.834308967198806e-06,
|
|
"loss": 0.4228,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 1.8083117630140926,
|
|
"grad_norm": 0.2463911431032656,
|
|
"learning_rate": 6.827167647761469e-06,
|
|
"loss": 0.408,
|
|
"step": 12575
|
|
},
|
|
{
|
|
"epoch": 1.80903077365545,
|
|
"grad_norm": 0.2647094856586929,
|
|
"learning_rate": 6.820028127091398e-06,
|
|
"loss": 0.4177,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 1.8097497842968076,
|
|
"grad_norm": 0.2552792364570339,
|
|
"learning_rate": 6.812890409236197e-06,
|
|
"loss": 0.4222,
|
|
"step": 12585
|
|
},
|
|
{
|
|
"epoch": 1.810468794938165,
|
|
"grad_norm": 0.2654823728887575,
|
|
"learning_rate": 6.805754498242429e-06,
|
|
"loss": 0.4217,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 1.8111878055795225,
|
|
"grad_norm": 0.24467511945177642,
|
|
"learning_rate": 6.798620398155642e-06,
|
|
"loss": 0.4107,
|
|
"step": 12595
|
|
},
|
|
{
|
|
"epoch": 1.8119068162208802,
|
|
"grad_norm": 0.24596016975854293,
|
|
"learning_rate": 6.791488113020359e-06,
|
|
"loss": 0.407,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 1.8126258268622375,
|
|
"grad_norm": 0.2638435716871941,
|
|
"learning_rate": 6.784357646880069e-06,
|
|
"loss": 0.4177,
|
|
"step": 12605
|
|
},
|
|
{
|
|
"epoch": 1.8133448375035952,
|
|
"grad_norm": 0.2527538982365996,
|
|
"learning_rate": 6.777229003777237e-06,
|
|
"loss": 0.4088,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 1.8140638481449525,
|
|
"grad_norm": 0.25342996535741125,
|
|
"learning_rate": 6.770102187753287e-06,
|
|
"loss": 0.4328,
|
|
"step": 12615
|
|
},
|
|
{
|
|
"epoch": 1.8147828587863102,
|
|
"grad_norm": 0.25121476066061776,
|
|
"learning_rate": 6.762977202848606e-06,
|
|
"loss": 0.3992,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 1.8155018694276674,
|
|
"grad_norm": 0.25413098892527763,
|
|
"learning_rate": 6.755854053102554e-06,
|
|
"loss": 0.4026,
|
|
"step": 12625
|
|
},
|
|
{
|
|
"epoch": 1.8162208800690252,
|
|
"grad_norm": 0.2554934683514058,
|
|
"learning_rate": 6.748732742553441e-06,
|
|
"loss": 0.4162,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 1.8169398907103824,
|
|
"grad_norm": 0.26198073463994964,
|
|
"learning_rate": 6.741613275238535e-06,
|
|
"loss": 0.4146,
|
|
"step": 12635
|
|
},
|
|
{
|
|
"epoch": 1.8176589013517401,
|
|
"grad_norm": 0.2725434061704402,
|
|
"learning_rate": 6.734495655194063e-06,
|
|
"loss": 0.4285,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 1.8183779119930974,
|
|
"grad_norm": 0.26161207219094385,
|
|
"learning_rate": 6.727379886455201e-06,
|
|
"loss": 0.4179,
|
|
"step": 12645
|
|
},
|
|
{
|
|
"epoch": 1.819096922634455,
|
|
"grad_norm": 0.25113077663083744,
|
|
"learning_rate": 6.720265973056077e-06,
|
|
"loss": 0.4136,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 1.8198159332758124,
|
|
"grad_norm": 0.25588530370887597,
|
|
"learning_rate": 6.713153919029769e-06,
|
|
"loss": 0.4226,
|
|
"step": 12655
|
|
},
|
|
{
|
|
"epoch": 1.82053494391717,
|
|
"grad_norm": 0.2524262779441796,
|
|
"learning_rate": 6.7060437284083004e-06,
|
|
"loss": 0.4025,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 1.8212539545585273,
|
|
"grad_norm": 0.25099598105760423,
|
|
"learning_rate": 6.698935405222628e-06,
|
|
"loss": 0.4086,
|
|
"step": 12665
|
|
},
|
|
{
|
|
"epoch": 1.821972965199885,
|
|
"grad_norm": 0.25154432322773645,
|
|
"learning_rate": 6.691828953502673e-06,
|
|
"loss": 0.4042,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 1.8226919758412423,
|
|
"grad_norm": 0.26731049015276837,
|
|
"learning_rate": 6.684724377277267e-06,
|
|
"loss": 0.4309,
|
|
"step": 12675
|
|
},
|
|
{
|
|
"epoch": 1.8234109864826,
|
|
"grad_norm": 0.25097642688246163,
|
|
"learning_rate": 6.6776216805742e-06,
|
|
"loss": 0.4071,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 1.8241299971239573,
|
|
"grad_norm": 0.2542332473785001,
|
|
"learning_rate": 6.670520867420191e-06,
|
|
"loss": 0.4313,
|
|
"step": 12685
|
|
},
|
|
{
|
|
"epoch": 1.824849007765315,
|
|
"grad_norm": 0.2545845953821712,
|
|
"learning_rate": 6.663421941840889e-06,
|
|
"loss": 0.4106,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 1.8255680184066723,
|
|
"grad_norm": 0.27993315805553826,
|
|
"learning_rate": 6.656324907860864e-06,
|
|
"loss": 0.4117,
|
|
"step": 12695
|
|
},
|
|
{
|
|
"epoch": 1.82628702904803,
|
|
"grad_norm": 0.25662187192675506,
|
|
"learning_rate": 6.649229769503632e-06,
|
|
"loss": 0.3998,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 1.8270060396893872,
|
|
"grad_norm": 0.26107627516777776,
|
|
"learning_rate": 6.642136530791626e-06,
|
|
"loss": 0.4114,
|
|
"step": 12705
|
|
},
|
|
{
|
|
"epoch": 1.827725050330745,
|
|
"grad_norm": 0.2529386238008444,
|
|
"learning_rate": 6.635045195746192e-06,
|
|
"loss": 0.4183,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 1.8284440609721024,
|
|
"grad_norm": 0.2605371133189607,
|
|
"learning_rate": 6.627955768387616e-06,
|
|
"loss": 0.4251,
|
|
"step": 12715
|
|
},
|
|
{
|
|
"epoch": 1.82916307161346,
|
|
"grad_norm": 0.2847902105830355,
|
|
"learning_rate": 6.620868252735084e-06,
|
|
"loss": 0.4048,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 1.8298820822548174,
|
|
"grad_norm": 0.25081838850235133,
|
|
"learning_rate": 6.613782652806713e-06,
|
|
"loss": 0.4115,
|
|
"step": 12725
|
|
},
|
|
{
|
|
"epoch": 1.830601092896175,
|
|
"grad_norm": 0.2542820360088698,
|
|
"learning_rate": 6.6066989726195265e-06,
|
|
"loss": 0.3999,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 1.8313201035375324,
|
|
"grad_norm": 0.28428395658288935,
|
|
"learning_rate": 6.599617216189456e-06,
|
|
"loss": 0.4176,
|
|
"step": 12735
|
|
},
|
|
{
|
|
"epoch": 1.8320391141788899,
|
|
"grad_norm": 0.2727531478489274,
|
|
"learning_rate": 6.5925373875313524e-06,
|
|
"loss": 0.3978,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 1.8327581248202474,
|
|
"grad_norm": 0.2635639047940844,
|
|
"learning_rate": 6.5854594906589655e-06,
|
|
"loss": 0.4236,
|
|
"step": 12745
|
|
},
|
|
{
|
|
"epoch": 1.8334771354616048,
|
|
"grad_norm": 0.26708872883780405,
|
|
"learning_rate": 6.578383529584949e-06,
|
|
"loss": 0.4161,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 1.8341961461029623,
|
|
"grad_norm": 0.26688572699946095,
|
|
"learning_rate": 6.571309508320873e-06,
|
|
"loss": 0.4233,
|
|
"step": 12755
|
|
},
|
|
{
|
|
"epoch": 1.8349151567443198,
|
|
"grad_norm": 0.26384569268835023,
|
|
"learning_rate": 6.564237430877192e-06,
|
|
"loss": 0.4087,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 1.8356341673856773,
|
|
"grad_norm": 0.27821558281067704,
|
|
"learning_rate": 6.557167301263258e-06,
|
|
"loss": 0.4052,
|
|
"step": 12765
|
|
},
|
|
{
|
|
"epoch": 1.8363531780270348,
|
|
"grad_norm": 0.259505941454427,
|
|
"learning_rate": 6.550099123487336e-06,
|
|
"loss": 0.4102,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 1.8370721886683923,
|
|
"grad_norm": 0.25155415594346603,
|
|
"learning_rate": 6.543032901556569e-06,
|
|
"loss": 0.4187,
|
|
"step": 12775
|
|
},
|
|
{
|
|
"epoch": 1.8377911993097498,
|
|
"grad_norm": 0.2634295028970137,
|
|
"learning_rate": 6.5359686394769905e-06,
|
|
"loss": 0.4074,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 1.8385102099511073,
|
|
"grad_norm": 0.25269820552671673,
|
|
"learning_rate": 6.528906341253536e-06,
|
|
"loss": 0.4201,
|
|
"step": 12785
|
|
},
|
|
{
|
|
"epoch": 1.8392292205924647,
|
|
"grad_norm": 0.2520139078540608,
|
|
"learning_rate": 6.521846010890014e-06,
|
|
"loss": 0.4208,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 1.8399482312338222,
|
|
"grad_norm": 0.25448265431325007,
|
|
"learning_rate": 6.514787652389125e-06,
|
|
"loss": 0.4127,
|
|
"step": 12795
|
|
},
|
|
{
|
|
"epoch": 1.8406672418751797,
|
|
"grad_norm": 0.25435034988332555,
|
|
"learning_rate": 6.507731269752448e-06,
|
|
"loss": 0.433,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 1.8413862525165372,
|
|
"grad_norm": 0.25312210692886666,
|
|
"learning_rate": 6.500676866980449e-06,
|
|
"loss": 0.4073,
|
|
"step": 12805
|
|
},
|
|
{
|
|
"epoch": 1.8421052631578947,
|
|
"grad_norm": 0.25850193244283504,
|
|
"learning_rate": 6.4936244480724575e-06,
|
|
"loss": 0.3943,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 1.8428242737992522,
|
|
"grad_norm": 0.2654030793300329,
|
|
"learning_rate": 6.486574017026694e-06,
|
|
"loss": 0.4157,
|
|
"step": 12815
|
|
},
|
|
{
|
|
"epoch": 1.8435432844406097,
|
|
"grad_norm": 0.2584387715247146,
|
|
"learning_rate": 6.4795255778402375e-06,
|
|
"loss": 0.4032,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 1.8442622950819674,
|
|
"grad_norm": 0.2654331773866595,
|
|
"learning_rate": 6.472479134509052e-06,
|
|
"loss": 0.4061,
|
|
"step": 12825
|
|
},
|
|
{
|
|
"epoch": 1.8449813057233246,
|
|
"grad_norm": 0.25224831243599477,
|
|
"learning_rate": 6.465434691027963e-06,
|
|
"loss": 0.4144,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 1.8457003163646823,
|
|
"grad_norm": 0.24964713143355033,
|
|
"learning_rate": 6.458392251390654e-06,
|
|
"loss": 0.4234,
|
|
"step": 12835
|
|
},
|
|
{
|
|
"epoch": 1.8464193270060396,
|
|
"grad_norm": 0.2729379241895573,
|
|
"learning_rate": 6.45135181958969e-06,
|
|
"loss": 0.435,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 1.8471383376473973,
|
|
"grad_norm": 0.26302708975701156,
|
|
"learning_rate": 6.4443133996164844e-06,
|
|
"loss": 0.4125,
|
|
"step": 12845
|
|
},
|
|
{
|
|
"epoch": 1.8478573482887546,
|
|
"grad_norm": 0.2555072174920062,
|
|
"learning_rate": 6.437276995461311e-06,
|
|
"loss": 0.4058,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 1.8485763589301123,
|
|
"grad_norm": 0.25696777543860977,
|
|
"learning_rate": 6.430242611113312e-06,
|
|
"loss": 0.4202,
|
|
"step": 12855
|
|
},
|
|
{
|
|
"epoch": 1.8492953695714696,
|
|
"grad_norm": 0.2774263836915849,
|
|
"learning_rate": 6.423210250560471e-06,
|
|
"loss": 0.414,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 1.8500143802128273,
|
|
"grad_norm": 0.25292831806005195,
|
|
"learning_rate": 6.4161799177896265e-06,
|
|
"loss": 0.4246,
|
|
"step": 12865
|
|
},
|
|
{
|
|
"epoch": 1.8507333908541845,
|
|
"grad_norm": 0.2523045428415083,
|
|
"learning_rate": 6.409151616786475e-06,
|
|
"loss": 0.4077,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 1.8514524014955422,
|
|
"grad_norm": 0.260183429075673,
|
|
"learning_rate": 6.402125351535557e-06,
|
|
"loss": 0.4137,
|
|
"step": 12875
|
|
},
|
|
{
|
|
"epoch": 1.8521714121368995,
|
|
"grad_norm": 0.25793382343739896,
|
|
"learning_rate": 6.395101126020256e-06,
|
|
"loss": 0.4201,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 1.8528904227782572,
|
|
"grad_norm": 0.2534260150257859,
|
|
"learning_rate": 6.388078944222804e-06,
|
|
"loss": 0.4015,
|
|
"step": 12885
|
|
},
|
|
{
|
|
"epoch": 1.8536094334196145,
|
|
"grad_norm": 0.24802590654137802,
|
|
"learning_rate": 6.38105881012427e-06,
|
|
"loss": 0.4148,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 1.8543284440609722,
|
|
"grad_norm": 0.25789258495597844,
|
|
"learning_rate": 6.374040727704562e-06,
|
|
"loss": 0.4012,
|
|
"step": 12895
|
|
},
|
|
{
|
|
"epoch": 1.8550474547023295,
|
|
"grad_norm": 0.26345993239303045,
|
|
"learning_rate": 6.367024700942435e-06,
|
|
"loss": 0.4096,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 1.8557664653436872,
|
|
"grad_norm": 0.2668160591818745,
|
|
"learning_rate": 6.360010733815465e-06,
|
|
"loss": 0.4047,
|
|
"step": 12905
|
|
},
|
|
{
|
|
"epoch": 1.8564854759850444,
|
|
"grad_norm": 0.2566605802151127,
|
|
"learning_rate": 6.352998830300061e-06,
|
|
"loss": 0.4265,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 1.8572044866264021,
|
|
"grad_norm": 0.2759657295529972,
|
|
"learning_rate": 6.345988994371477e-06,
|
|
"loss": 0.4189,
|
|
"step": 12915
|
|
},
|
|
{
|
|
"epoch": 1.8579234972677594,
|
|
"grad_norm": 0.2552560819729121,
|
|
"learning_rate": 6.3389812300037774e-06,
|
|
"loss": 0.4065,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 1.8586425079091171,
|
|
"grad_norm": 0.24802835502600706,
|
|
"learning_rate": 6.33197554116986e-06,
|
|
"loss": 0.4004,
|
|
"step": 12925
|
|
},
|
|
{
|
|
"epoch": 1.8593615185504746,
|
|
"grad_norm": 0.3281056503270654,
|
|
"learning_rate": 6.324971931841453e-06,
|
|
"loss": 0.4188,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 1.860080529191832,
|
|
"grad_norm": 0.25313620800312736,
|
|
"learning_rate": 6.317970405989086e-06,
|
|
"loss": 0.4176,
|
|
"step": 12935
|
|
},
|
|
{
|
|
"epoch": 1.8607995398331896,
|
|
"grad_norm": 0.25855560585825904,
|
|
"learning_rate": 6.310970967582131e-06,
|
|
"loss": 0.4116,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 1.861518550474547,
|
|
"grad_norm": 0.26802055162752714,
|
|
"learning_rate": 6.303973620588757e-06,
|
|
"loss": 0.4169,
|
|
"step": 12945
|
|
},
|
|
{
|
|
"epoch": 1.8622375611159045,
|
|
"grad_norm": 0.2578356282861538,
|
|
"learning_rate": 6.296978368975958e-06,
|
|
"loss": 0.4217,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 1.862956571757262,
|
|
"grad_norm": 0.26944809309820655,
|
|
"learning_rate": 6.289985216709542e-06,
|
|
"loss": 0.4283,
|
|
"step": 12955
|
|
},
|
|
{
|
|
"epoch": 1.8636755823986195,
|
|
"grad_norm": 0.25758698256009627,
|
|
"learning_rate": 6.282994167754117e-06,
|
|
"loss": 0.4156,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 1.864394593039977,
|
|
"grad_norm": 0.2481740645545019,
|
|
"learning_rate": 6.276005226073103e-06,
|
|
"loss": 0.412,
|
|
"step": 12965
|
|
},
|
|
{
|
|
"epoch": 1.8651136036813345,
|
|
"grad_norm": 0.258105239791881,
|
|
"learning_rate": 6.26901839562873e-06,
|
|
"loss": 0.3994,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 1.865832614322692,
|
|
"grad_norm": 0.2614869674290543,
|
|
"learning_rate": 6.262033680382027e-06,
|
|
"loss": 0.4363,
|
|
"step": 12975
|
|
},
|
|
{
|
|
"epoch": 1.8665516249640495,
|
|
"grad_norm": 0.24276092784851724,
|
|
"learning_rate": 6.255051084292821e-06,
|
|
"loss": 0.4002,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 1.867270635605407,
|
|
"grad_norm": 0.25774303470890625,
|
|
"learning_rate": 6.2480706113197445e-06,
|
|
"loss": 0.4014,
|
|
"step": 12985
|
|
},
|
|
{
|
|
"epoch": 1.8679896462467644,
|
|
"grad_norm": 0.26867975206439915,
|
|
"learning_rate": 6.241092265420219e-06,
|
|
"loss": 0.409,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 1.868708656888122,
|
|
"grad_norm": 0.25712796179182074,
|
|
"learning_rate": 6.2341160505504636e-06,
|
|
"loss": 0.422,
|
|
"step": 12995
|
|
},
|
|
{
|
|
"epoch": 1.8694276675294794,
|
|
"grad_norm": 0.26283971628716823,
|
|
"learning_rate": 6.227141970665496e-06,
|
|
"loss": 0.4163,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 1.870146678170837,
|
|
"grad_norm": 0.26169927239092655,
|
|
"learning_rate": 6.220170029719111e-06,
|
|
"loss": 0.4106,
|
|
"step": 13005
|
|
},
|
|
{
|
|
"epoch": 1.8708656888121944,
|
|
"grad_norm": 0.24783098567903028,
|
|
"learning_rate": 6.213200231663894e-06,
|
|
"loss": 0.4216,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 1.8715846994535519,
|
|
"grad_norm": 0.2640476765001785,
|
|
"learning_rate": 6.206232580451225e-06,
|
|
"loss": 0.4137,
|
|
"step": 13015
|
|
},
|
|
{
|
|
"epoch": 1.8723037100949094,
|
|
"grad_norm": 0.24416363628731877,
|
|
"learning_rate": 6.199267080031257e-06,
|
|
"loss": 0.3997,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 1.8730227207362669,
|
|
"grad_norm": 0.25814773161479015,
|
|
"learning_rate": 6.192303734352925e-06,
|
|
"loss": 0.4153,
|
|
"step": 13025
|
|
},
|
|
{
|
|
"epoch": 1.8737417313776243,
|
|
"grad_norm": 0.2569713415930947,
|
|
"learning_rate": 6.185342547363947e-06,
|
|
"loss": 0.412,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 1.8744607420189818,
|
|
"grad_norm": 0.2516973791241243,
|
|
"learning_rate": 6.178383523010813e-06,
|
|
"loss": 0.4111,
|
|
"step": 13035
|
|
},
|
|
{
|
|
"epoch": 1.8751797526603395,
|
|
"grad_norm": 0.26717730576239873,
|
|
"learning_rate": 6.171426665238787e-06,
|
|
"loss": 0.4258,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 1.8758987633016968,
|
|
"grad_norm": 0.2525000043033776,
|
|
"learning_rate": 6.164471977991908e-06,
|
|
"loss": 0.4084,
|
|
"step": 13045
|
|
},
|
|
{
|
|
"epoch": 1.8766177739430545,
|
|
"grad_norm": 0.2568542615635378,
|
|
"learning_rate": 6.15751946521298e-06,
|
|
"loss": 0.4228,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 1.8773367845844118,
|
|
"grad_norm": 0.25254186094659625,
|
|
"learning_rate": 6.150569130843582e-06,
|
|
"loss": 0.411,
|
|
"step": 13055
|
|
},
|
|
{
|
|
"epoch": 1.8780557952257695,
|
|
"grad_norm": 0.26287766874238017,
|
|
"learning_rate": 6.143620978824048e-06,
|
|
"loss": 0.4057,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 1.8787748058671268,
|
|
"grad_norm": 0.2691898986952207,
|
|
"learning_rate": 6.1366750130934785e-06,
|
|
"loss": 0.4093,
|
|
"step": 13065
|
|
},
|
|
{
|
|
"epoch": 1.8794938165084845,
|
|
"grad_norm": 0.24589000962412044,
|
|
"learning_rate": 6.129731237589738e-06,
|
|
"loss": 0.3976,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 1.8802128271498417,
|
|
"grad_norm": 0.24933464122253188,
|
|
"learning_rate": 6.1227896562494485e-06,
|
|
"loss": 0.3975,
|
|
"step": 13075
|
|
},
|
|
{
|
|
"epoch": 1.8809318377911994,
|
|
"grad_norm": 0.2551105905003046,
|
|
"learning_rate": 6.11585027300798e-06,
|
|
"loss": 0.4197,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 1.8816508484325567,
|
|
"grad_norm": 0.2548425666015531,
|
|
"learning_rate": 6.10891309179947e-06,
|
|
"loss": 0.4134,
|
|
"step": 13085
|
|
},
|
|
{
|
|
"epoch": 1.8823698590739144,
|
|
"grad_norm": 0.2519283793583434,
|
|
"learning_rate": 6.1019781165567946e-06,
|
|
"loss": 0.4058,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 1.8830888697152717,
|
|
"grad_norm": 0.258597723985932,
|
|
"learning_rate": 6.095045351211586e-06,
|
|
"loss": 0.4083,
|
|
"step": 13095
|
|
},
|
|
{
|
|
"epoch": 1.8838078803566294,
|
|
"grad_norm": 0.2571978705208484,
|
|
"learning_rate": 6.088114799694229e-06,
|
|
"loss": 0.4177,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 1.8845268909979866,
|
|
"grad_norm": 0.25178796849206486,
|
|
"learning_rate": 6.081186465933839e-06,
|
|
"loss": 0.4056,
|
|
"step": 13105
|
|
},
|
|
{
|
|
"epoch": 1.8852459016393444,
|
|
"grad_norm": 0.2474711452655329,
|
|
"learning_rate": 6.074260353858283e-06,
|
|
"loss": 0.4215,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 1.8859649122807016,
|
|
"grad_norm": 0.2570837114079029,
|
|
"learning_rate": 6.067336467394169e-06,
|
|
"loss": 0.395,
|
|
"step": 13115
|
|
},
|
|
{
|
|
"epoch": 1.8866839229220593,
|
|
"grad_norm": 0.26398409651585353,
|
|
"learning_rate": 6.060414810466844e-06,
|
|
"loss": 0.4118,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 1.8874029335634166,
|
|
"grad_norm": 0.24110724622419008,
|
|
"learning_rate": 6.053495387000382e-06,
|
|
"loss": 0.3981,
|
|
"step": 13125
|
|
},
|
|
{
|
|
"epoch": 1.8881219442047743,
|
|
"grad_norm": 0.25341520879124657,
|
|
"learning_rate": 6.0465782009176056e-06,
|
|
"loss": 0.4209,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 1.8888409548461316,
|
|
"grad_norm": 0.25898627368433486,
|
|
"learning_rate": 6.039663256140055e-06,
|
|
"loss": 0.4053,
|
|
"step": 13135
|
|
},
|
|
{
|
|
"epoch": 1.8895599654874893,
|
|
"grad_norm": 0.26413833142848836,
|
|
"learning_rate": 6.032750556588004e-06,
|
|
"loss": 0.4044,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 1.8902789761288465,
|
|
"grad_norm": 0.26163899057288426,
|
|
"learning_rate": 6.0258401061804625e-06,
|
|
"loss": 0.4061,
|
|
"step": 13145
|
|
},
|
|
{
|
|
"epoch": 1.8909979867702043,
|
|
"grad_norm": 0.2626566632795502,
|
|
"learning_rate": 6.01893190883515e-06,
|
|
"loss": 0.4197,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 1.8917169974115617,
|
|
"grad_norm": 0.24307479703105075,
|
|
"learning_rate": 6.012025968468525e-06,
|
|
"loss": 0.4182,
|
|
"step": 13155
|
|
},
|
|
{
|
|
"epoch": 1.8924360080529192,
|
|
"grad_norm": 0.2566826095216113,
|
|
"learning_rate": 6.005122288995748e-06,
|
|
"loss": 0.4163,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 1.8931550186942767,
|
|
"grad_norm": 0.2560152561936091,
|
|
"learning_rate": 5.998220874330714e-06,
|
|
"loss": 0.4284,
|
|
"step": 13165
|
|
},
|
|
{
|
|
"epoch": 1.8938740293356342,
|
|
"grad_norm": 0.26048476462091,
|
|
"learning_rate": 5.991321728386028e-06,
|
|
"loss": 0.4049,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 1.8945930399769917,
|
|
"grad_norm": 0.2585226989827463,
|
|
"learning_rate": 5.984424855073007e-06,
|
|
"loss": 0.431,
|
|
"step": 13175
|
|
},
|
|
{
|
|
"epoch": 1.8953120506183492,
|
|
"grad_norm": 0.26135587087437545,
|
|
"learning_rate": 5.977530258301678e-06,
|
|
"loss": 0.4132,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 1.8960310612597067,
|
|
"grad_norm": 0.2523667592703136,
|
|
"learning_rate": 5.970637941980786e-06,
|
|
"loss": 0.3932,
|
|
"step": 13185
|
|
},
|
|
{
|
|
"epoch": 1.8967500719010641,
|
|
"grad_norm": 0.24588638369790913,
|
|
"learning_rate": 5.963747910017774e-06,
|
|
"loss": 0.4186,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 1.8974690825424216,
|
|
"grad_norm": 0.2537110075846357,
|
|
"learning_rate": 5.956860166318792e-06,
|
|
"loss": 0.4132,
|
|
"step": 13195
|
|
},
|
|
{
|
|
"epoch": 1.8981880931837791,
|
|
"grad_norm": 0.24279395102296025,
|
|
"learning_rate": 5.949974714788702e-06,
|
|
"loss": 0.4037,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 1.8989071038251366,
|
|
"grad_norm": 0.2658080544254367,
|
|
"learning_rate": 5.943091559331054e-06,
|
|
"loss": 0.3998,
|
|
"step": 13205
|
|
},
|
|
{
|
|
"epoch": 1.899626114466494,
|
|
"grad_norm": 0.2710711751352035,
|
|
"learning_rate": 5.936210703848095e-06,
|
|
"loss": 0.4138,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 1.9003451251078516,
|
|
"grad_norm": 0.2500168805986903,
|
|
"learning_rate": 5.929332152240782e-06,
|
|
"loss": 0.4035,
|
|
"step": 13215
|
|
},
|
|
{
|
|
"epoch": 1.901064135749209,
|
|
"grad_norm": 0.2586364723380644,
|
|
"learning_rate": 5.922455908408757e-06,
|
|
"loss": 0.4062,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 1.9017831463905666,
|
|
"grad_norm": 0.2602658295039629,
|
|
"learning_rate": 5.915581976250351e-06,
|
|
"loss": 0.4154,
|
|
"step": 13225
|
|
},
|
|
{
|
|
"epoch": 1.902502157031924,
|
|
"grad_norm": 0.2646875529076759,
|
|
"learning_rate": 5.908710359662595e-06,
|
|
"loss": 0.4235,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 1.9032211676732815,
|
|
"grad_norm": 0.25362401233014675,
|
|
"learning_rate": 5.901841062541192e-06,
|
|
"loss": 0.4195,
|
|
"step": 13235
|
|
},
|
|
{
|
|
"epoch": 1.903940178314639,
|
|
"grad_norm": 0.2522932738809794,
|
|
"learning_rate": 5.894974088780543e-06,
|
|
"loss": 0.4002,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 1.9046591889559965,
|
|
"grad_norm": 0.2641327392718366,
|
|
"learning_rate": 5.888109442273729e-06,
|
|
"loss": 0.4084,
|
|
"step": 13245
|
|
},
|
|
{
|
|
"epoch": 1.905378199597354,
|
|
"grad_norm": 0.24629504499371987,
|
|
"learning_rate": 5.881247126912506e-06,
|
|
"loss": 0.4099,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 1.9060972102387115,
|
|
"grad_norm": 0.2542730372298661,
|
|
"learning_rate": 5.874387146587311e-06,
|
|
"loss": 0.4094,
|
|
"step": 13255
|
|
},
|
|
{
|
|
"epoch": 1.906816220880069,
|
|
"grad_norm": 0.2550950761308887,
|
|
"learning_rate": 5.867529505187264e-06,
|
|
"loss": 0.4031,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 1.9075352315214267,
|
|
"grad_norm": 0.2625217431254056,
|
|
"learning_rate": 5.860674206600145e-06,
|
|
"loss": 0.4129,
|
|
"step": 13265
|
|
},
|
|
{
|
|
"epoch": 1.908254242162784,
|
|
"grad_norm": 0.2557313481219586,
|
|
"learning_rate": 5.853821254712426e-06,
|
|
"loss": 0.3976,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 1.9089732528041417,
|
|
"grad_norm": 0.25342454097146244,
|
|
"learning_rate": 5.8469706534092315e-06,
|
|
"loss": 0.3964,
|
|
"step": 13275
|
|
},
|
|
{
|
|
"epoch": 1.909692263445499,
|
|
"grad_norm": 0.2478590136487242,
|
|
"learning_rate": 5.840122406574352e-06,
|
|
"loss": 0.402,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 1.9104112740868566,
|
|
"grad_norm": 0.25092233391931035,
|
|
"learning_rate": 5.833276518090261e-06,
|
|
"loss": 0.413,
|
|
"step": 13285
|
|
},
|
|
{
|
|
"epoch": 1.911130284728214,
|
|
"grad_norm": 0.25427748407834705,
|
|
"learning_rate": 5.826432991838077e-06,
|
|
"loss": 0.4184,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 1.9118492953695716,
|
|
"grad_norm": 0.26016680851467244,
|
|
"learning_rate": 5.819591831697584e-06,
|
|
"loss": 0.4262,
|
|
"step": 13295
|
|
},
|
|
{
|
|
"epoch": 1.9125683060109289,
|
|
"grad_norm": 0.2753524866938533,
|
|
"learning_rate": 5.81275304154723e-06,
|
|
"loss": 0.421,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 1.9132873166522866,
|
|
"grad_norm": 0.2614118633604326,
|
|
"learning_rate": 5.805916625264121e-06,
|
|
"loss": 0.4089,
|
|
"step": 13305
|
|
},
|
|
{
|
|
"epoch": 1.9140063272936438,
|
|
"grad_norm": 0.2551883246804781,
|
|
"learning_rate": 5.799082586724003e-06,
|
|
"loss": 0.4057,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 1.9147253379350015,
|
|
"grad_norm": 0.3150339976324254,
|
|
"learning_rate": 5.792250929801292e-06,
|
|
"loss": 0.4191,
|
|
"step": 13315
|
|
},
|
|
{
|
|
"epoch": 1.9154443485763588,
|
|
"grad_norm": 0.2567802822715765,
|
|
"learning_rate": 5.785421658369041e-06,
|
|
"loss": 0.4276,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 1.9161633592177165,
|
|
"grad_norm": 0.25215346030617053,
|
|
"learning_rate": 5.7785947762989515e-06,
|
|
"loss": 0.4148,
|
|
"step": 13325
|
|
},
|
|
{
|
|
"epoch": 1.9168823698590738,
|
|
"grad_norm": 0.26247158778480834,
|
|
"learning_rate": 5.771770287461381e-06,
|
|
"loss": 0.4112,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 1.9176013805004315,
|
|
"grad_norm": 0.2593393629670231,
|
|
"learning_rate": 5.7649481957253195e-06,
|
|
"loss": 0.4107,
|
|
"step": 13335
|
|
},
|
|
{
|
|
"epoch": 1.9183203911417888,
|
|
"grad_norm": 0.26044429642699995,
|
|
"learning_rate": 5.758128504958396e-06,
|
|
"loss": 0.417,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 1.9190394017831465,
|
|
"grad_norm": 0.26474646616105685,
|
|
"learning_rate": 5.751311219026887e-06,
|
|
"loss": 0.419,
|
|
"step": 13345
|
|
},
|
|
{
|
|
"epoch": 1.9197584124245037,
|
|
"grad_norm": 0.2557340922976566,
|
|
"learning_rate": 5.744496341795709e-06,
|
|
"loss": 0.4199,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 1.9204774230658614,
|
|
"grad_norm": 0.25120018293705426,
|
|
"learning_rate": 5.737683877128396e-06,
|
|
"loss": 0.4138,
|
|
"step": 13355
|
|
},
|
|
{
|
|
"epoch": 1.9211964337072187,
|
|
"grad_norm": 0.25802447772756554,
|
|
"learning_rate": 5.730873828887133e-06,
|
|
"loss": 0.4358,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 1.9219154443485764,
|
|
"grad_norm": 0.268303239943271,
|
|
"learning_rate": 5.724066200932724e-06,
|
|
"loss": 0.391,
|
|
"step": 13365
|
|
},
|
|
{
|
|
"epoch": 1.922634454989934,
|
|
"grad_norm": 0.266224674709648,
|
|
"learning_rate": 5.717260997124597e-06,
|
|
"loss": 0.4182,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 1.9233534656312914,
|
|
"grad_norm": 0.25210315862281496,
|
|
"learning_rate": 5.710458221320823e-06,
|
|
"loss": 0.4069,
|
|
"step": 13375
|
|
},
|
|
{
|
|
"epoch": 1.9240724762726489,
|
|
"grad_norm": 0.258824638291291,
|
|
"learning_rate": 5.703657877378074e-06,
|
|
"loss": 0.4149,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 1.9247914869140064,
|
|
"grad_norm": 0.2581183392991827,
|
|
"learning_rate": 5.696859969151664e-06,
|
|
"loss": 0.3981,
|
|
"step": 13385
|
|
},
|
|
{
|
|
"epoch": 1.9255104975553639,
|
|
"grad_norm": 0.24773348494876463,
|
|
"learning_rate": 5.6900645004955155e-06,
|
|
"loss": 0.4234,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 1.9262295081967213,
|
|
"grad_norm": 0.24356325001336865,
|
|
"learning_rate": 5.683271475262165e-06,
|
|
"loss": 0.4102,
|
|
"step": 13395
|
|
},
|
|
{
|
|
"epoch": 1.9269485188380788,
|
|
"grad_norm": 0.25903420847328895,
|
|
"learning_rate": 5.676480897302767e-06,
|
|
"loss": 0.4045,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 1.9276675294794363,
|
|
"grad_norm": 0.2598026496554222,
|
|
"learning_rate": 5.669692770467101e-06,
|
|
"loss": 0.4305,
|
|
"step": 13405
|
|
},
|
|
{
|
|
"epoch": 1.9283865401207938,
|
|
"grad_norm": 0.25495982306280357,
|
|
"learning_rate": 5.6629070986035336e-06,
|
|
"loss": 0.4123,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 1.9291055507621513,
|
|
"grad_norm": 0.25261257640552676,
|
|
"learning_rate": 5.6561238855590605e-06,
|
|
"loss": 0.3984,
|
|
"step": 13415
|
|
},
|
|
{
|
|
"epoch": 1.9298245614035088,
|
|
"grad_norm": 0.2549778638324147,
|
|
"learning_rate": 5.649343135179271e-06,
|
|
"loss": 0.4176,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 1.9305435720448663,
|
|
"grad_norm": 0.2630585276751543,
|
|
"learning_rate": 5.642564851308356e-06,
|
|
"loss": 0.413,
|
|
"step": 13425
|
|
},
|
|
{
|
|
"epoch": 1.9312625826862237,
|
|
"grad_norm": 0.2611508569793235,
|
|
"learning_rate": 5.635789037789126e-06,
|
|
"loss": 0.4117,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 1.9319815933275812,
|
|
"grad_norm": 0.2622970798730447,
|
|
"learning_rate": 5.629015698462969e-06,
|
|
"loss": 0.4215,
|
|
"step": 13435
|
|
},
|
|
{
|
|
"epoch": 1.9327006039689387,
|
|
"grad_norm": 0.2704859040010164,
|
|
"learning_rate": 5.622244837169881e-06,
|
|
"loss": 0.4196,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 1.9334196146102962,
|
|
"grad_norm": 0.25127827927080826,
|
|
"learning_rate": 5.615476457748456e-06,
|
|
"loss": 0.4311,
|
|
"step": 13445
|
|
},
|
|
{
|
|
"epoch": 1.9341386252516537,
|
|
"grad_norm": 0.2596218554377162,
|
|
"learning_rate": 5.6087105640358794e-06,
|
|
"loss": 0.412,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 1.9348576358930112,
|
|
"grad_norm": 0.26981195401964797,
|
|
"learning_rate": 5.6019471598679176e-06,
|
|
"loss": 0.4086,
|
|
"step": 13455
|
|
},
|
|
{
|
|
"epoch": 1.9355766465343687,
|
|
"grad_norm": 0.2645329644303195,
|
|
"learning_rate": 5.595186249078943e-06,
|
|
"loss": 0.4126,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 1.9362956571757262,
|
|
"grad_norm": 0.25139715586511135,
|
|
"learning_rate": 5.588427835501899e-06,
|
|
"loss": 0.4078,
|
|
"step": 13465
|
|
},
|
|
{
|
|
"epoch": 1.9370146678170836,
|
|
"grad_norm": 0.26008467818228065,
|
|
"learning_rate": 5.581671922968316e-06,
|
|
"loss": 0.4313,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 1.9377336784584411,
|
|
"grad_norm": 0.2598127792714966,
|
|
"learning_rate": 5.574918515308316e-06,
|
|
"loss": 0.4104,
|
|
"step": 13475
|
|
},
|
|
{
|
|
"epoch": 1.9384526890997988,
|
|
"grad_norm": 0.26726922409397147,
|
|
"learning_rate": 5.568167616350588e-06,
|
|
"loss": 0.4097,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 1.939171699741156,
|
|
"grad_norm": 0.2586683894957867,
|
|
"learning_rate": 5.561419229922414e-06,
|
|
"loss": 0.3944,
|
|
"step": 13485
|
|
},
|
|
{
|
|
"epoch": 1.9398907103825138,
|
|
"grad_norm": 0.2545620194841944,
|
|
"learning_rate": 5.554673359849632e-06,
|
|
"loss": 0.4045,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 1.940609721023871,
|
|
"grad_norm": 0.255270974055844,
|
|
"learning_rate": 5.5479300099566735e-06,
|
|
"loss": 0.4124,
|
|
"step": 13495
|
|
},
|
|
{
|
|
"epoch": 1.9413287316652288,
|
|
"grad_norm": 0.2824456607568959,
|
|
"learning_rate": 5.541189184066524e-06,
|
|
"loss": 0.4144,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 1.942047742306586,
|
|
"grad_norm": 0.26608110665659906,
|
|
"learning_rate": 5.534450886000754e-06,
|
|
"loss": 0.3896,
|
|
"step": 13505
|
|
},
|
|
{
|
|
"epoch": 1.9427667529479438,
|
|
"grad_norm": 0.2630987060749629,
|
|
"learning_rate": 5.527715119579484e-06,
|
|
"loss": 0.4041,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 1.943485763589301,
|
|
"grad_norm": 0.25227145550355407,
|
|
"learning_rate": 5.520981888621419e-06,
|
|
"loss": 0.399,
|
|
"step": 13515
|
|
},
|
|
{
|
|
"epoch": 1.9442047742306587,
|
|
"grad_norm": 0.2568201628350947,
|
|
"learning_rate": 5.514251196943808e-06,
|
|
"loss": 0.4043,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 1.944923784872016,
|
|
"grad_norm": 0.24854524537821318,
|
|
"learning_rate": 5.507523048362464e-06,
|
|
"loss": 0.4037,
|
|
"step": 13525
|
|
},
|
|
{
|
|
"epoch": 1.9456427955133737,
|
|
"grad_norm": 0.24892934206827358,
|
|
"learning_rate": 5.5007974466917745e-06,
|
|
"loss": 0.4061,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 1.946361806154731,
|
|
"grad_norm": 0.25452226933530475,
|
|
"learning_rate": 5.494074395744663e-06,
|
|
"loss": 0.4195,
|
|
"step": 13535
|
|
},
|
|
{
|
|
"epoch": 1.9470808167960887,
|
|
"grad_norm": 0.2640790007497816,
|
|
"learning_rate": 5.487353899332613e-06,
|
|
"loss": 0.4066,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 1.947799827437446,
|
|
"grad_norm": 0.2571805148580205,
|
|
"learning_rate": 5.480635961265663e-06,
|
|
"loss": 0.4171,
|
|
"step": 13545
|
|
},
|
|
{
|
|
"epoch": 1.9485188380788037,
|
|
"grad_norm": 0.2596407932136947,
|
|
"learning_rate": 5.473920585352408e-06,
|
|
"loss": 0.4178,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 1.949237848720161,
|
|
"grad_norm": 0.2508756708764493,
|
|
"learning_rate": 5.46720777539997e-06,
|
|
"loss": 0.4195,
|
|
"step": 13555
|
|
},
|
|
{
|
|
"epoch": 1.9499568593615186,
|
|
"grad_norm": 0.25792489723424716,
|
|
"learning_rate": 5.460497535214037e-06,
|
|
"loss": 0.4141,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 1.950675870002876,
|
|
"grad_norm": 0.25915427113602196,
|
|
"learning_rate": 5.453789868598831e-06,
|
|
"loss": 0.3975,
|
|
"step": 13565
|
|
},
|
|
{
|
|
"epoch": 1.9513948806442336,
|
|
"grad_norm": 0.2615882332919308,
|
|
"learning_rate": 5.447084779357108e-06,
|
|
"loss": 0.403,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 1.9521138912855909,
|
|
"grad_norm": 0.2764317227668451,
|
|
"learning_rate": 5.4403822712901784e-06,
|
|
"loss": 0.4106,
|
|
"step": 13575
|
|
},
|
|
{
|
|
"epoch": 1.9528329019269486,
|
|
"grad_norm": 0.26583620020318643,
|
|
"learning_rate": 5.43368234819788e-06,
|
|
"loss": 0.4073,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 1.9535519125683058,
|
|
"grad_norm": 0.2510693373522846,
|
|
"learning_rate": 5.42698501387858e-06,
|
|
"loss": 0.4073,
|
|
"step": 13585
|
|
},
|
|
{
|
|
"epoch": 1.9542709232096636,
|
|
"grad_norm": 0.2754723365853278,
|
|
"learning_rate": 5.420290272129189e-06,
|
|
"loss": 0.417,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 1.954989933851021,
|
|
"grad_norm": 0.26119369021393657,
|
|
"learning_rate": 5.413598126745143e-06,
|
|
"loss": 0.4086,
|
|
"step": 13595
|
|
},
|
|
{
|
|
"epoch": 1.9557089444923785,
|
|
"grad_norm": 0.264200670292894,
|
|
"learning_rate": 5.406908581520411e-06,
|
|
"loss": 0.4234,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 1.956427955133736,
|
|
"grad_norm": 0.25025560341334857,
|
|
"learning_rate": 5.400221640247476e-06,
|
|
"loss": 0.4014,
|
|
"step": 13605
|
|
},
|
|
{
|
|
"epoch": 1.9571469657750935,
|
|
"grad_norm": 0.24933665275190425,
|
|
"learning_rate": 5.393537306717351e-06,
|
|
"loss": 0.4167,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 1.957865976416451,
|
|
"grad_norm": 0.25356414864778404,
|
|
"learning_rate": 5.386855584719578e-06,
|
|
"loss": 0.4021,
|
|
"step": 13615
|
|
},
|
|
{
|
|
"epoch": 1.9585849870578085,
|
|
"grad_norm": 0.2589269061483978,
|
|
"learning_rate": 5.380176478042207e-06,
|
|
"loss": 0.4125,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 1.959303997699166,
|
|
"grad_norm": 0.25559736392321186,
|
|
"learning_rate": 5.373499990471809e-06,
|
|
"loss": 0.4209,
|
|
"step": 13625
|
|
},
|
|
{
|
|
"epoch": 1.9600230083405235,
|
|
"grad_norm": 0.2597373727126001,
|
|
"learning_rate": 5.3668261257934766e-06,
|
|
"loss": 0.4205,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 1.960742018981881,
|
|
"grad_norm": 0.2549162950845531,
|
|
"learning_rate": 5.360154887790806e-06,
|
|
"loss": 0.4124,
|
|
"step": 13635
|
|
},
|
|
{
|
|
"epoch": 1.9614610296232384,
|
|
"grad_norm": 0.2606798442782481,
|
|
"learning_rate": 5.353486280245905e-06,
|
|
"loss": 0.4163,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 1.962180040264596,
|
|
"grad_norm": 0.2608700722504959,
|
|
"learning_rate": 5.3468203069394e-06,
|
|
"loss": 0.4171,
|
|
"step": 13645
|
|
},
|
|
{
|
|
"epoch": 1.9628990509059534,
|
|
"grad_norm": 0.2556515194260987,
|
|
"learning_rate": 5.340156971650416e-06,
|
|
"loss": 0.4026,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 1.963618061547311,
|
|
"grad_norm": 0.24822280007935313,
|
|
"learning_rate": 5.333496278156581e-06,
|
|
"loss": 0.3912,
|
|
"step": 13655
|
|
},
|
|
{
|
|
"epoch": 1.9643370721886684,
|
|
"grad_norm": 0.2632693931318459,
|
|
"learning_rate": 5.326838230234034e-06,
|
|
"loss": 0.4155,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 1.9650560828300259,
|
|
"grad_norm": 0.25566472312596306,
|
|
"learning_rate": 5.320182831657403e-06,
|
|
"loss": 0.4087,
|
|
"step": 13665
|
|
},
|
|
{
|
|
"epoch": 1.9657750934713834,
|
|
"grad_norm": 0.2593060159688528,
|
|
"learning_rate": 5.3135300861998186e-06,
|
|
"loss": 0.4148,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 1.9664941041127408,
|
|
"grad_norm": 0.25357384343389155,
|
|
"learning_rate": 5.3068799976329125e-06,
|
|
"loss": 0.4112,
|
|
"step": 13675
|
|
},
|
|
{
|
|
"epoch": 1.9672131147540983,
|
|
"grad_norm": 0.2669274390452347,
|
|
"learning_rate": 5.300232569726805e-06,
|
|
"loss": 0.41,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 1.9679321253954558,
|
|
"grad_norm": 0.26011483232030275,
|
|
"learning_rate": 5.2935878062501e-06,
|
|
"loss": 0.4083,
|
|
"step": 13685
|
|
},
|
|
{
|
|
"epoch": 1.9686511360368133,
|
|
"grad_norm": 0.25724239983605096,
|
|
"learning_rate": 5.286945710969909e-06,
|
|
"loss": 0.4197,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 1.9693701466781708,
|
|
"grad_norm": 0.26454569242107173,
|
|
"learning_rate": 5.28030628765182e-06,
|
|
"loss": 0.4011,
|
|
"step": 13695
|
|
},
|
|
{
|
|
"epoch": 1.9700891573195283,
|
|
"grad_norm": 0.25623339763113145,
|
|
"learning_rate": 5.273669540059905e-06,
|
|
"loss": 0.4101,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 1.970808167960886,
|
|
"grad_norm": 0.26380601542253146,
|
|
"learning_rate": 5.2670354719567256e-06,
|
|
"loss": 0.4012,
|
|
"step": 13705
|
|
},
|
|
{
|
|
"epoch": 1.9715271786022432,
|
|
"grad_norm": 0.2534213399237968,
|
|
"learning_rate": 5.260404087103312e-06,
|
|
"loss": 0.4069,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 1.972246189243601,
|
|
"grad_norm": 0.25076119035156,
|
|
"learning_rate": 5.253775389259193e-06,
|
|
"loss": 0.4086,
|
|
"step": 13715
|
|
},
|
|
{
|
|
"epoch": 1.9729651998849582,
|
|
"grad_norm": 0.2798231328631839,
|
|
"learning_rate": 5.247149382182355e-06,
|
|
"loss": 0.4035,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 1.973684210526316,
|
|
"grad_norm": 0.24860145144078252,
|
|
"learning_rate": 5.240526069629265e-06,
|
|
"loss": 0.3852,
|
|
"step": 13725
|
|
},
|
|
{
|
|
"epoch": 1.9744032211676732,
|
|
"grad_norm": 0.24961360405881555,
|
|
"learning_rate": 5.23390545535487e-06,
|
|
"loss": 0.4147,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 1.975122231809031,
|
|
"grad_norm": 0.26259295202760446,
|
|
"learning_rate": 5.227287543112573e-06,
|
|
"loss": 0.41,
|
|
"step": 13735
|
|
},
|
|
{
|
|
"epoch": 1.9758412424503882,
|
|
"grad_norm": 0.27428978408513577,
|
|
"learning_rate": 5.220672336654265e-06,
|
|
"loss": 0.4079,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 1.9765602530917459,
|
|
"grad_norm": 0.2635671472080491,
|
|
"learning_rate": 5.214059839730277e-06,
|
|
"loss": 0.4091,
|
|
"step": 13745
|
|
},
|
|
{
|
|
"epoch": 1.9772792637331031,
|
|
"grad_norm": 0.2513180411967393,
|
|
"learning_rate": 5.207450056089431e-06,
|
|
"loss": 0.4079,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 1.9779982743744609,
|
|
"grad_norm": 0.25223460265034453,
|
|
"learning_rate": 5.200842989478989e-06,
|
|
"loss": 0.4059,
|
|
"step": 13755
|
|
},
|
|
{
|
|
"epoch": 1.9787172850158181,
|
|
"grad_norm": 0.24391249457708836,
|
|
"learning_rate": 5.194238643644689e-06,
|
|
"loss": 0.3982,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 1.9794362956571758,
|
|
"grad_norm": 0.2531494228581974,
|
|
"learning_rate": 5.187637022330715e-06,
|
|
"loss": 0.4156,
|
|
"step": 13765
|
|
},
|
|
{
|
|
"epoch": 1.980155306298533,
|
|
"grad_norm": 0.2550019649565652,
|
|
"learning_rate": 5.181038129279708e-06,
|
|
"loss": 0.4174,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 1.9808743169398908,
|
|
"grad_norm": 0.27942094830551606,
|
|
"learning_rate": 5.174441968232769e-06,
|
|
"loss": 0.4106,
|
|
"step": 13775
|
|
},
|
|
{
|
|
"epoch": 1.981593327581248,
|
|
"grad_norm": 0.25451593113355353,
|
|
"learning_rate": 5.167848542929446e-06,
|
|
"loss": 0.4094,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 1.9823123382226058,
|
|
"grad_norm": 0.26568260174618225,
|
|
"learning_rate": 5.161257857107729e-06,
|
|
"loss": 0.4137,
|
|
"step": 13785
|
|
},
|
|
{
|
|
"epoch": 1.983031348863963,
|
|
"grad_norm": 0.2603853156579003,
|
|
"learning_rate": 5.154669914504068e-06,
|
|
"loss": 0.4055,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 1.9837503595053207,
|
|
"grad_norm": 0.2620176232894427,
|
|
"learning_rate": 5.148084718853354e-06,
|
|
"loss": 0.4127,
|
|
"step": 13795
|
|
},
|
|
{
|
|
"epoch": 1.984469370146678,
|
|
"grad_norm": 0.2530362930596608,
|
|
"learning_rate": 5.141502273888912e-06,
|
|
"loss": 0.4214,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 1.9851883807880357,
|
|
"grad_norm": 0.25447724836362867,
|
|
"learning_rate": 5.134922583342521e-06,
|
|
"loss": 0.4001,
|
|
"step": 13805
|
|
},
|
|
{
|
|
"epoch": 1.9859073914293932,
|
|
"grad_norm": 0.2540296468666251,
|
|
"learning_rate": 5.128345650944384e-06,
|
|
"loss": 0.4042,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 1.9866264020707507,
|
|
"grad_norm": 0.2577371870378256,
|
|
"learning_rate": 5.1217714804231545e-06,
|
|
"loss": 0.4191,
|
|
"step": 13815
|
|
},
|
|
{
|
|
"epoch": 1.9873454127121082,
|
|
"grad_norm": 0.25990723537924326,
|
|
"learning_rate": 5.115200075505908e-06,
|
|
"loss": 0.409,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 1.9880644233534657,
|
|
"grad_norm": 0.25299497569163515,
|
|
"learning_rate": 5.108631439918158e-06,
|
|
"loss": 0.4048,
|
|
"step": 13825
|
|
},
|
|
{
|
|
"epoch": 1.9887834339948232,
|
|
"grad_norm": 0.2588824820181239,
|
|
"learning_rate": 5.102065577383852e-06,
|
|
"loss": 0.4205,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 1.9895024446361806,
|
|
"grad_norm": 0.26804413155093115,
|
|
"learning_rate": 5.095502491625353e-06,
|
|
"loss": 0.4301,
|
|
"step": 13835
|
|
},
|
|
{
|
|
"epoch": 1.9902214552775381,
|
|
"grad_norm": 0.25278955155155564,
|
|
"learning_rate": 5.0889421863634636e-06,
|
|
"loss": 0.399,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 1.9909404659188956,
|
|
"grad_norm": 0.26550430618529297,
|
|
"learning_rate": 5.082384665317406e-06,
|
|
"loss": 0.4136,
|
|
"step": 13845
|
|
},
|
|
{
|
|
"epoch": 1.991659476560253,
|
|
"grad_norm": 0.25015117720339847,
|
|
"learning_rate": 5.075829932204818e-06,
|
|
"loss": 0.3849,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 1.9923784872016106,
|
|
"grad_norm": 0.2645179055223207,
|
|
"learning_rate": 5.069277990741758e-06,
|
|
"loss": 0.385,
|
|
"step": 13855
|
|
},
|
|
{
|
|
"epoch": 1.993097497842968,
|
|
"grad_norm": 0.25131298311416017,
|
|
"learning_rate": 5.062728844642712e-06,
|
|
"loss": 0.4058,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 1.9938165084843256,
|
|
"grad_norm": 0.25839485709157983,
|
|
"learning_rate": 5.05618249762057e-06,
|
|
"loss": 0.412,
|
|
"step": 13865
|
|
},
|
|
{
|
|
"epoch": 1.994535519125683,
|
|
"grad_norm": 0.2579182912611581,
|
|
"learning_rate": 5.049638953386635e-06,
|
|
"loss": 0.4018,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 1.9952545297670405,
|
|
"grad_norm": 0.2580154354036394,
|
|
"learning_rate": 5.043098215650634e-06,
|
|
"loss": 0.4002,
|
|
"step": 13875
|
|
},
|
|
{
|
|
"epoch": 1.995973540408398,
|
|
"grad_norm": 0.25201844554372577,
|
|
"learning_rate": 5.0365602881206845e-06,
|
|
"loss": 0.4069,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 1.9966925510497555,
|
|
"grad_norm": 0.25214547421292893,
|
|
"learning_rate": 5.030025174503327e-06,
|
|
"loss": 0.4029,
|
|
"step": 13885
|
|
},
|
|
{
|
|
"epoch": 1.997411561691113,
|
|
"grad_norm": 0.26499122387478796,
|
|
"learning_rate": 5.023492878503495e-06,
|
|
"loss": 0.4104,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 1.9981305723324705,
|
|
"grad_norm": 0.26369627402307694,
|
|
"learning_rate": 5.016963403824535e-06,
|
|
"loss": 0.4221,
|
|
"step": 13895
|
|
},
|
|
{
|
|
"epoch": 1.998849582973828,
|
|
"grad_norm": 0.2969355284033133,
|
|
"learning_rate": 5.010436754168182e-06,
|
|
"loss": 0.4133,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 1.9995685936151855,
|
|
"grad_norm": 0.2662741266239133,
|
|
"learning_rate": 5.003912933234584e-06,
|
|
"loss": 0.4026,
|
|
"step": 13905
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_loss": 0.44030478596687317,
|
|
"eval_runtime": 0.6026,
|
|
"eval_samples_per_second": 41.485,
|
|
"eval_steps_per_second": 1.659,
|
|
"step": 13908
|
|
},
|
|
{
|
|
"epoch": 2.000287604256543,
|
|
"grad_norm": 0.318210591858962,
|
|
"learning_rate": 4.997391944722272e-06,
|
|
"loss": 0.3762,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 2.0010066148979004,
|
|
"grad_norm": 0.2962005622488439,
|
|
"learning_rate": 4.990873792328173e-06,
|
|
"loss": 0.3654,
|
|
"step": 13915
|
|
},
|
|
{
|
|
"epoch": 2.001725625539258,
|
|
"grad_norm": 0.30623782183632015,
|
|
"learning_rate": 4.984358479747618e-06,
|
|
"loss": 0.3534,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 2.0024446361806154,
|
|
"grad_norm": 0.30036561935160455,
|
|
"learning_rate": 4.9778460106743134e-06,
|
|
"loss": 0.3678,
|
|
"step": 13925
|
|
},
|
|
{
|
|
"epoch": 2.003163646821973,
|
|
"grad_norm": 0.3307656806538744,
|
|
"learning_rate": 4.971336388800364e-06,
|
|
"loss": 0.3447,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 2.0038826574633304,
|
|
"grad_norm": 0.2771791786177321,
|
|
"learning_rate": 4.9648296178162506e-06,
|
|
"loss": 0.3676,
|
|
"step": 13935
|
|
},
|
|
{
|
|
"epoch": 2.004601668104688,
|
|
"grad_norm": 0.28213964927100427,
|
|
"learning_rate": 4.958325701410848e-06,
|
|
"loss": 0.3631,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 2.0053206787460454,
|
|
"grad_norm": 0.2861346785121185,
|
|
"learning_rate": 4.951824643271409e-06,
|
|
"loss": 0.3606,
|
|
"step": 13945
|
|
},
|
|
{
|
|
"epoch": 2.006039689387403,
|
|
"grad_norm": 0.2769230735597401,
|
|
"learning_rate": 4.945326447083565e-06,
|
|
"loss": 0.3546,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 2.0067587000287603,
|
|
"grad_norm": 0.3057122437215269,
|
|
"learning_rate": 4.938831116531317e-06,
|
|
"loss": 0.3666,
|
|
"step": 13955
|
|
},
|
|
{
|
|
"epoch": 2.007477710670118,
|
|
"grad_norm": 0.2875326612135141,
|
|
"learning_rate": 4.932338655297061e-06,
|
|
"loss": 0.3474,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 2.0081967213114753,
|
|
"grad_norm": 0.28108990760304803,
|
|
"learning_rate": 4.925849067061548e-06,
|
|
"loss": 0.3575,
|
|
"step": 13965
|
|
},
|
|
{
|
|
"epoch": 2.008915731952833,
|
|
"grad_norm": 0.2915498411636721,
|
|
"learning_rate": 4.919362355503904e-06,
|
|
"loss": 0.3641,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 2.0096347425941903,
|
|
"grad_norm": 0.2987753594184248,
|
|
"learning_rate": 4.912878524301634e-06,
|
|
"loss": 0.3468,
|
|
"step": 13975
|
|
},
|
|
{
|
|
"epoch": 2.010353753235548,
|
|
"grad_norm": 0.2824354278114593,
|
|
"learning_rate": 4.906397577130597e-06,
|
|
"loss": 0.3572,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 2.0110727638769053,
|
|
"grad_norm": 0.27434107221527915,
|
|
"learning_rate": 4.899919517665024e-06,
|
|
"loss": 0.3617,
|
|
"step": 13985
|
|
},
|
|
{
|
|
"epoch": 2.011791774518263,
|
|
"grad_norm": 0.2885017590335467,
|
|
"learning_rate": 4.893444349577514e-06,
|
|
"loss": 0.3597,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 2.0125107851596202,
|
|
"grad_norm": 0.301491921813707,
|
|
"learning_rate": 4.886972076539016e-06,
|
|
"loss": 0.3466,
|
|
"step": 13995
|
|
},
|
|
{
|
|
"epoch": 2.013229795800978,
|
|
"grad_norm": 0.2843171690037388,
|
|
"learning_rate": 4.880502702218838e-06,
|
|
"loss": 0.3601,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 2.013948806442335,
|
|
"grad_norm": 0.2853742848739993,
|
|
"learning_rate": 4.874036230284658e-06,
|
|
"loss": 0.3503,
|
|
"step": 14005
|
|
},
|
|
{
|
|
"epoch": 2.014667817083693,
|
|
"grad_norm": 0.30382693759446466,
|
|
"learning_rate": 4.867572664402494e-06,
|
|
"loss": 0.3474,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 2.01538682772505,
|
|
"grad_norm": 0.28593200867510593,
|
|
"learning_rate": 4.861112008236719e-06,
|
|
"loss": 0.35,
|
|
"step": 14015
|
|
},
|
|
{
|
|
"epoch": 2.016105838366408,
|
|
"grad_norm": 0.27405861148051325,
|
|
"learning_rate": 4.8546542654500674e-06,
|
|
"loss": 0.3506,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 2.016824849007765,
|
|
"grad_norm": 0.30547019029543415,
|
|
"learning_rate": 4.848199439703609e-06,
|
|
"loss": 0.3532,
|
|
"step": 14025
|
|
},
|
|
{
|
|
"epoch": 2.017543859649123,
|
|
"grad_norm": 0.32579122125534155,
|
|
"learning_rate": 4.8417475346567635e-06,
|
|
"loss": 0.369,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 2.01826287029048,
|
|
"grad_norm": 0.291301403012313,
|
|
"learning_rate": 4.835298553967296e-06,
|
|
"loss": 0.3353,
|
|
"step": 14035
|
|
},
|
|
{
|
|
"epoch": 2.018981880931838,
|
|
"grad_norm": 0.2941231633496769,
|
|
"learning_rate": 4.828852501291317e-06,
|
|
"loss": 0.3484,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 2.019700891573195,
|
|
"grad_norm": 0.2862438775719036,
|
|
"learning_rate": 4.822409380283276e-06,
|
|
"loss": 0.3509,
|
|
"step": 14045
|
|
},
|
|
{
|
|
"epoch": 2.020419902214553,
|
|
"grad_norm": 0.3126513367597776,
|
|
"learning_rate": 4.8159691945959554e-06,
|
|
"loss": 0.3577,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 2.02113891285591,
|
|
"grad_norm": 0.28619669680342014,
|
|
"learning_rate": 4.809531947880472e-06,
|
|
"loss": 0.3538,
|
|
"step": 14055
|
|
},
|
|
{
|
|
"epoch": 2.021857923497268,
|
|
"grad_norm": 0.28770586287425526,
|
|
"learning_rate": 4.803097643786289e-06,
|
|
"loss": 0.3591,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 2.022576934138625,
|
|
"grad_norm": 0.2940569243641126,
|
|
"learning_rate": 4.7966662859611865e-06,
|
|
"loss": 0.3534,
|
|
"step": 14065
|
|
},
|
|
{
|
|
"epoch": 2.0232959447799828,
|
|
"grad_norm": 0.29374459433661443,
|
|
"learning_rate": 4.790237878051282e-06,
|
|
"loss": 0.3507,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 2.02401495542134,
|
|
"grad_norm": 0.2907103302826579,
|
|
"learning_rate": 4.783812423701022e-06,
|
|
"loss": 0.3537,
|
|
"step": 14075
|
|
},
|
|
{
|
|
"epoch": 2.0247339660626977,
|
|
"grad_norm": 0.2837728280937334,
|
|
"learning_rate": 4.777389926553172e-06,
|
|
"loss": 0.3628,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 2.025452976704055,
|
|
"grad_norm": 0.30257042493792674,
|
|
"learning_rate": 4.770970390248827e-06,
|
|
"loss": 0.3585,
|
|
"step": 14085
|
|
},
|
|
{
|
|
"epoch": 2.0261719873454127,
|
|
"grad_norm": 0.29397252530146495,
|
|
"learning_rate": 4.764553818427405e-06,
|
|
"loss": 0.3473,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 2.0268909979867704,
|
|
"grad_norm": 0.29913070232198746,
|
|
"learning_rate": 4.758140214726637e-06,
|
|
"loss": 0.3527,
|
|
"step": 14095
|
|
},
|
|
{
|
|
"epoch": 2.0276100086281277,
|
|
"grad_norm": 0.3033281960114325,
|
|
"learning_rate": 4.751729582782572e-06,
|
|
"loss": 0.3589,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 2.0283290192694854,
|
|
"grad_norm": 0.29961537340269007,
|
|
"learning_rate": 4.745321926229579e-06,
|
|
"loss": 0.3611,
|
|
"step": 14105
|
|
},
|
|
{
|
|
"epoch": 2.0290480299108427,
|
|
"grad_norm": 0.29372344469519207,
|
|
"learning_rate": 4.738917248700337e-06,
|
|
"loss": 0.3498,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 2.0297670405522004,
|
|
"grad_norm": 0.29942224504578746,
|
|
"learning_rate": 4.732515553825834e-06,
|
|
"loss": 0.3653,
|
|
"step": 14115
|
|
},
|
|
{
|
|
"epoch": 2.0304860511935576,
|
|
"grad_norm": 0.3108499644186701,
|
|
"learning_rate": 4.726116845235375e-06,
|
|
"loss": 0.3535,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 2.0312050618349153,
|
|
"grad_norm": 0.3057455159859912,
|
|
"learning_rate": 4.719721126556558e-06,
|
|
"loss": 0.3534,
|
|
"step": 14125
|
|
},
|
|
{
|
|
"epoch": 2.0319240724762726,
|
|
"grad_norm": 0.28673110439469285,
|
|
"learning_rate": 4.713328401415305e-06,
|
|
"loss": 0.3445,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 2.0326430831176303,
|
|
"grad_norm": 0.28869406746532883,
|
|
"learning_rate": 4.70693867343582e-06,
|
|
"loss": 0.3503,
|
|
"step": 14135
|
|
},
|
|
{
|
|
"epoch": 2.0333620937589876,
|
|
"grad_norm": 0.33039922667587013,
|
|
"learning_rate": 4.700551946240625e-06,
|
|
"loss": 0.3435,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 2.0340811044003453,
|
|
"grad_norm": 0.284841054056291,
|
|
"learning_rate": 4.694168223450535e-06,
|
|
"loss": 0.3636,
|
|
"step": 14145
|
|
},
|
|
{
|
|
"epoch": 2.0348001150417026,
|
|
"grad_norm": 0.2941154139075771,
|
|
"learning_rate": 4.687787508684658e-06,
|
|
"loss": 0.3637,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 2.0355191256830603,
|
|
"grad_norm": 0.30787130118377565,
|
|
"learning_rate": 4.681409805560397e-06,
|
|
"loss": 0.3624,
|
|
"step": 14155
|
|
},
|
|
{
|
|
"epoch": 2.0362381363244175,
|
|
"grad_norm": 0.2877335384595125,
|
|
"learning_rate": 4.675035117693455e-06,
|
|
"loss": 0.3499,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 2.0369571469657752,
|
|
"grad_norm": 0.29341964401922843,
|
|
"learning_rate": 4.668663448697819e-06,
|
|
"loss": 0.3517,
|
|
"step": 14165
|
|
},
|
|
{
|
|
"epoch": 2.0376761576071325,
|
|
"grad_norm": 0.28623660995216765,
|
|
"learning_rate": 4.662294802185762e-06,
|
|
"loss": 0.3475,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 2.03839516824849,
|
|
"grad_norm": 0.29546272646756755,
|
|
"learning_rate": 4.655929181767853e-06,
|
|
"loss": 0.3516,
|
|
"step": 14175
|
|
},
|
|
{
|
|
"epoch": 2.0391141788898475,
|
|
"grad_norm": 0.2938453129065855,
|
|
"learning_rate": 4.649566591052935e-06,
|
|
"loss": 0.3601,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 2.039833189531205,
|
|
"grad_norm": 0.2991841102600859,
|
|
"learning_rate": 4.643207033648141e-06,
|
|
"loss": 0.3501,
|
|
"step": 14185
|
|
},
|
|
{
|
|
"epoch": 2.0405522001725624,
|
|
"grad_norm": 0.297570848958521,
|
|
"learning_rate": 4.6368505131588856e-06,
|
|
"loss": 0.357,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 2.04127121081392,
|
|
"grad_norm": 0.3045653759481441,
|
|
"learning_rate": 4.630497033188856e-06,
|
|
"loss": 0.3714,
|
|
"step": 14195
|
|
},
|
|
{
|
|
"epoch": 2.0419902214552774,
|
|
"grad_norm": 0.28008392242760666,
|
|
"learning_rate": 4.624146597340009e-06,
|
|
"loss": 0.3458,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 2.042709232096635,
|
|
"grad_norm": 0.28888393828368586,
|
|
"learning_rate": 4.617799209212596e-06,
|
|
"loss": 0.3708,
|
|
"step": 14205
|
|
},
|
|
{
|
|
"epoch": 2.0434282427379924,
|
|
"grad_norm": 0.298326296344726,
|
|
"learning_rate": 4.611454872405122e-06,
|
|
"loss": 0.3479,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 2.04414725337935,
|
|
"grad_norm": 0.28692582057259947,
|
|
"learning_rate": 4.605113590514366e-06,
|
|
"loss": 0.3582,
|
|
"step": 14215
|
|
},
|
|
{
|
|
"epoch": 2.0448662640207074,
|
|
"grad_norm": 0.30045309214713506,
|
|
"learning_rate": 4.598775367135386e-06,
|
|
"loss": 0.3522,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 2.045585274662065,
|
|
"grad_norm": 0.3058044871128482,
|
|
"learning_rate": 4.5924402058614904e-06,
|
|
"loss": 0.3705,
|
|
"step": 14225
|
|
},
|
|
{
|
|
"epoch": 2.0463042853034223,
|
|
"grad_norm": 0.2946397549664444,
|
|
"learning_rate": 4.586108110284262e-06,
|
|
"loss": 0.3601,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 2.04702329594478,
|
|
"grad_norm": 0.29475974329126486,
|
|
"learning_rate": 4.579779083993546e-06,
|
|
"loss": 0.3521,
|
|
"step": 14235
|
|
},
|
|
{
|
|
"epoch": 2.0477423065861373,
|
|
"grad_norm": 0.34681742545231375,
|
|
"learning_rate": 4.573453130577441e-06,
|
|
"loss": 0.3386,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 2.048461317227495,
|
|
"grad_norm": 0.3030014599463751,
|
|
"learning_rate": 4.567130253622303e-06,
|
|
"loss": 0.3586,
|
|
"step": 14245
|
|
},
|
|
{
|
|
"epoch": 2.0491803278688523,
|
|
"grad_norm": 0.29317145892468344,
|
|
"learning_rate": 4.560810456712754e-06,
|
|
"loss": 0.3435,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 2.04989933851021,
|
|
"grad_norm": 0.29355434870329244,
|
|
"learning_rate": 4.554493743431658e-06,
|
|
"loss": 0.3485,
|
|
"step": 14255
|
|
},
|
|
{
|
|
"epoch": 2.0506183491515673,
|
|
"grad_norm": 0.3317194647189456,
|
|
"learning_rate": 4.548180117360143e-06,
|
|
"loss": 0.378,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 2.051337359792925,
|
|
"grad_norm": 0.30572393220881305,
|
|
"learning_rate": 4.5418695820775735e-06,
|
|
"loss": 0.3664,
|
|
"step": 14265
|
|
},
|
|
{
|
|
"epoch": 2.0520563704342822,
|
|
"grad_norm": 0.28519918939779276,
|
|
"learning_rate": 4.535562141161568e-06,
|
|
"loss": 0.3592,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 2.05277538107564,
|
|
"grad_norm": 0.2914991729304061,
|
|
"learning_rate": 4.529257798187996e-06,
|
|
"loss": 0.3603,
|
|
"step": 14275
|
|
},
|
|
{
|
|
"epoch": 2.053494391716997,
|
|
"grad_norm": 0.29364357649864903,
|
|
"learning_rate": 4.52295655673096e-06,
|
|
"loss": 0.3512,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 2.054213402358355,
|
|
"grad_norm": 0.2882934807059941,
|
|
"learning_rate": 4.516658420362812e-06,
|
|
"loss": 0.3576,
|
|
"step": 14285
|
|
},
|
|
{
|
|
"epoch": 2.054932412999712,
|
|
"grad_norm": 0.3043270892198163,
|
|
"learning_rate": 4.510363392654146e-06,
|
|
"loss": 0.3726,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 2.05565142364107,
|
|
"grad_norm": 0.28633599973268004,
|
|
"learning_rate": 4.5040714771737845e-06,
|
|
"loss": 0.3654,
|
|
"step": 14295
|
|
},
|
|
{
|
|
"epoch": 2.056370434282427,
|
|
"grad_norm": 0.2965342297286475,
|
|
"learning_rate": 4.497782677488786e-06,
|
|
"loss": 0.3442,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 2.057089444923785,
|
|
"grad_norm": 0.3014694456691445,
|
|
"learning_rate": 4.4914969971644575e-06,
|
|
"loss": 0.357,
|
|
"step": 14305
|
|
},
|
|
{
|
|
"epoch": 2.0578084555651426,
|
|
"grad_norm": 0.3011763462966578,
|
|
"learning_rate": 4.4852144397643196e-06,
|
|
"loss": 0.3382,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 2.0585274662065,
|
|
"grad_norm": 0.308543485656549,
|
|
"learning_rate": 4.478935008850126e-06,
|
|
"loss": 0.3506,
|
|
"step": 14315
|
|
},
|
|
{
|
|
"epoch": 2.0592464768478576,
|
|
"grad_norm": 0.29654145322937825,
|
|
"learning_rate": 4.472658707981869e-06,
|
|
"loss": 0.3429,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 2.059965487489215,
|
|
"grad_norm": 0.2946816514144125,
|
|
"learning_rate": 4.4663855407177535e-06,
|
|
"loss": 0.3456,
|
|
"step": 14325
|
|
},
|
|
{
|
|
"epoch": 2.0606844981305725,
|
|
"grad_norm": 0.30452616080885697,
|
|
"learning_rate": 4.4601155106142145e-06,
|
|
"loss": 0.3597,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 2.06140350877193,
|
|
"grad_norm": 0.2990826005623624,
|
|
"learning_rate": 4.453848621225913e-06,
|
|
"loss": 0.3456,
|
|
"step": 14335
|
|
},
|
|
{
|
|
"epoch": 2.0621225194132875,
|
|
"grad_norm": 0.2880488234980847,
|
|
"learning_rate": 4.4475848761057175e-06,
|
|
"loss": 0.3513,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 2.0628415300546448,
|
|
"grad_norm": 0.3021201223017955,
|
|
"learning_rate": 4.441324278804717e-06,
|
|
"loss": 0.3606,
|
|
"step": 14345
|
|
},
|
|
{
|
|
"epoch": 2.0635605406960025,
|
|
"grad_norm": 0.3113479744007447,
|
|
"learning_rate": 4.435066832872228e-06,
|
|
"loss": 0.3709,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 2.0642795513373597,
|
|
"grad_norm": 0.3044471482844953,
|
|
"learning_rate": 4.428812541855766e-06,
|
|
"loss": 0.3567,
|
|
"step": 14355
|
|
},
|
|
{
|
|
"epoch": 2.0649985619787175,
|
|
"grad_norm": 0.2996535042717931,
|
|
"learning_rate": 4.422561409301061e-06,
|
|
"loss": 0.353,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 2.0657175726200747,
|
|
"grad_norm": 0.29841095505179394,
|
|
"learning_rate": 4.4163134387520604e-06,
|
|
"loss": 0.3646,
|
|
"step": 14365
|
|
},
|
|
{
|
|
"epoch": 2.0664365832614324,
|
|
"grad_norm": 0.2932029120444435,
|
|
"learning_rate": 4.410068633750906e-06,
|
|
"loss": 0.3817,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 2.0671555939027897,
|
|
"grad_norm": 0.31253609693634077,
|
|
"learning_rate": 4.4038269978379575e-06,
|
|
"loss": 0.3668,
|
|
"step": 14375
|
|
},
|
|
{
|
|
"epoch": 2.0678746045441474,
|
|
"grad_norm": 0.3116402212010058,
|
|
"learning_rate": 4.397588534551774e-06,
|
|
"loss": 0.3606,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 2.0685936151855047,
|
|
"grad_norm": 0.3024380986750069,
|
|
"learning_rate": 4.39135324742911e-06,
|
|
"loss": 0.3509,
|
|
"step": 14385
|
|
},
|
|
{
|
|
"epoch": 2.0693126258268624,
|
|
"grad_norm": 0.2845116612221238,
|
|
"learning_rate": 4.385121140004929e-06,
|
|
"loss": 0.3379,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 2.0700316364682196,
|
|
"grad_norm": 0.2835837243213674,
|
|
"learning_rate": 4.3788922158123825e-06,
|
|
"loss": 0.3399,
|
|
"step": 14395
|
|
},
|
|
{
|
|
"epoch": 2.0707506471095773,
|
|
"grad_norm": 0.29576413680103925,
|
|
"learning_rate": 4.372666478382821e-06,
|
|
"loss": 0.3609,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 2.0714696577509346,
|
|
"grad_norm": 0.29558308439934255,
|
|
"learning_rate": 4.366443931245793e-06,
|
|
"loss": 0.3576,
|
|
"step": 14405
|
|
},
|
|
{
|
|
"epoch": 2.0721886683922923,
|
|
"grad_norm": 0.3369409665924106,
|
|
"learning_rate": 4.360224577929032e-06,
|
|
"loss": 0.3564,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 2.0729076790336496,
|
|
"grad_norm": 0.2968084345430545,
|
|
"learning_rate": 4.35400842195846e-06,
|
|
"loss": 0.3653,
|
|
"step": 14415
|
|
},
|
|
{
|
|
"epoch": 2.0736266896750073,
|
|
"grad_norm": 0.2985475993720737,
|
|
"learning_rate": 4.347795466858196e-06,
|
|
"loss": 0.3455,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 2.0743457003163646,
|
|
"grad_norm": 0.30199060510993814,
|
|
"learning_rate": 4.34158571615053e-06,
|
|
"loss": 0.368,
|
|
"step": 14425
|
|
},
|
|
{
|
|
"epoch": 2.0750647109577223,
|
|
"grad_norm": 0.30041301544820775,
|
|
"learning_rate": 4.335379173355949e-06,
|
|
"loss": 0.3577,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 2.0757837215990795,
|
|
"grad_norm": 0.3011286542550627,
|
|
"learning_rate": 4.329175841993116e-06,
|
|
"loss": 0.3486,
|
|
"step": 14435
|
|
},
|
|
{
|
|
"epoch": 2.0765027322404372,
|
|
"grad_norm": 0.3016219566506042,
|
|
"learning_rate": 4.322975725578871e-06,
|
|
"loss": 0.354,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 2.0772217428817945,
|
|
"grad_norm": 0.29783170544799753,
|
|
"learning_rate": 4.3167788276282285e-06,
|
|
"loss": 0.3576,
|
|
"step": 14445
|
|
},
|
|
{
|
|
"epoch": 2.077940753523152,
|
|
"grad_norm": 0.31733612657339716,
|
|
"learning_rate": 4.310585151654392e-06,
|
|
"loss": 0.361,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 2.0786597641645095,
|
|
"grad_norm": 0.2911711281064737,
|
|
"learning_rate": 4.304394701168724e-06,
|
|
"loss": 0.3508,
|
|
"step": 14455
|
|
},
|
|
{
|
|
"epoch": 2.079378774805867,
|
|
"grad_norm": 0.297068438784082,
|
|
"learning_rate": 4.298207479680761e-06,
|
|
"loss": 0.351,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 2.0800977854472245,
|
|
"grad_norm": 0.2955112135325009,
|
|
"learning_rate": 4.292023490698219e-06,
|
|
"loss": 0.332,
|
|
"step": 14465
|
|
},
|
|
{
|
|
"epoch": 2.080816796088582,
|
|
"grad_norm": 0.3010730238291027,
|
|
"learning_rate": 4.285842737726965e-06,
|
|
"loss": 0.356,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 2.0815358067299394,
|
|
"grad_norm": 0.3038348805009081,
|
|
"learning_rate": 4.279665224271045e-06,
|
|
"loss": 0.3527,
|
|
"step": 14475
|
|
},
|
|
{
|
|
"epoch": 2.082254817371297,
|
|
"grad_norm": 0.318280624520725,
|
|
"learning_rate": 4.273490953832671e-06,
|
|
"loss": 0.3626,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 2.0829738280126544,
|
|
"grad_norm": 0.3171020198243272,
|
|
"learning_rate": 4.267319929912197e-06,
|
|
"loss": 0.3533,
|
|
"step": 14485
|
|
},
|
|
{
|
|
"epoch": 2.083692838654012,
|
|
"grad_norm": 0.28888046653161686,
|
|
"learning_rate": 4.261152156008159e-06,
|
|
"loss": 0.3408,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 2.0844118492953694,
|
|
"grad_norm": 0.3065065717283249,
|
|
"learning_rate": 4.2549876356172355e-06,
|
|
"loss": 0.3683,
|
|
"step": 14495
|
|
},
|
|
{
|
|
"epoch": 2.085130859936727,
|
|
"grad_norm": 0.3094188588362977,
|
|
"learning_rate": 4.2488263722342625e-06,
|
|
"loss": 0.3582,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 2.0858498705780844,
|
|
"grad_norm": 0.3052973971809332,
|
|
"learning_rate": 4.2426683693522395e-06,
|
|
"loss": 0.3642,
|
|
"step": 14505
|
|
},
|
|
{
|
|
"epoch": 2.086568881219442,
|
|
"grad_norm": 0.2962269533273906,
|
|
"learning_rate": 4.236513630462305e-06,
|
|
"loss": 0.3619,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 2.0872878918607993,
|
|
"grad_norm": 0.2944235945817763,
|
|
"learning_rate": 4.230362159053752e-06,
|
|
"loss": 0.348,
|
|
"step": 14515
|
|
},
|
|
{
|
|
"epoch": 2.088006902502157,
|
|
"grad_norm": 0.2997896341259781,
|
|
"learning_rate": 4.224213958614025e-06,
|
|
"loss": 0.3448,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 2.0887259131435147,
|
|
"grad_norm": 0.29245152361115756,
|
|
"learning_rate": 4.218069032628706e-06,
|
|
"loss": 0.3564,
|
|
"step": 14525
|
|
},
|
|
{
|
|
"epoch": 2.089444923784872,
|
|
"grad_norm": 0.31088657601953423,
|
|
"learning_rate": 4.211927384581527e-06,
|
|
"loss": 0.3567,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 2.0901639344262297,
|
|
"grad_norm": 0.3059053867853581,
|
|
"learning_rate": 4.205789017954364e-06,
|
|
"loss": 0.3594,
|
|
"step": 14535
|
|
},
|
|
{
|
|
"epoch": 2.090882945067587,
|
|
"grad_norm": 0.29479970664099747,
|
|
"learning_rate": 4.199653936227225e-06,
|
|
"loss": 0.3666,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 2.0916019557089447,
|
|
"grad_norm": 0.3027974345567834,
|
|
"learning_rate": 4.193522142878256e-06,
|
|
"loss": 0.3589,
|
|
"step": 14545
|
|
},
|
|
{
|
|
"epoch": 2.092320966350302,
|
|
"grad_norm": 0.2984127166650263,
|
|
"learning_rate": 4.187393641383748e-06,
|
|
"loss": 0.3445,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 2.0930399769916597,
|
|
"grad_norm": 0.29434023230091527,
|
|
"learning_rate": 4.181268435218118e-06,
|
|
"loss": 0.367,
|
|
"step": 14555
|
|
},
|
|
{
|
|
"epoch": 2.093758987633017,
|
|
"grad_norm": 0.30990202774075715,
|
|
"learning_rate": 4.175146527853911e-06,
|
|
"loss": 0.3638,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 2.0944779982743746,
|
|
"grad_norm": 0.2984330732391715,
|
|
"learning_rate": 4.169027922761814e-06,
|
|
"loss": 0.3647,
|
|
"step": 14565
|
|
},
|
|
{
|
|
"epoch": 2.095197008915732,
|
|
"grad_norm": 0.29561284011499295,
|
|
"learning_rate": 4.16291262341063e-06,
|
|
"loss": 0.3689,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 2.0959160195570896,
|
|
"grad_norm": 0.3447341085031207,
|
|
"learning_rate": 4.156800633267295e-06,
|
|
"loss": 0.3627,
|
|
"step": 14575
|
|
},
|
|
{
|
|
"epoch": 2.096635030198447,
|
|
"grad_norm": 0.30914718629127896,
|
|
"learning_rate": 4.150691955796871e-06,
|
|
"loss": 0.3701,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 2.0973540408398046,
|
|
"grad_norm": 0.3055974026554414,
|
|
"learning_rate": 4.144586594462532e-06,
|
|
"loss": 0.3643,
|
|
"step": 14585
|
|
},
|
|
{
|
|
"epoch": 2.098073051481162,
|
|
"grad_norm": 0.316488731474431,
|
|
"learning_rate": 4.138484552725582e-06,
|
|
"loss": 0.358,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 2.0987920621225196,
|
|
"grad_norm": 0.3181537442868115,
|
|
"learning_rate": 4.132385834045438e-06,
|
|
"loss": 0.3598,
|
|
"step": 14595
|
|
},
|
|
{
|
|
"epoch": 2.099511072763877,
|
|
"grad_norm": 0.2990267629620605,
|
|
"learning_rate": 4.126290441879629e-06,
|
|
"loss": 0.3653,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 2.1002300834052345,
|
|
"grad_norm": 0.3031855089515431,
|
|
"learning_rate": 4.120198379683811e-06,
|
|
"loss": 0.365,
|
|
"step": 14605
|
|
},
|
|
{
|
|
"epoch": 2.100949094046592,
|
|
"grad_norm": 0.3139042925193933,
|
|
"learning_rate": 4.11410965091174e-06,
|
|
"loss": 0.3534,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 2.1016681046879495,
|
|
"grad_norm": 0.31907498432839176,
|
|
"learning_rate": 4.108024259015283e-06,
|
|
"loss": 0.3484,
|
|
"step": 14615
|
|
},
|
|
{
|
|
"epoch": 2.1023871153293068,
|
|
"grad_norm": 0.29760108656159406,
|
|
"learning_rate": 4.101942207444421e-06,
|
|
"loss": 0.3489,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 2.1031061259706645,
|
|
"grad_norm": 0.30405200853613723,
|
|
"learning_rate": 4.095863499647246e-06,
|
|
"loss": 0.3599,
|
|
"step": 14625
|
|
},
|
|
{
|
|
"epoch": 2.1038251366120218,
|
|
"grad_norm": 0.30393682775695036,
|
|
"learning_rate": 4.089788139069936e-06,
|
|
"loss": 0.363,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 2.1045441472533795,
|
|
"grad_norm": 0.2967264561430701,
|
|
"learning_rate": 4.083716129156792e-06,
|
|
"loss": 0.349,
|
|
"step": 14635
|
|
},
|
|
{
|
|
"epoch": 2.1052631578947367,
|
|
"grad_norm": 0.2938902261935861,
|
|
"learning_rate": 4.077647473350201e-06,
|
|
"loss": 0.3725,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 2.1059821685360944,
|
|
"grad_norm": 0.3156046257036086,
|
|
"learning_rate": 4.071582175090652e-06,
|
|
"loss": 0.3704,
|
|
"step": 14645
|
|
},
|
|
{
|
|
"epoch": 2.1067011791774517,
|
|
"grad_norm": 0.31885314663699743,
|
|
"learning_rate": 4.065520237816738e-06,
|
|
"loss": 0.355,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 2.1074201898188094,
|
|
"grad_norm": 0.2976767542993816,
|
|
"learning_rate": 4.059461664965136e-06,
|
|
"loss": 0.3471,
|
|
"step": 14655
|
|
},
|
|
{
|
|
"epoch": 2.1081392004601667,
|
|
"grad_norm": 0.2998513718549436,
|
|
"learning_rate": 4.053406459970618e-06,
|
|
"loss": 0.3646,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 2.1088582111015244,
|
|
"grad_norm": 0.29277607344471285,
|
|
"learning_rate": 4.047354626266055e-06,
|
|
"loss": 0.3431,
|
|
"step": 14665
|
|
},
|
|
{
|
|
"epoch": 2.1095772217428816,
|
|
"grad_norm": 0.29228324476185435,
|
|
"learning_rate": 4.041306167282394e-06,
|
|
"loss": 0.3725,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 2.1102962323842394,
|
|
"grad_norm": 0.3073195069040935,
|
|
"learning_rate": 4.035261086448678e-06,
|
|
"loss": 0.3471,
|
|
"step": 14675
|
|
},
|
|
{
|
|
"epoch": 2.1110152430255966,
|
|
"grad_norm": 0.29651468102486633,
|
|
"learning_rate": 4.029219387192037e-06,
|
|
"loss": 0.3643,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 2.1117342536669543,
|
|
"grad_norm": 0.29495521394621027,
|
|
"learning_rate": 4.0231810729376755e-06,
|
|
"loss": 0.3535,
|
|
"step": 14685
|
|
},
|
|
{
|
|
"epoch": 2.1124532643083116,
|
|
"grad_norm": 0.30215728429801914,
|
|
"learning_rate": 4.017146147108877e-06,
|
|
"loss": 0.371,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 2.1131722749496693,
|
|
"grad_norm": 0.30810009456278137,
|
|
"learning_rate": 4.0111146131270185e-06,
|
|
"loss": 0.348,
|
|
"step": 14695
|
|
},
|
|
{
|
|
"epoch": 2.1138912855910266,
|
|
"grad_norm": 0.3038910693861325,
|
|
"learning_rate": 4.005086474411537e-06,
|
|
"loss": 0.3666,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 2.1146102962323843,
|
|
"grad_norm": 0.3179492455943479,
|
|
"learning_rate": 3.999061734379961e-06,
|
|
"loss": 0.3573,
|
|
"step": 14705
|
|
},
|
|
{
|
|
"epoch": 2.1153293068737415,
|
|
"grad_norm": 0.30833578681381413,
|
|
"learning_rate": 3.993040396447878e-06,
|
|
"loss": 0.341,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 2.1160483175150993,
|
|
"grad_norm": 0.3060367891311736,
|
|
"learning_rate": 3.987022464028953e-06,
|
|
"loss": 0.3599,
|
|
"step": 14715
|
|
},
|
|
{
|
|
"epoch": 2.1167673281564565,
|
|
"grad_norm": 0.3029894917005249,
|
|
"learning_rate": 3.981007940534919e-06,
|
|
"loss": 0.3666,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 2.1174863387978142,
|
|
"grad_norm": 0.3307925019991336,
|
|
"learning_rate": 3.974996829375584e-06,
|
|
"loss": 0.3573,
|
|
"step": 14725
|
|
},
|
|
{
|
|
"epoch": 2.1182053494391715,
|
|
"grad_norm": 0.3030445334315976,
|
|
"learning_rate": 3.968989133958805e-06,
|
|
"loss": 0.3623,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 2.118924360080529,
|
|
"grad_norm": 0.2998221367827612,
|
|
"learning_rate": 3.962984857690523e-06,
|
|
"loss": 0.3618,
|
|
"step": 14735
|
|
},
|
|
{
|
|
"epoch": 2.119643370721887,
|
|
"grad_norm": 0.29657577366382637,
|
|
"learning_rate": 3.956984003974723e-06,
|
|
"loss": 0.3661,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 2.120362381363244,
|
|
"grad_norm": 0.2942364740462061,
|
|
"learning_rate": 3.950986576213454e-06,
|
|
"loss": 0.3297,
|
|
"step": 14745
|
|
},
|
|
{
|
|
"epoch": 2.1210813920046014,
|
|
"grad_norm": 0.3086184084415184,
|
|
"learning_rate": 3.9449925778068345e-06,
|
|
"loss": 0.3472,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 2.121800402645959,
|
|
"grad_norm": 0.31743366653990324,
|
|
"learning_rate": 3.939002012153023e-06,
|
|
"loss": 0.3694,
|
|
"step": 14755
|
|
},
|
|
{
|
|
"epoch": 2.122519413287317,
|
|
"grad_norm": 0.2989229645333128,
|
|
"learning_rate": 3.9330148826482376e-06,
|
|
"loss": 0.3554,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 2.123238423928674,
|
|
"grad_norm": 0.30918978741841063,
|
|
"learning_rate": 3.927031192686751e-06,
|
|
"loss": 0.3583,
|
|
"step": 14765
|
|
},
|
|
{
|
|
"epoch": 2.123957434570032,
|
|
"grad_norm": 0.3118035443895809,
|
|
"learning_rate": 3.921050945660888e-06,
|
|
"loss": 0.3618,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 2.124676445211389,
|
|
"grad_norm": 0.3118575800553098,
|
|
"learning_rate": 3.91507414496101e-06,
|
|
"loss": 0.3509,
|
|
"step": 14775
|
|
},
|
|
{
|
|
"epoch": 2.125395455852747,
|
|
"grad_norm": 0.28782970273266467,
|
|
"learning_rate": 3.909100793975541e-06,
|
|
"loss": 0.3492,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 2.126114466494104,
|
|
"grad_norm": 0.30464380032983196,
|
|
"learning_rate": 3.903130896090935e-06,
|
|
"loss": 0.3559,
|
|
"step": 14785
|
|
},
|
|
{
|
|
"epoch": 2.126833477135462,
|
|
"grad_norm": 0.3192987940414797,
|
|
"learning_rate": 3.897164454691692e-06,
|
|
"loss": 0.344,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 2.127552487776819,
|
|
"grad_norm": 0.30121843819306726,
|
|
"learning_rate": 3.891201473160361e-06,
|
|
"loss": 0.3627,
|
|
"step": 14795
|
|
},
|
|
{
|
|
"epoch": 2.1282714984181768,
|
|
"grad_norm": 0.31249465666523135,
|
|
"learning_rate": 3.885241954877514e-06,
|
|
"loss": 0.3754,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 2.128990509059534,
|
|
"grad_norm": 0.33257311622890157,
|
|
"learning_rate": 3.8792859032217774e-06,
|
|
"loss": 0.3499,
|
|
"step": 14805
|
|
},
|
|
{
|
|
"epoch": 2.1297095197008917,
|
|
"grad_norm": 0.306534039307065,
|
|
"learning_rate": 3.8733333215698e-06,
|
|
"loss": 0.3537,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 2.130428530342249,
|
|
"grad_norm": 0.30919711460875726,
|
|
"learning_rate": 3.867384213296261e-06,
|
|
"loss": 0.3685,
|
|
"step": 14815
|
|
},
|
|
{
|
|
"epoch": 2.1311475409836067,
|
|
"grad_norm": 0.2949428765145592,
|
|
"learning_rate": 3.86143858177388e-06,
|
|
"loss": 0.3556,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 2.131866551624964,
|
|
"grad_norm": 0.29868068292926775,
|
|
"learning_rate": 3.855496430373407e-06,
|
|
"loss": 0.348,
|
|
"step": 14825
|
|
},
|
|
{
|
|
"epoch": 2.1325855622663217,
|
|
"grad_norm": 0.29091085369573366,
|
|
"learning_rate": 3.849557762463603e-06,
|
|
"loss": 0.3626,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 2.133304572907679,
|
|
"grad_norm": 0.29997048165748763,
|
|
"learning_rate": 3.843622581411277e-06,
|
|
"loss": 0.3633,
|
|
"step": 14835
|
|
},
|
|
{
|
|
"epoch": 2.1340235835490367,
|
|
"grad_norm": 0.30839851301814,
|
|
"learning_rate": 3.83769089058124e-06,
|
|
"loss": 0.3665,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 2.134742594190394,
|
|
"grad_norm": 0.3178213348617244,
|
|
"learning_rate": 3.8317626933363335e-06,
|
|
"loss": 0.3536,
|
|
"step": 14845
|
|
},
|
|
{
|
|
"epoch": 2.1354616048317516,
|
|
"grad_norm": 0.2957764219336339,
|
|
"learning_rate": 3.8258379930374235e-06,
|
|
"loss": 0.3481,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 2.136180615473109,
|
|
"grad_norm": 0.3063908238021033,
|
|
"learning_rate": 3.819916793043383e-06,
|
|
"loss": 0.3556,
|
|
"step": 14855
|
|
},
|
|
{
|
|
"epoch": 2.1368996261144666,
|
|
"grad_norm": 0.2913578247940509,
|
|
"learning_rate": 3.8139990967111053e-06,
|
|
"loss": 0.3487,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 2.137618636755824,
|
|
"grad_norm": 0.3177639829875217,
|
|
"learning_rate": 3.8080849073954996e-06,
|
|
"loss": 0.3534,
|
|
"step": 14865
|
|
},
|
|
{
|
|
"epoch": 2.1383376473971816,
|
|
"grad_norm": 0.30605057135730174,
|
|
"learning_rate": 3.802174228449489e-06,
|
|
"loss": 0.3646,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 2.139056658038539,
|
|
"grad_norm": 0.2980466608826346,
|
|
"learning_rate": 3.796267063223994e-06,
|
|
"loss": 0.3584,
|
|
"step": 14875
|
|
},
|
|
{
|
|
"epoch": 2.1397756686798965,
|
|
"grad_norm": 0.3083635667925451,
|
|
"learning_rate": 3.79036341506796e-06,
|
|
"loss": 0.3482,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 2.140494679321254,
|
|
"grad_norm": 0.31459440458454185,
|
|
"learning_rate": 3.784463287328326e-06,
|
|
"loss": 0.3458,
|
|
"step": 14885
|
|
},
|
|
{
|
|
"epoch": 2.1412136899626115,
|
|
"grad_norm": 0.29919560929347505,
|
|
"learning_rate": 3.7785666833500356e-06,
|
|
"loss": 0.3446,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 2.141932700603969,
|
|
"grad_norm": 0.29062309267369785,
|
|
"learning_rate": 3.772673606476046e-06,
|
|
"loss": 0.353,
|
|
"step": 14895
|
|
},
|
|
{
|
|
"epoch": 2.1426517112453265,
|
|
"grad_norm": 0.3150948575899284,
|
|
"learning_rate": 3.766784060047303e-06,
|
|
"loss": 0.3547,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 2.1433707218866838,
|
|
"grad_norm": 0.30156362208356413,
|
|
"learning_rate": 3.760898047402751e-06,
|
|
"loss": 0.367,
|
|
"step": 14905
|
|
},
|
|
{
|
|
"epoch": 2.1440897325280415,
|
|
"grad_norm": 0.2961650363272026,
|
|
"learning_rate": 3.7550155718793433e-06,
|
|
"loss": 0.3611,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 2.1448087431693987,
|
|
"grad_norm": 0.32527527776782844,
|
|
"learning_rate": 3.749136636812011e-06,
|
|
"loss": 0.3617,
|
|
"step": 14915
|
|
},
|
|
{
|
|
"epoch": 2.1455277538107564,
|
|
"grad_norm": 0.3018389075600328,
|
|
"learning_rate": 3.7432612455336915e-06,
|
|
"loss": 0.335,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 2.1462467644521137,
|
|
"grad_norm": 0.30937969642349905,
|
|
"learning_rate": 3.737389401375311e-06,
|
|
"loss": 0.3686,
|
|
"step": 14925
|
|
},
|
|
{
|
|
"epoch": 2.1469657750934714,
|
|
"grad_norm": 0.2985516260167024,
|
|
"learning_rate": 3.7315211076657745e-06,
|
|
"loss": 0.3426,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 2.1476847857348287,
|
|
"grad_norm": 0.31397886253424284,
|
|
"learning_rate": 3.725656367731988e-06,
|
|
"loss": 0.368,
|
|
"step": 14935
|
|
},
|
|
{
|
|
"epoch": 2.1484037963761864,
|
|
"grad_norm": 0.30606097677604527,
|
|
"learning_rate": 3.7197951848988356e-06,
|
|
"loss": 0.3717,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 2.1491228070175437,
|
|
"grad_norm": 0.2972460298848214,
|
|
"learning_rate": 3.7139375624891795e-06,
|
|
"loss": 0.3447,
|
|
"step": 14945
|
|
},
|
|
{
|
|
"epoch": 2.1498418176589014,
|
|
"grad_norm": 0.2941505932629645,
|
|
"learning_rate": 3.7080835038238773e-06,
|
|
"loss": 0.3392,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 2.150560828300259,
|
|
"grad_norm": 0.31366021885026046,
|
|
"learning_rate": 3.7022330122217543e-06,
|
|
"loss": 0.3614,
|
|
"step": 14955
|
|
},
|
|
{
|
|
"epoch": 2.1512798389416163,
|
|
"grad_norm": 0.3013875352411097,
|
|
"learning_rate": 3.6963860909996154e-06,
|
|
"loss": 0.3624,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 2.1519988495829736,
|
|
"grad_norm": 0.30679656936391514,
|
|
"learning_rate": 3.6905427434722452e-06,
|
|
"loss": 0.363,
|
|
"step": 14965
|
|
},
|
|
{
|
|
"epoch": 2.1527178602243313,
|
|
"grad_norm": 0.29533974380722106,
|
|
"learning_rate": 3.6847029729524062e-06,
|
|
"loss": 0.3579,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 2.153436870865689,
|
|
"grad_norm": 0.30993853814479577,
|
|
"learning_rate": 3.6788667827508185e-06,
|
|
"loss": 0.3546,
|
|
"step": 14975
|
|
},
|
|
{
|
|
"epoch": 2.1541558815070463,
|
|
"grad_norm": 0.3082180112335835,
|
|
"learning_rate": 3.67303417617619e-06,
|
|
"loss": 0.3604,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 2.154874892148404,
|
|
"grad_norm": 0.2993173008034473,
|
|
"learning_rate": 3.667205156535183e-06,
|
|
"loss": 0.354,
|
|
"step": 14985
|
|
},
|
|
{
|
|
"epoch": 2.1555939027897613,
|
|
"grad_norm": 0.30863406552666395,
|
|
"learning_rate": 3.661379727132429e-06,
|
|
"loss": 0.3631,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 2.156312913431119,
|
|
"grad_norm": 0.2988879862647366,
|
|
"learning_rate": 3.6555578912705335e-06,
|
|
"loss": 0.3539,
|
|
"step": 14995
|
|
},
|
|
{
|
|
"epoch": 2.1570319240724762,
|
|
"grad_norm": 0.30342094389011454,
|
|
"learning_rate": 3.649739652250055e-06,
|
|
"loss": 0.3498,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 2.157750934713834,
|
|
"grad_norm": 0.29900908254499947,
|
|
"learning_rate": 3.6439250133695113e-06,
|
|
"loss": 0.3627,
|
|
"step": 15005
|
|
},
|
|
{
|
|
"epoch": 2.158469945355191,
|
|
"grad_norm": 0.30994355898884496,
|
|
"learning_rate": 3.638113977925387e-06,
|
|
"loss": 0.3578,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 2.159188955996549,
|
|
"grad_norm": 0.3014829152149826,
|
|
"learning_rate": 3.6323065492121244e-06,
|
|
"loss": 0.3485,
|
|
"step": 15015
|
|
},
|
|
{
|
|
"epoch": 2.159907966637906,
|
|
"grad_norm": 0.302328397977993,
|
|
"learning_rate": 3.62650273052211e-06,
|
|
"loss": 0.3579,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 2.160626977279264,
|
|
"grad_norm": 0.3058435300497303,
|
|
"learning_rate": 3.6207025251456974e-06,
|
|
"loss": 0.3447,
|
|
"step": 15025
|
|
},
|
|
{
|
|
"epoch": 2.161345987920621,
|
|
"grad_norm": 0.30457986940937143,
|
|
"learning_rate": 3.614905936371178e-06,
|
|
"loss": 0.335,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 2.162064998561979,
|
|
"grad_norm": 0.30658271927475794,
|
|
"learning_rate": 3.609112967484807e-06,
|
|
"loss": 0.3717,
|
|
"step": 15035
|
|
},
|
|
{
|
|
"epoch": 2.162784009203336,
|
|
"grad_norm": 0.3029350646367455,
|
|
"learning_rate": 3.6033236217707766e-06,
|
|
"loss": 0.361,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 2.163503019844694,
|
|
"grad_norm": 0.3031235787806439,
|
|
"learning_rate": 3.5975379025112254e-06,
|
|
"loss": 0.3405,
|
|
"step": 15045
|
|
},
|
|
{
|
|
"epoch": 2.164222030486051,
|
|
"grad_norm": 0.30775906932936425,
|
|
"learning_rate": 3.591755812986246e-06,
|
|
"loss": 0.3687,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 2.164941041127409,
|
|
"grad_norm": 0.30566859413667363,
|
|
"learning_rate": 3.5859773564738633e-06,
|
|
"loss": 0.3591,
|
|
"step": 15055
|
|
},
|
|
{
|
|
"epoch": 2.165660051768766,
|
|
"grad_norm": 0.300167154605826,
|
|
"learning_rate": 3.5802025362500415e-06,
|
|
"loss": 0.3496,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 2.166379062410124,
|
|
"grad_norm": 0.29809112797987314,
|
|
"learning_rate": 3.5744313555886912e-06,
|
|
"loss": 0.357,
|
|
"step": 15065
|
|
},
|
|
{
|
|
"epoch": 2.167098073051481,
|
|
"grad_norm": 0.3125754657967547,
|
|
"learning_rate": 3.5686638177616594e-06,
|
|
"loss": 0.3596,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 2.1678170836928388,
|
|
"grad_norm": 0.29974889207961936,
|
|
"learning_rate": 3.5628999260387176e-06,
|
|
"loss": 0.3312,
|
|
"step": 15075
|
|
},
|
|
{
|
|
"epoch": 2.168536094334196,
|
|
"grad_norm": 0.2876712343810564,
|
|
"learning_rate": 3.5571396836875848e-06,
|
|
"loss": 0.3464,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 2.1692551049755537,
|
|
"grad_norm": 0.2932707741588461,
|
|
"learning_rate": 3.551383093973898e-06,
|
|
"loss": 0.3505,
|
|
"step": 15085
|
|
},
|
|
{
|
|
"epoch": 2.169974115616911,
|
|
"grad_norm": 0.31004210796868886,
|
|
"learning_rate": 3.5456301601612252e-06,
|
|
"loss": 0.3476,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 2.1706931262582687,
|
|
"grad_norm": 0.2980460736902264,
|
|
"learning_rate": 3.5398808855110745e-06,
|
|
"loss": 0.3269,
|
|
"step": 15095
|
|
},
|
|
{
|
|
"epoch": 2.171412136899626,
|
|
"grad_norm": 0.3078827121154138,
|
|
"learning_rate": 3.534135273282865e-06,
|
|
"loss": 0.3567,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 2.1721311475409837,
|
|
"grad_norm": 0.31095557656155537,
|
|
"learning_rate": 3.528393326733941e-06,
|
|
"loss": 0.3629,
|
|
"step": 15105
|
|
},
|
|
{
|
|
"epoch": 2.172850158182341,
|
|
"grad_norm": 0.3042862356254187,
|
|
"learning_rate": 3.5226550491195765e-06,
|
|
"loss": 0.3579,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 2.1735691688236987,
|
|
"grad_norm": 0.30292291186327597,
|
|
"learning_rate": 3.5169204436929647e-06,
|
|
"loss": 0.3557,
|
|
"step": 15115
|
|
},
|
|
{
|
|
"epoch": 2.174288179465056,
|
|
"grad_norm": 0.3001117215317004,
|
|
"learning_rate": 3.5111895137052065e-06,
|
|
"loss": 0.3484,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 2.1750071901064136,
|
|
"grad_norm": 0.30233367047550447,
|
|
"learning_rate": 3.5054622624053335e-06,
|
|
"loss": 0.3542,
|
|
"step": 15125
|
|
},
|
|
{
|
|
"epoch": 2.175726200747771,
|
|
"grad_norm": 0.3106028320426427,
|
|
"learning_rate": 3.499738693040278e-06,
|
|
"loss": 0.3666,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 2.1764452113891286,
|
|
"grad_norm": 0.3014691795399771,
|
|
"learning_rate": 3.4940188088548963e-06,
|
|
"loss": 0.3425,
|
|
"step": 15135
|
|
},
|
|
{
|
|
"epoch": 2.177164222030486,
|
|
"grad_norm": 0.31040705000513114,
|
|
"learning_rate": 3.4883026130919486e-06,
|
|
"loss": 0.3456,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 2.1778832326718436,
|
|
"grad_norm": 0.30381473669958653,
|
|
"learning_rate": 3.482590108992101e-06,
|
|
"loss": 0.362,
|
|
"step": 15145
|
|
},
|
|
{
|
|
"epoch": 2.178602243313201,
|
|
"grad_norm": 0.30480112115160074,
|
|
"learning_rate": 3.4768812997939406e-06,
|
|
"loss": 0.3449,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 2.1793212539545586,
|
|
"grad_norm": 0.31175874268061876,
|
|
"learning_rate": 3.4711761887339434e-06,
|
|
"loss": 0.3608,
|
|
"step": 15155
|
|
},
|
|
{
|
|
"epoch": 2.180040264595916,
|
|
"grad_norm": 0.3143375703331615,
|
|
"learning_rate": 3.4654747790465015e-06,
|
|
"loss": 0.3425,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 2.1807592752372735,
|
|
"grad_norm": 0.29956531504798056,
|
|
"learning_rate": 3.459777073963898e-06,
|
|
"loss": 0.374,
|
|
"step": 15165
|
|
},
|
|
{
|
|
"epoch": 2.181478285878631,
|
|
"grad_norm": 0.3078129825099616,
|
|
"learning_rate": 3.454083076716327e-06,
|
|
"loss": 0.3512,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 2.1821972965199885,
|
|
"grad_norm": 0.29806482217760777,
|
|
"learning_rate": 3.4483927905318683e-06,
|
|
"loss": 0.3518,
|
|
"step": 15175
|
|
},
|
|
{
|
|
"epoch": 2.1829163071613458,
|
|
"grad_norm": 0.3002561520340104,
|
|
"learning_rate": 3.44270621863651e-06,
|
|
"loss": 0.3609,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 2.1836353178027035,
|
|
"grad_norm": 0.30917529484807416,
|
|
"learning_rate": 3.4370233642541263e-06,
|
|
"loss": 0.3765,
|
|
"step": 15185
|
|
},
|
|
{
|
|
"epoch": 2.184354328444061,
|
|
"grad_norm": 0.31772980432478815,
|
|
"learning_rate": 3.4313442306064813e-06,
|
|
"loss": 0.3667,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 2.1850733390854185,
|
|
"grad_norm": 0.30465170420030224,
|
|
"learning_rate": 3.4256688209132426e-06,
|
|
"loss": 0.3599,
|
|
"step": 15195
|
|
},
|
|
{
|
|
"epoch": 2.185792349726776,
|
|
"grad_norm": 0.31717957291545246,
|
|
"learning_rate": 3.4199971383919538e-06,
|
|
"loss": 0.3721,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 2.1865113603681334,
|
|
"grad_norm": 0.29953324558392497,
|
|
"learning_rate": 3.4143291862580484e-06,
|
|
"loss": 0.3537,
|
|
"step": 15205
|
|
},
|
|
{
|
|
"epoch": 2.187230371009491,
|
|
"grad_norm": 0.29164252095032084,
|
|
"learning_rate": 3.4086649677248494e-06,
|
|
"loss": 0.3499,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 2.1879493816508484,
|
|
"grad_norm": 0.3119406011184142,
|
|
"learning_rate": 3.403004486003563e-06,
|
|
"loss": 0.3568,
|
|
"step": 15215
|
|
},
|
|
{
|
|
"epoch": 2.188668392292206,
|
|
"grad_norm": 0.3144362138159923,
|
|
"learning_rate": 3.3973477443032675e-06,
|
|
"loss": 0.3604,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 2.1893874029335634,
|
|
"grad_norm": 0.30049624753036813,
|
|
"learning_rate": 3.3916947458309367e-06,
|
|
"loss": 0.3306,
|
|
"step": 15225
|
|
},
|
|
{
|
|
"epoch": 2.190106413574921,
|
|
"grad_norm": 0.2907555205434776,
|
|
"learning_rate": 3.386045493791408e-06,
|
|
"loss": 0.3418,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 2.1908254242162783,
|
|
"grad_norm": 0.32286636944068536,
|
|
"learning_rate": 3.3803999913873964e-06,
|
|
"loss": 0.3554,
|
|
"step": 15235
|
|
},
|
|
{
|
|
"epoch": 2.191544434857636,
|
|
"grad_norm": 0.31197806907480435,
|
|
"learning_rate": 3.3747582418195034e-06,
|
|
"loss": 0.3356,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 2.1922634454989933,
|
|
"grad_norm": 0.3074070732635455,
|
|
"learning_rate": 3.3691202482861864e-06,
|
|
"loss": 0.3589,
|
|
"step": 15245
|
|
},
|
|
{
|
|
"epoch": 2.192982456140351,
|
|
"grad_norm": 0.30666247444748174,
|
|
"learning_rate": 3.3634860139837877e-06,
|
|
"loss": 0.3561,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 2.1937014667817083,
|
|
"grad_norm": 0.29794705043688,
|
|
"learning_rate": 3.357855542106507e-06,
|
|
"loss": 0.3734,
|
|
"step": 15255
|
|
},
|
|
{
|
|
"epoch": 2.194420477423066,
|
|
"grad_norm": 0.3248391107950035,
|
|
"learning_rate": 3.3522288358464184e-06,
|
|
"loss": 0.3379,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 2.1951394880644233,
|
|
"grad_norm": 0.29350187702529457,
|
|
"learning_rate": 3.3466058983934623e-06,
|
|
"loss": 0.3693,
|
|
"step": 15265
|
|
},
|
|
{
|
|
"epoch": 2.195858498705781,
|
|
"grad_norm": 0.30924967362422306,
|
|
"learning_rate": 3.3409867329354352e-06,
|
|
"loss": 0.3621,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 2.1965775093471382,
|
|
"grad_norm": 0.30868716963529236,
|
|
"learning_rate": 3.335371342657996e-06,
|
|
"loss": 0.3539,
|
|
"step": 15275
|
|
},
|
|
{
|
|
"epoch": 2.197296519988496,
|
|
"grad_norm": 0.32089606318561353,
|
|
"learning_rate": 3.3297597307446738e-06,
|
|
"loss": 0.3598,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 2.198015530629853,
|
|
"grad_norm": 0.29621403088366444,
|
|
"learning_rate": 3.324151900376843e-06,
|
|
"loss": 0.3573,
|
|
"step": 15285
|
|
},
|
|
{
|
|
"epoch": 2.198734541271211,
|
|
"grad_norm": 0.30789761677291216,
|
|
"learning_rate": 3.318547854733737e-06,
|
|
"loss": 0.3513,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 2.199453551912568,
|
|
"grad_norm": 0.29817485223243784,
|
|
"learning_rate": 3.3129475969924528e-06,
|
|
"loss": 0.3505,
|
|
"step": 15295
|
|
},
|
|
{
|
|
"epoch": 2.200172562553926,
|
|
"grad_norm": 0.3153210416440025,
|
|
"learning_rate": 3.3073511303279282e-06,
|
|
"loss": 0.3578,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 2.200891573195283,
|
|
"grad_norm": 0.3105491649370774,
|
|
"learning_rate": 3.301758457912955e-06,
|
|
"loss": 0.3538,
|
|
"step": 15305
|
|
},
|
|
{
|
|
"epoch": 2.201610583836641,
|
|
"grad_norm": 0.3074164397101723,
|
|
"learning_rate": 3.2961695829181772e-06,
|
|
"loss": 0.3417,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 2.202329594477998,
|
|
"grad_norm": 0.31478703453870654,
|
|
"learning_rate": 3.290584508512088e-06,
|
|
"loss": 0.3649,
|
|
"step": 15315
|
|
},
|
|
{
|
|
"epoch": 2.203048605119356,
|
|
"grad_norm": 0.308567424709636,
|
|
"learning_rate": 3.2850032378610154e-06,
|
|
"loss": 0.3508,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 2.203767615760713,
|
|
"grad_norm": 0.3117897029814399,
|
|
"learning_rate": 3.2794257741291437e-06,
|
|
"loss": 0.3534,
|
|
"step": 15325
|
|
},
|
|
{
|
|
"epoch": 2.204486626402071,
|
|
"grad_norm": 0.31285403661336864,
|
|
"learning_rate": 3.2738521204784903e-06,
|
|
"loss": 0.3508,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 2.205205637043428,
|
|
"grad_norm": 0.32383182492607826,
|
|
"learning_rate": 3.268282280068912e-06,
|
|
"loss": 0.3551,
|
|
"step": 15335
|
|
},
|
|
{
|
|
"epoch": 2.205924647684786,
|
|
"grad_norm": 0.3022420535325948,
|
|
"learning_rate": 3.2627162560581118e-06,
|
|
"loss": 0.3589,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 2.206643658326143,
|
|
"grad_norm": 0.3116761418818082,
|
|
"learning_rate": 3.257154051601623e-06,
|
|
"loss": 0.3607,
|
|
"step": 15345
|
|
},
|
|
{
|
|
"epoch": 2.2073626689675008,
|
|
"grad_norm": 0.3072452853157531,
|
|
"learning_rate": 3.2515956698528108e-06,
|
|
"loss": 0.3716,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 2.208081679608858,
|
|
"grad_norm": 0.3402342532426353,
|
|
"learning_rate": 3.246041113962879e-06,
|
|
"loss": 0.3693,
|
|
"step": 15355
|
|
},
|
|
{
|
|
"epoch": 2.2088006902502157,
|
|
"grad_norm": 0.30767073922808924,
|
|
"learning_rate": 3.2404903870808625e-06,
|
|
"loss": 0.3742,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 2.209519700891573,
|
|
"grad_norm": 0.30262327184958626,
|
|
"learning_rate": 3.2349434923536248e-06,
|
|
"loss": 0.3321,
|
|
"step": 15365
|
|
},
|
|
{
|
|
"epoch": 2.2102387115329307,
|
|
"grad_norm": 0.31283562479069693,
|
|
"learning_rate": 3.2294004329258534e-06,
|
|
"loss": 0.3582,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 2.210957722174288,
|
|
"grad_norm": 0.4953642571396755,
|
|
"learning_rate": 3.2238612119400594e-06,
|
|
"loss": 0.3545,
|
|
"step": 15375
|
|
},
|
|
{
|
|
"epoch": 2.2116767328156457,
|
|
"grad_norm": 0.3076785429212798,
|
|
"learning_rate": 3.2183258325365885e-06,
|
|
"loss": 0.3547,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 2.212395743457003,
|
|
"grad_norm": 0.3133405325342391,
|
|
"learning_rate": 3.2127942978535987e-06,
|
|
"loss": 0.3419,
|
|
"step": 15385
|
|
},
|
|
{
|
|
"epoch": 2.2131147540983607,
|
|
"grad_norm": 0.30801589506070054,
|
|
"learning_rate": 3.207266611027069e-06,
|
|
"loss": 0.3545,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 2.213833764739718,
|
|
"grad_norm": 0.31141767311731,
|
|
"learning_rate": 3.201742775190806e-06,
|
|
"loss": 0.3564,
|
|
"step": 15395
|
|
},
|
|
{
|
|
"epoch": 2.2145527753810756,
|
|
"grad_norm": 0.30431280084831613,
|
|
"learning_rate": 3.1962227934764187e-06,
|
|
"loss": 0.3576,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 2.2152717860224334,
|
|
"grad_norm": 0.3126341809804547,
|
|
"learning_rate": 3.190706669013346e-06,
|
|
"loss": 0.3522,
|
|
"step": 15405
|
|
},
|
|
{
|
|
"epoch": 2.2159907966637906,
|
|
"grad_norm": 0.3049806229743361,
|
|
"learning_rate": 3.1851944049288263e-06,
|
|
"loss": 0.361,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 2.216709807305148,
|
|
"grad_norm": 0.3040650333890194,
|
|
"learning_rate": 3.179686004347923e-06,
|
|
"loss": 0.346,
|
|
"step": 15415
|
|
},
|
|
{
|
|
"epoch": 2.2174288179465056,
|
|
"grad_norm": 0.30646538919734423,
|
|
"learning_rate": 3.174181470393496e-06,
|
|
"loss": 0.3431,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 2.2181478285878633,
|
|
"grad_norm": 0.29807202663145027,
|
|
"learning_rate": 3.168680806186224e-06,
|
|
"loss": 0.3546,
|
|
"step": 15425
|
|
},
|
|
{
|
|
"epoch": 2.2188668392292206,
|
|
"grad_norm": 0.3002164885802567,
|
|
"learning_rate": 3.1631840148445857e-06,
|
|
"loss": 0.3467,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 2.2195858498705783,
|
|
"grad_norm": 0.33499720918803877,
|
|
"learning_rate": 3.157691099484863e-06,
|
|
"loss": 0.343,
|
|
"step": 15435
|
|
},
|
|
{
|
|
"epoch": 2.2203048605119355,
|
|
"grad_norm": 0.3028194034436359,
|
|
"learning_rate": 3.152202063221147e-06,
|
|
"loss": 0.3671,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 2.2210238711532932,
|
|
"grad_norm": 0.3075807304788357,
|
|
"learning_rate": 3.1467169091653236e-06,
|
|
"loss": 0.356,
|
|
"step": 15445
|
|
},
|
|
{
|
|
"epoch": 2.2217428817946505,
|
|
"grad_norm": 0.30956393385332825,
|
|
"learning_rate": 3.1412356404270785e-06,
|
|
"loss": 0.3665,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 2.2224618924360082,
|
|
"grad_norm": 0.3009663731038914,
|
|
"learning_rate": 3.1357582601138958e-06,
|
|
"loss": 0.3484,
|
|
"step": 15455
|
|
},
|
|
{
|
|
"epoch": 2.2231809030773655,
|
|
"grad_norm": 0.3012810747112443,
|
|
"learning_rate": 3.130284771331058e-06,
|
|
"loss": 0.3661,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 2.223899913718723,
|
|
"grad_norm": 0.3192250756440365,
|
|
"learning_rate": 3.1248151771816416e-06,
|
|
"loss": 0.3518,
|
|
"step": 15465
|
|
},
|
|
{
|
|
"epoch": 2.2246189243600805,
|
|
"grad_norm": 0.2957842413439877,
|
|
"learning_rate": 3.119349480766507e-06,
|
|
"loss": 0.3601,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 2.225337935001438,
|
|
"grad_norm": 0.3130242606501442,
|
|
"learning_rate": 3.1138876851843094e-06,
|
|
"loss": 0.3599,
|
|
"step": 15475
|
|
},
|
|
{
|
|
"epoch": 2.2260569456427954,
|
|
"grad_norm": 0.3001981940997634,
|
|
"learning_rate": 3.108429793531499e-06,
|
|
"loss": 0.3697,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 2.226775956284153,
|
|
"grad_norm": 0.32044826136790355,
|
|
"learning_rate": 3.1029758089023032e-06,
|
|
"loss": 0.3452,
|
|
"step": 15485
|
|
},
|
|
{
|
|
"epoch": 2.2274949669255104,
|
|
"grad_norm": 0.2904558932561433,
|
|
"learning_rate": 3.0975257343887343e-06,
|
|
"loss": 0.3755,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 2.228213977566868,
|
|
"grad_norm": 0.31093571792372343,
|
|
"learning_rate": 3.0920795730806006e-06,
|
|
"loss": 0.3555,
|
|
"step": 15495
|
|
},
|
|
{
|
|
"epoch": 2.2289329882082254,
|
|
"grad_norm": 0.34203500612032955,
|
|
"learning_rate": 3.086637328065475e-06,
|
|
"loss": 0.3441,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 2.229651998849583,
|
|
"grad_norm": 0.3181575619683084,
|
|
"learning_rate": 3.081199002428721e-06,
|
|
"loss": 0.3432,
|
|
"step": 15505
|
|
},
|
|
{
|
|
"epoch": 2.2303710094909404,
|
|
"grad_norm": 0.30280951337401874,
|
|
"learning_rate": 3.0757645992534812e-06,
|
|
"loss": 0.3656,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 2.231090020132298,
|
|
"grad_norm": 0.29228748952483163,
|
|
"learning_rate": 3.0703341216206685e-06,
|
|
"loss": 0.3572,
|
|
"step": 15515
|
|
},
|
|
{
|
|
"epoch": 2.2318090307736553,
|
|
"grad_norm": 0.3150188564033169,
|
|
"learning_rate": 3.064907572608966e-06,
|
|
"loss": 0.3506,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 2.232528041415013,
|
|
"grad_norm": 0.2863452623300753,
|
|
"learning_rate": 3.059484955294845e-06,
|
|
"loss": 0.3384,
|
|
"step": 15525
|
|
},
|
|
{
|
|
"epoch": 2.2332470520563703,
|
|
"grad_norm": 0.3136187865723598,
|
|
"learning_rate": 3.054066272752535e-06,
|
|
"loss": 0.3443,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 2.233966062697728,
|
|
"grad_norm": 0.29462193365920486,
|
|
"learning_rate": 3.048651528054034e-06,
|
|
"loss": 0.3467,
|
|
"step": 15535
|
|
},
|
|
{
|
|
"epoch": 2.2346850733390853,
|
|
"grad_norm": 0.3073501301884894,
|
|
"learning_rate": 3.0432407242691196e-06,
|
|
"loss": 0.3612,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 2.235404083980443,
|
|
"grad_norm": 0.33020786822391146,
|
|
"learning_rate": 3.0378338644653218e-06,
|
|
"loss": 0.3764,
|
|
"step": 15545
|
|
},
|
|
{
|
|
"epoch": 2.2361230946218003,
|
|
"grad_norm": 0.30585435518707055,
|
|
"learning_rate": 3.032430951707945e-06,
|
|
"loss": 0.3499,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 2.236842105263158,
|
|
"grad_norm": 0.3155861870757336,
|
|
"learning_rate": 3.0270319890600465e-06,
|
|
"loss": 0.3541,
|
|
"step": 15555
|
|
},
|
|
{
|
|
"epoch": 2.2375611159045152,
|
|
"grad_norm": 0.31654698321161245,
|
|
"learning_rate": 3.021636979582454e-06,
|
|
"loss": 0.3503,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 2.238280126545873,
|
|
"grad_norm": 0.31198070231838565,
|
|
"learning_rate": 3.016245926333743e-06,
|
|
"loss": 0.3712,
|
|
"step": 15565
|
|
},
|
|
{
|
|
"epoch": 2.23899913718723,
|
|
"grad_norm": 0.3044694033535308,
|
|
"learning_rate": 3.01085883237026e-06,
|
|
"loss": 0.3605,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 2.239718147828588,
|
|
"grad_norm": 0.31146232468825474,
|
|
"learning_rate": 3.005475700746091e-06,
|
|
"loss": 0.3519,
|
|
"step": 15575
|
|
},
|
|
{
|
|
"epoch": 2.240437158469945,
|
|
"grad_norm": 0.2973543686887364,
|
|
"learning_rate": 3.0000965345130904e-06,
|
|
"loss": 0.3437,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 2.241156169111303,
|
|
"grad_norm": 0.32080700171427373,
|
|
"learning_rate": 2.994721336720855e-06,
|
|
"loss": 0.3603,
|
|
"step": 15585
|
|
},
|
|
{
|
|
"epoch": 2.24187517975266,
|
|
"grad_norm": 0.3062482822760849,
|
|
"learning_rate": 2.989350110416731e-06,
|
|
"loss": 0.3551,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 2.242594190394018,
|
|
"grad_norm": 0.3208660404332907,
|
|
"learning_rate": 2.9839828586458232e-06,
|
|
"loss": 0.3614,
|
|
"step": 15595
|
|
},
|
|
{
|
|
"epoch": 2.243313201035375,
|
|
"grad_norm": 0.30901275585164917,
|
|
"learning_rate": 2.97861958445097e-06,
|
|
"loss": 0.3689,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 2.244032211676733,
|
|
"grad_norm": 0.31725814824936704,
|
|
"learning_rate": 2.9732602908727647e-06,
|
|
"loss": 0.3602,
|
|
"step": 15605
|
|
},
|
|
{
|
|
"epoch": 2.24475122231809,
|
|
"grad_norm": 0.2993168036916425,
|
|
"learning_rate": 2.967904980949543e-06,
|
|
"loss": 0.3639,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 2.245470232959448,
|
|
"grad_norm": 0.2995763336344399,
|
|
"learning_rate": 2.9625536577173773e-06,
|
|
"loss": 0.3465,
|
|
"step": 15615
|
|
},
|
|
{
|
|
"epoch": 2.2461892436008055,
|
|
"grad_norm": 0.30855529017030303,
|
|
"learning_rate": 2.957206324210079e-06,
|
|
"loss": 0.3457,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 2.246908254242163,
|
|
"grad_norm": 0.30477098341901515,
|
|
"learning_rate": 2.951862983459207e-06,
|
|
"loss": 0.3674,
|
|
"step": 15625
|
|
},
|
|
{
|
|
"epoch": 2.24762726488352,
|
|
"grad_norm": 0.31246333173314134,
|
|
"learning_rate": 2.9465236384940464e-06,
|
|
"loss": 0.3512,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 2.2483462755248778,
|
|
"grad_norm": 0.3062742648222082,
|
|
"learning_rate": 2.941188292341619e-06,
|
|
"loss": 0.357,
|
|
"step": 15635
|
|
},
|
|
{
|
|
"epoch": 2.2490652861662355,
|
|
"grad_norm": 0.2916864562044178,
|
|
"learning_rate": 2.9358569480266873e-06,
|
|
"loss": 0.3647,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 2.2497842968075927,
|
|
"grad_norm": 0.32087937470394745,
|
|
"learning_rate": 2.930529608571733e-06,
|
|
"loss": 0.3577,
|
|
"step": 15645
|
|
},
|
|
{
|
|
"epoch": 2.25050330744895,
|
|
"grad_norm": 0.3083378561850482,
|
|
"learning_rate": 2.9252062769969767e-06,
|
|
"loss": 0.3493,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 2.2512223180903077,
|
|
"grad_norm": 0.312189750363743,
|
|
"learning_rate": 2.919886956320367e-06,
|
|
"loss": 0.3476,
|
|
"step": 15655
|
|
},
|
|
{
|
|
"epoch": 2.2519413287316654,
|
|
"grad_norm": 0.31107989062754704,
|
|
"learning_rate": 2.9145716495575725e-06,
|
|
"loss": 0.3646,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 2.2526603393730227,
|
|
"grad_norm": 0.2981106045175469,
|
|
"learning_rate": 2.9092603597219848e-06,
|
|
"loss": 0.3496,
|
|
"step": 15665
|
|
},
|
|
{
|
|
"epoch": 2.2533793500143804,
|
|
"grad_norm": 0.32624578665001563,
|
|
"learning_rate": 2.90395308982473e-06,
|
|
"loss": 0.3802,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 2.2540983606557377,
|
|
"grad_norm": 0.3056872661221696,
|
|
"learning_rate": 2.8986498428746448e-06,
|
|
"loss": 0.3561,
|
|
"step": 15675
|
|
},
|
|
{
|
|
"epoch": 2.2548173712970954,
|
|
"grad_norm": 0.2964373136455243,
|
|
"learning_rate": 2.8933506218782826e-06,
|
|
"loss": 0.3598,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 2.2555363819384526,
|
|
"grad_norm": 0.31765984610242426,
|
|
"learning_rate": 2.888055429839929e-06,
|
|
"loss": 0.3462,
|
|
"step": 15685
|
|
},
|
|
{
|
|
"epoch": 2.2562553925798103,
|
|
"grad_norm": 0.298625914399591,
|
|
"learning_rate": 2.8827642697615665e-06,
|
|
"loss": 0.3648,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 2.2569744032211676,
|
|
"grad_norm": 0.3252226755907827,
|
|
"learning_rate": 2.8774771446429116e-06,
|
|
"loss": 0.3643,
|
|
"step": 15695
|
|
},
|
|
{
|
|
"epoch": 2.2576934138625253,
|
|
"grad_norm": 0.30512926093764386,
|
|
"learning_rate": 2.8721940574813745e-06,
|
|
"loss": 0.3655,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 2.2584124245038826,
|
|
"grad_norm": 0.2957418253130133,
|
|
"learning_rate": 2.866915011272089e-06,
|
|
"loss": 0.3501,
|
|
"step": 15705
|
|
},
|
|
{
|
|
"epoch": 2.2591314351452403,
|
|
"grad_norm": 0.2984240975142514,
|
|
"learning_rate": 2.8616400090078956e-06,
|
|
"loss": 0.3529,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 2.2598504457865976,
|
|
"grad_norm": 0.29953165879659893,
|
|
"learning_rate": 2.856369053679339e-06,
|
|
"loss": 0.3594,
|
|
"step": 15715
|
|
},
|
|
{
|
|
"epoch": 2.2605694564279553,
|
|
"grad_norm": 0.30859784356444436,
|
|
"learning_rate": 2.8511021482746672e-06,
|
|
"loss": 0.357,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 2.2612884670693125,
|
|
"grad_norm": 0.30757631486619363,
|
|
"learning_rate": 2.845839295779841e-06,
|
|
"loss": 0.352,
|
|
"step": 15725
|
|
},
|
|
{
|
|
"epoch": 2.2620074777106702,
|
|
"grad_norm": 0.3191302133672286,
|
|
"learning_rate": 2.840580499178517e-06,
|
|
"loss": 0.3626,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 2.2627264883520275,
|
|
"grad_norm": 0.33964528675054473,
|
|
"learning_rate": 2.83532576145205e-06,
|
|
"loss": 0.3512,
|
|
"step": 15735
|
|
},
|
|
{
|
|
"epoch": 2.263445498993385,
|
|
"grad_norm": 0.30911237389364155,
|
|
"learning_rate": 2.8300750855795043e-06,
|
|
"loss": 0.346,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 2.2641645096347425,
|
|
"grad_norm": 0.30446224483523926,
|
|
"learning_rate": 2.8248284745376285e-06,
|
|
"loss": 0.3764,
|
|
"step": 15745
|
|
},
|
|
{
|
|
"epoch": 2.2648835202761,
|
|
"grad_norm": 0.2940806440072444,
|
|
"learning_rate": 2.8195859313008754e-06,
|
|
"loss": 0.3457,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 2.2656025309174574,
|
|
"grad_norm": 0.3072868701821893,
|
|
"learning_rate": 2.814347458841392e-06,
|
|
"loss": 0.3718,
|
|
"step": 15755
|
|
},
|
|
{
|
|
"epoch": 2.266321541558815,
|
|
"grad_norm": 0.30242544877386035,
|
|
"learning_rate": 2.8091130601290127e-06,
|
|
"loss": 0.3369,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 2.2670405522001724,
|
|
"grad_norm": 0.31504463543639355,
|
|
"learning_rate": 2.8038827381312607e-06,
|
|
"loss": 0.3418,
|
|
"step": 15765
|
|
},
|
|
{
|
|
"epoch": 2.26775956284153,
|
|
"grad_norm": 0.31615831738207584,
|
|
"learning_rate": 2.7986564958133564e-06,
|
|
"loss": 0.3514,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 2.2684785734828874,
|
|
"grad_norm": 0.3126755495552674,
|
|
"learning_rate": 2.793434336138202e-06,
|
|
"loss": 0.367,
|
|
"step": 15775
|
|
},
|
|
{
|
|
"epoch": 2.269197584124245,
|
|
"grad_norm": 0.3038635159784138,
|
|
"learning_rate": 2.788216262066381e-06,
|
|
"loss": 0.3635,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 2.2699165947656024,
|
|
"grad_norm": 0.3786827203869844,
|
|
"learning_rate": 2.7830022765561725e-06,
|
|
"loss": 0.3481,
|
|
"step": 15785
|
|
},
|
|
{
|
|
"epoch": 2.27063560540696,
|
|
"grad_norm": 0.3118278632641194,
|
|
"learning_rate": 2.777792382563522e-06,
|
|
"loss": 0.332,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 2.2713546160483173,
|
|
"grad_norm": 0.32160839287608234,
|
|
"learning_rate": 2.7725865830420697e-06,
|
|
"loss": 0.3598,
|
|
"step": 15795
|
|
},
|
|
{
|
|
"epoch": 2.272073626689675,
|
|
"grad_norm": 0.2983078542384667,
|
|
"learning_rate": 2.7673848809431316e-06,
|
|
"loss": 0.3637,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 2.2727926373310323,
|
|
"grad_norm": 0.3088423052329731,
|
|
"learning_rate": 2.762187279215689e-06,
|
|
"loss": 0.3374,
|
|
"step": 15805
|
|
},
|
|
{
|
|
"epoch": 2.27351164797239,
|
|
"grad_norm": 0.3086333495661721,
|
|
"learning_rate": 2.7569937808064164e-06,
|
|
"loss": 0.3526,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 2.2742306586137473,
|
|
"grad_norm": 0.31900651905313676,
|
|
"learning_rate": 2.7518043886596492e-06,
|
|
"loss": 0.3626,
|
|
"step": 15815
|
|
},
|
|
{
|
|
"epoch": 2.274949669255105,
|
|
"grad_norm": 0.3486672770395252,
|
|
"learning_rate": 2.7466191057173952e-06,
|
|
"loss": 0.3376,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 2.2756686798964623,
|
|
"grad_norm": 0.30635695085728376,
|
|
"learning_rate": 2.741437934919342e-06,
|
|
"loss": 0.3482,
|
|
"step": 15825
|
|
},
|
|
{
|
|
"epoch": 2.27638769053782,
|
|
"grad_norm": 0.29498996360939916,
|
|
"learning_rate": 2.736260879202839e-06,
|
|
"loss": 0.3521,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 2.2771067011791777,
|
|
"grad_norm": 0.30356297104944324,
|
|
"learning_rate": 2.731087941502898e-06,
|
|
"loss": 0.3576,
|
|
"step": 15835
|
|
},
|
|
{
|
|
"epoch": 2.277825711820535,
|
|
"grad_norm": 0.3006613726046492,
|
|
"learning_rate": 2.72591912475221e-06,
|
|
"loss": 0.3725,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 2.278544722461892,
|
|
"grad_norm": 0.30407371669199124,
|
|
"learning_rate": 2.720754431881114e-06,
|
|
"loss": 0.3512,
|
|
"step": 15845
|
|
},
|
|
{
|
|
"epoch": 2.27926373310325,
|
|
"grad_norm": 0.30929772831522084,
|
|
"learning_rate": 2.7155938658176227e-06,
|
|
"loss": 0.3447,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 2.2799827437446076,
|
|
"grad_norm": 0.31130611419720466,
|
|
"learning_rate": 2.7104374294874082e-06,
|
|
"loss": 0.356,
|
|
"step": 15855
|
|
},
|
|
{
|
|
"epoch": 2.280701754385965,
|
|
"grad_norm": 0.32375382544412157,
|
|
"learning_rate": 2.7052851258137936e-06,
|
|
"loss": 0.3484,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 2.281420765027322,
|
|
"grad_norm": 0.32767196685216654,
|
|
"learning_rate": 2.700136957717763e-06,
|
|
"loss": 0.364,
|
|
"step": 15865
|
|
},
|
|
{
|
|
"epoch": 2.28213977566868,
|
|
"grad_norm": 0.30275935601370696,
|
|
"learning_rate": 2.694992928117961e-06,
|
|
"loss": 0.3548,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 2.2828587863100376,
|
|
"grad_norm": 0.2999566528493883,
|
|
"learning_rate": 2.689853039930679e-06,
|
|
"loss": 0.352,
|
|
"step": 15875
|
|
},
|
|
{
|
|
"epoch": 2.283577796951395,
|
|
"grad_norm": 0.3028370068555194,
|
|
"learning_rate": 2.6847172960698607e-06,
|
|
"loss": 0.3567,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 2.2842968075927526,
|
|
"grad_norm": 0.30491958972868694,
|
|
"learning_rate": 2.679585699447108e-06,
|
|
"loss": 0.3518,
|
|
"step": 15885
|
|
},
|
|
{
|
|
"epoch": 2.28501581823411,
|
|
"grad_norm": 0.31053388049647473,
|
|
"learning_rate": 2.6744582529716613e-06,
|
|
"loss": 0.3428,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 2.2857348288754675,
|
|
"grad_norm": 0.32522931033478,
|
|
"learning_rate": 2.6693349595504146e-06,
|
|
"loss": 0.3738,
|
|
"step": 15895
|
|
},
|
|
{
|
|
"epoch": 2.286453839516825,
|
|
"grad_norm": 0.3166723767856955,
|
|
"learning_rate": 2.664215822087912e-06,
|
|
"loss": 0.3699,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 2.2871728501581825,
|
|
"grad_norm": 0.3064335102460339,
|
|
"learning_rate": 2.6591008434863264e-06,
|
|
"loss": 0.3493,
|
|
"step": 15905
|
|
},
|
|
{
|
|
"epoch": 2.2878918607995398,
|
|
"grad_norm": 0.31630254631437876,
|
|
"learning_rate": 2.6539900266454886e-06,
|
|
"loss": 0.3456,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 2.2886108714408975,
|
|
"grad_norm": 0.3347713777428592,
|
|
"learning_rate": 2.6488833744628618e-06,
|
|
"loss": 0.357,
|
|
"step": 15915
|
|
},
|
|
{
|
|
"epoch": 2.2893298820822547,
|
|
"grad_norm": 0.3158742778367817,
|
|
"learning_rate": 2.643780889833546e-06,
|
|
"loss": 0.3688,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 2.2900488927236125,
|
|
"grad_norm": 0.32102594487563607,
|
|
"learning_rate": 2.6386825756502878e-06,
|
|
"loss": 0.3661,
|
|
"step": 15925
|
|
},
|
|
{
|
|
"epoch": 2.2907679033649697,
|
|
"grad_norm": 0.30611574690282606,
|
|
"learning_rate": 2.6335884348034614e-06,
|
|
"loss": 0.3366,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 2.2914869140063274,
|
|
"grad_norm": 0.2944612402777182,
|
|
"learning_rate": 2.6284984701810745e-06,
|
|
"loss": 0.3516,
|
|
"step": 15935
|
|
},
|
|
{
|
|
"epoch": 2.2922059246476847,
|
|
"grad_norm": 0.35762278792529484,
|
|
"learning_rate": 2.6234126846687757e-06,
|
|
"loss": 0.3505,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 2.2929249352890424,
|
|
"grad_norm": 0.3135762338755491,
|
|
"learning_rate": 2.618331081149833e-06,
|
|
"loss": 0.3687,
|
|
"step": 15945
|
|
},
|
|
{
|
|
"epoch": 2.2936439459303997,
|
|
"grad_norm": 0.30807275833208,
|
|
"learning_rate": 2.613253662505153e-06,
|
|
"loss": 0.3523,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 2.2943629565717574,
|
|
"grad_norm": 0.2997473598425893,
|
|
"learning_rate": 2.6081804316132685e-06,
|
|
"loss": 0.3363,
|
|
"step": 15955
|
|
},
|
|
{
|
|
"epoch": 2.2950819672131146,
|
|
"grad_norm": 0.30609918339915915,
|
|
"learning_rate": 2.6031113913503337e-06,
|
|
"loss": 0.3663,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 2.2958009778544723,
|
|
"grad_norm": 0.3017644713676623,
|
|
"learning_rate": 2.5980465445901247e-06,
|
|
"loss": 0.3476,
|
|
"step": 15965
|
|
},
|
|
{
|
|
"epoch": 2.2965199884958296,
|
|
"grad_norm": 0.3096068413767663,
|
|
"learning_rate": 2.592985894204051e-06,
|
|
"loss": 0.3741,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 2.2972389991371873,
|
|
"grad_norm": 0.3093683860504157,
|
|
"learning_rate": 2.5879294430611346e-06,
|
|
"loss": 0.3747,
|
|
"step": 15975
|
|
},
|
|
{
|
|
"epoch": 2.2979580097785446,
|
|
"grad_norm": 0.3244995841406146,
|
|
"learning_rate": 2.582877194028014e-06,
|
|
"loss": 0.3611,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 2.2986770204199023,
|
|
"grad_norm": 0.3111852058029451,
|
|
"learning_rate": 2.5778291499689577e-06,
|
|
"loss": 0.3621,
|
|
"step": 15985
|
|
},
|
|
{
|
|
"epoch": 2.2993960310612596,
|
|
"grad_norm": 0.30975552140475193,
|
|
"learning_rate": 2.572785313745837e-06,
|
|
"loss": 0.3682,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 2.3001150417026173,
|
|
"grad_norm": 0.3101174849309566,
|
|
"learning_rate": 2.5677456882181463e-06,
|
|
"loss": 0.3623,
|
|
"step": 15995
|
|
},
|
|
{
|
|
"epoch": 2.3008340523439745,
|
|
"grad_norm": 0.31071583961660165,
|
|
"learning_rate": 2.562710276242992e-06,
|
|
"loss": 0.3592,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 2.3015530629853322,
|
|
"grad_norm": 0.31231135268967924,
|
|
"learning_rate": 2.5576790806750882e-06,
|
|
"loss": 0.3549,
|
|
"step": 16005
|
|
},
|
|
{
|
|
"epoch": 2.3022720736266895,
|
|
"grad_norm": 0.33928947652643504,
|
|
"learning_rate": 2.5526521043667564e-06,
|
|
"loss": 0.362,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 2.302991084268047,
|
|
"grad_norm": 0.29992825922532507,
|
|
"learning_rate": 2.547629350167936e-06,
|
|
"loss": 0.3647,
|
|
"step": 16015
|
|
},
|
|
{
|
|
"epoch": 2.3037100949094045,
|
|
"grad_norm": 0.3004583761327109,
|
|
"learning_rate": 2.5426108209261614e-06,
|
|
"loss": 0.3557,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 2.304429105550762,
|
|
"grad_norm": 0.3069875577536048,
|
|
"learning_rate": 2.5375965194865813e-06,
|
|
"loss": 0.3433,
|
|
"step": 16025
|
|
},
|
|
{
|
|
"epoch": 2.3051481161921195,
|
|
"grad_norm": 0.3277259137871057,
|
|
"learning_rate": 2.5325864486919417e-06,
|
|
"loss": 0.3633,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 2.305867126833477,
|
|
"grad_norm": 0.3038629438042739,
|
|
"learning_rate": 2.5275806113825885e-06,
|
|
"loss": 0.3562,
|
|
"step": 16035
|
|
},
|
|
{
|
|
"epoch": 2.3065861374748344,
|
|
"grad_norm": 0.31461803132898014,
|
|
"learning_rate": 2.522579010396472e-06,
|
|
"loss": 0.3619,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 2.307305148116192,
|
|
"grad_norm": 0.2959354817846541,
|
|
"learning_rate": 2.517581648569145e-06,
|
|
"loss": 0.3647,
|
|
"step": 16045
|
|
},
|
|
{
|
|
"epoch": 2.30802415875755,
|
|
"grad_norm": 0.30566584330916874,
|
|
"learning_rate": 2.5125885287337438e-06,
|
|
"loss": 0.3586,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 2.308743169398907,
|
|
"grad_norm": 0.3646508015892226,
|
|
"learning_rate": 2.5075996537210133e-06,
|
|
"loss": 0.3488,
|
|
"step": 16055
|
|
},
|
|
{
|
|
"epoch": 2.3094621800402644,
|
|
"grad_norm": 0.3153456121902386,
|
|
"learning_rate": 2.502615026359285e-06,
|
|
"loss": 0.3688,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 2.310181190681622,
|
|
"grad_norm": 0.29404551026941367,
|
|
"learning_rate": 2.4976346494744785e-06,
|
|
"loss": 0.3581,
|
|
"step": 16065
|
|
},
|
|
{
|
|
"epoch": 2.31090020132298,
|
|
"grad_norm": 0.3105472552136778,
|
|
"learning_rate": 2.492658525890115e-06,
|
|
"loss": 0.3496,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 2.311619211964337,
|
|
"grad_norm": 0.319832608016326,
|
|
"learning_rate": 2.487686658427295e-06,
|
|
"loss": 0.3508,
|
|
"step": 16075
|
|
},
|
|
{
|
|
"epoch": 2.3123382226056943,
|
|
"grad_norm": 0.3167547707643775,
|
|
"learning_rate": 2.482719049904706e-06,
|
|
"loss": 0.3531,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 2.313057233247052,
|
|
"grad_norm": 0.31759970183497527,
|
|
"learning_rate": 2.4777557031386302e-06,
|
|
"loss": 0.3485,
|
|
"step": 16085
|
|
},
|
|
{
|
|
"epoch": 2.3137762438884097,
|
|
"grad_norm": 0.30660907697399054,
|
|
"learning_rate": 2.472796620942922e-06,
|
|
"loss": 0.3479,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 2.314495254529767,
|
|
"grad_norm": 0.3063113107537106,
|
|
"learning_rate": 2.4678418061290253e-06,
|
|
"loss": 0.3559,
|
|
"step": 16095
|
|
},
|
|
{
|
|
"epoch": 2.3152142651711247,
|
|
"grad_norm": 0.2972991853403953,
|
|
"learning_rate": 2.4628912615059664e-06,
|
|
"loss": 0.3744,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 2.315933275812482,
|
|
"grad_norm": 0.31162519376561765,
|
|
"learning_rate": 2.4579449898803453e-06,
|
|
"loss": 0.36,
|
|
"step": 16105
|
|
},
|
|
{
|
|
"epoch": 2.3166522864538397,
|
|
"grad_norm": 0.3018722218181238,
|
|
"learning_rate": 2.453002994056337e-06,
|
|
"loss": 0.3538,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 2.317371297095197,
|
|
"grad_norm": 0.30595044958719786,
|
|
"learning_rate": 2.448065276835705e-06,
|
|
"loss": 0.3546,
|
|
"step": 16115
|
|
},
|
|
{
|
|
"epoch": 2.3180903077365547,
|
|
"grad_norm": 0.30736329196433115,
|
|
"learning_rate": 2.4431318410177705e-06,
|
|
"loss": 0.3481,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 2.318809318377912,
|
|
"grad_norm": 0.32241614377384,
|
|
"learning_rate": 2.4382026893994435e-06,
|
|
"loss": 0.3447,
|
|
"step": 16125
|
|
},
|
|
{
|
|
"epoch": 2.3195283290192696,
|
|
"grad_norm": 0.3419157713808682,
|
|
"learning_rate": 2.4332778247751953e-06,
|
|
"loss": 0.3501,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 2.320247339660627,
|
|
"grad_norm": 0.3124712043542506,
|
|
"learning_rate": 2.4283572499370655e-06,
|
|
"loss": 0.354,
|
|
"step": 16135
|
|
},
|
|
{
|
|
"epoch": 2.3209663503019846,
|
|
"grad_norm": 0.31444315193194494,
|
|
"learning_rate": 2.4234409676746673e-06,
|
|
"loss": 0.3709,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 2.321685360943342,
|
|
"grad_norm": 0.299491226974594,
|
|
"learning_rate": 2.4185289807751833e-06,
|
|
"loss": 0.3547,
|
|
"step": 16145
|
|
},
|
|
{
|
|
"epoch": 2.3224043715846996,
|
|
"grad_norm": 0.3028786378211861,
|
|
"learning_rate": 2.413621292023349e-06,
|
|
"loss": 0.3631,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 2.323123382226057,
|
|
"grad_norm": 0.3038788084800907,
|
|
"learning_rate": 2.4087179042014774e-06,
|
|
"loss": 0.3466,
|
|
"step": 16155
|
|
},
|
|
{
|
|
"epoch": 2.3238423928674146,
|
|
"grad_norm": 0.30843838680085833,
|
|
"learning_rate": 2.403818820089431e-06,
|
|
"loss": 0.3571,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 2.324561403508772,
|
|
"grad_norm": 0.2949958202416981,
|
|
"learning_rate": 2.3989240424646355e-06,
|
|
"loss": 0.3363,
|
|
"step": 16165
|
|
},
|
|
{
|
|
"epoch": 2.3252804141501295,
|
|
"grad_norm": 0.32492884881713374,
|
|
"learning_rate": 2.3940335741020826e-06,
|
|
"loss": 0.3531,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 2.325999424791487,
|
|
"grad_norm": 0.29904998162825586,
|
|
"learning_rate": 2.3891474177743136e-06,
|
|
"loss": 0.3578,
|
|
"step": 16175
|
|
},
|
|
{
|
|
"epoch": 2.3267184354328445,
|
|
"grad_norm": 0.3014079508481498,
|
|
"learning_rate": 2.3842655762514234e-06,
|
|
"loss": 0.3472,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 2.3274374460742018,
|
|
"grad_norm": 0.3003664642803967,
|
|
"learning_rate": 2.379388052301066e-06,
|
|
"loss": 0.3527,
|
|
"step": 16185
|
|
},
|
|
{
|
|
"epoch": 2.3281564567155595,
|
|
"grad_norm": 0.3205562401165908,
|
|
"learning_rate": 2.3745148486884505e-06,
|
|
"loss": 0.34,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 2.3288754673569168,
|
|
"grad_norm": 0.3022338320914003,
|
|
"learning_rate": 2.369645968176326e-06,
|
|
"loss": 0.3532,
|
|
"step": 16195
|
|
},
|
|
{
|
|
"epoch": 2.3295944779982745,
|
|
"grad_norm": 0.31456416907548845,
|
|
"learning_rate": 2.3647814135250025e-06,
|
|
"loss": 0.3635,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 2.3303134886396317,
|
|
"grad_norm": 0.31209161437727956,
|
|
"learning_rate": 2.359921187492329e-06,
|
|
"loss": 0.3557,
|
|
"step": 16205
|
|
},
|
|
{
|
|
"epoch": 2.3310324992809894,
|
|
"grad_norm": 0.31100188059013256,
|
|
"learning_rate": 2.3550652928336994e-06,
|
|
"loss": 0.3604,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 2.3317515099223467,
|
|
"grad_norm": 0.3047213678132922,
|
|
"learning_rate": 2.3502137323020636e-06,
|
|
"loss": 0.3498,
|
|
"step": 16215
|
|
},
|
|
{
|
|
"epoch": 2.3324705205637044,
|
|
"grad_norm": 0.30394871087647873,
|
|
"learning_rate": 2.3453665086479015e-06,
|
|
"loss": 0.3422,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 2.3331895312050617,
|
|
"grad_norm": 0.3002461274106404,
|
|
"learning_rate": 2.34052362461924e-06,
|
|
"loss": 0.3511,
|
|
"step": 16225
|
|
},
|
|
{
|
|
"epoch": 2.3339085418464194,
|
|
"grad_norm": 0.3019570152023722,
|
|
"learning_rate": 2.3356850829616486e-06,
|
|
"loss": 0.3543,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 2.3346275524877766,
|
|
"grad_norm": 0.3028763815910138,
|
|
"learning_rate": 2.3308508864182254e-06,
|
|
"loss": 0.3646,
|
|
"step": 16235
|
|
},
|
|
{
|
|
"epoch": 2.3353465631291344,
|
|
"grad_norm": 0.2939782533371511,
|
|
"learning_rate": 2.3260210377296166e-06,
|
|
"loss": 0.3445,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 2.3360655737704916,
|
|
"grad_norm": 0.306733886861416,
|
|
"learning_rate": 2.3211955396340003e-06,
|
|
"loss": 0.358,
|
|
"step": 16245
|
|
},
|
|
{
|
|
"epoch": 2.3367845844118493,
|
|
"grad_norm": 0.3187293409049959,
|
|
"learning_rate": 2.3163743948670793e-06,
|
|
"loss": 0.347,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 2.3375035950532066,
|
|
"grad_norm": 0.3123739995817767,
|
|
"learning_rate": 2.3115576061621024e-06,
|
|
"loss": 0.359,
|
|
"step": 16255
|
|
},
|
|
{
|
|
"epoch": 2.3382226056945643,
|
|
"grad_norm": 0.30354960150343874,
|
|
"learning_rate": 2.306745176249838e-06,
|
|
"loss": 0.3488,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 2.338941616335922,
|
|
"grad_norm": 0.4608452621624845,
|
|
"learning_rate": 2.301937107858584e-06,
|
|
"loss": 0.3377,
|
|
"step": 16265
|
|
},
|
|
{
|
|
"epoch": 2.3396606269772793,
|
|
"grad_norm": 0.3134200470172266,
|
|
"learning_rate": 2.2971334037141756e-06,
|
|
"loss": 0.3479,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 2.3403796376186365,
|
|
"grad_norm": 0.30648596554580404,
|
|
"learning_rate": 2.2923340665399617e-06,
|
|
"loss": 0.3548,
|
|
"step": 16275
|
|
},
|
|
{
|
|
"epoch": 2.3410986482599943,
|
|
"grad_norm": 0.3122376400261476,
|
|
"learning_rate": 2.2875390990568204e-06,
|
|
"loss": 0.3551,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 2.341817658901352,
|
|
"grad_norm": 0.33082925511950845,
|
|
"learning_rate": 2.2827485039831533e-06,
|
|
"loss": 0.3526,
|
|
"step": 16285
|
|
},
|
|
{
|
|
"epoch": 2.3425366695427092,
|
|
"grad_norm": 0.31757353800080146,
|
|
"learning_rate": 2.2779622840348868e-06,
|
|
"loss": 0.3624,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 2.3432556801840665,
|
|
"grad_norm": 0.3104326101785247,
|
|
"learning_rate": 2.2731804419254565e-06,
|
|
"loss": 0.3622,
|
|
"step": 16295
|
|
},
|
|
{
|
|
"epoch": 2.343974690825424,
|
|
"grad_norm": 0.3050757513897761,
|
|
"learning_rate": 2.268402980365828e-06,
|
|
"loss": 0.3603,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 2.344693701466782,
|
|
"grad_norm": 0.31463816006910816,
|
|
"learning_rate": 2.263629902064475e-06,
|
|
"loss": 0.3569,
|
|
"step": 16305
|
|
},
|
|
{
|
|
"epoch": 2.345412712108139,
|
|
"grad_norm": 0.33432411275730034,
|
|
"learning_rate": 2.2588612097273843e-06,
|
|
"loss": 0.3636,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 2.346131722749497,
|
|
"grad_norm": 0.3045011398905948,
|
|
"learning_rate": 2.2540969060580685e-06,
|
|
"loss": 0.3513,
|
|
"step": 16315
|
|
},
|
|
{
|
|
"epoch": 2.346850733390854,
|
|
"grad_norm": 0.3113277493082616,
|
|
"learning_rate": 2.2493369937575414e-06,
|
|
"loss": 0.3503,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 2.347569744032212,
|
|
"grad_norm": 0.3017165149030103,
|
|
"learning_rate": 2.2445814755243277e-06,
|
|
"loss": 0.3563,
|
|
"step": 16325
|
|
},
|
|
{
|
|
"epoch": 2.348288754673569,
|
|
"grad_norm": 0.32665381028134893,
|
|
"learning_rate": 2.2398303540544675e-06,
|
|
"loss": 0.3641,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 2.349007765314927,
|
|
"grad_norm": 0.311750150664556,
|
|
"learning_rate": 2.2350836320414994e-06,
|
|
"loss": 0.35,
|
|
"step": 16335
|
|
},
|
|
{
|
|
"epoch": 2.349726775956284,
|
|
"grad_norm": 0.31191935995809517,
|
|
"learning_rate": 2.230341312176476e-06,
|
|
"loss": 0.3586,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 2.350445786597642,
|
|
"grad_norm": 0.3106659240495657,
|
|
"learning_rate": 2.225603397147953e-06,
|
|
"loss": 0.3624,
|
|
"step": 16345
|
|
},
|
|
{
|
|
"epoch": 2.351164797238999,
|
|
"grad_norm": 0.32111354852882784,
|
|
"learning_rate": 2.220869889641982e-06,
|
|
"loss": 0.3581,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 2.351883807880357,
|
|
"grad_norm": 0.31067249529602536,
|
|
"learning_rate": 2.216140792342125e-06,
|
|
"loss": 0.345,
|
|
"step": 16355
|
|
},
|
|
{
|
|
"epoch": 2.352602818521714,
|
|
"grad_norm": 0.3007292058247453,
|
|
"learning_rate": 2.211416107929437e-06,
|
|
"loss": 0.3507,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 2.3533218291630718,
|
|
"grad_norm": 0.32091144533272364,
|
|
"learning_rate": 2.206695839082472e-06,
|
|
"loss": 0.3424,
|
|
"step": 16365
|
|
},
|
|
{
|
|
"epoch": 2.354040839804429,
|
|
"grad_norm": 0.31334951666591937,
|
|
"learning_rate": 2.2019799884772862e-06,
|
|
"loss": 0.3395,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 2.3547598504457867,
|
|
"grad_norm": 0.3219576297127824,
|
|
"learning_rate": 2.1972685587874245e-06,
|
|
"loss": 0.3537,
|
|
"step": 16375
|
|
},
|
|
{
|
|
"epoch": 2.355478861087144,
|
|
"grad_norm": 0.2947101729287441,
|
|
"learning_rate": 2.192561552683926e-06,
|
|
"loss": 0.3604,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 2.3561978717285017,
|
|
"grad_norm": 0.30076076072427377,
|
|
"learning_rate": 2.187858972835326e-06,
|
|
"loss": 0.362,
|
|
"step": 16385
|
|
},
|
|
{
|
|
"epoch": 2.356916882369859,
|
|
"grad_norm": 0.32859975659304574,
|
|
"learning_rate": 2.1831608219076506e-06,
|
|
"loss": 0.3661,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 2.3576358930112167,
|
|
"grad_norm": 0.31032566988966753,
|
|
"learning_rate": 2.178467102564409e-06,
|
|
"loss": 0.3596,
|
|
"step": 16395
|
|
},
|
|
{
|
|
"epoch": 2.358354903652574,
|
|
"grad_norm": 0.31282176337577733,
|
|
"learning_rate": 2.1737778174666048e-06,
|
|
"loss": 0.3517,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 2.3590739142939317,
|
|
"grad_norm": 0.3148146729786297,
|
|
"learning_rate": 2.1690929692727246e-06,
|
|
"loss": 0.3663,
|
|
"step": 16405
|
|
},
|
|
{
|
|
"epoch": 2.359792924935289,
|
|
"grad_norm": 0.3117374664985557,
|
|
"learning_rate": 2.1644125606387346e-06,
|
|
"loss": 0.3609,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 2.3605119355766466,
|
|
"grad_norm": 0.3064740677110053,
|
|
"learning_rate": 2.159736594218097e-06,
|
|
"loss": 0.3647,
|
|
"step": 16415
|
|
},
|
|
{
|
|
"epoch": 2.361230946218004,
|
|
"grad_norm": 0.304115220511543,
|
|
"learning_rate": 2.1550650726617426e-06,
|
|
"loss": 0.3542,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 2.3619499568593616,
|
|
"grad_norm": 0.31155459585360695,
|
|
"learning_rate": 2.1503979986180866e-06,
|
|
"loss": 0.3412,
|
|
"step": 16425
|
|
},
|
|
{
|
|
"epoch": 2.362668967500719,
|
|
"grad_norm": 0.3104529776654304,
|
|
"learning_rate": 2.1457353747330247e-06,
|
|
"loss": 0.3663,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 2.3633879781420766,
|
|
"grad_norm": 0.3121733802967262,
|
|
"learning_rate": 2.1410772036499327e-06,
|
|
"loss": 0.3418,
|
|
"step": 16435
|
|
},
|
|
{
|
|
"epoch": 2.364106988783434,
|
|
"grad_norm": 0.30938847939299197,
|
|
"learning_rate": 2.1364234880096524e-06,
|
|
"loss": 0.3532,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 2.3648259994247915,
|
|
"grad_norm": 0.3135484840880156,
|
|
"learning_rate": 2.1317742304505097e-06,
|
|
"loss": 0.3591,
|
|
"step": 16445
|
|
},
|
|
{
|
|
"epoch": 2.365545010066149,
|
|
"grad_norm": 0.3175485579628117,
|
|
"learning_rate": 2.1271294336082936e-06,
|
|
"loss": 0.3465,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 2.3662640207075065,
|
|
"grad_norm": 0.30714339546031744,
|
|
"learning_rate": 2.1224891001162738e-06,
|
|
"loss": 0.3543,
|
|
"step": 16455
|
|
},
|
|
{
|
|
"epoch": 2.366983031348864,
|
|
"grad_norm": 0.30491007660977404,
|
|
"learning_rate": 2.1178532326051837e-06,
|
|
"loss": 0.3444,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 2.3677020419902215,
|
|
"grad_norm": 0.299116979713143,
|
|
"learning_rate": 2.1132218337032227e-06,
|
|
"loss": 0.3687,
|
|
"step": 16465
|
|
},
|
|
{
|
|
"epoch": 2.3684210526315788,
|
|
"grad_norm": 0.30244982913768803,
|
|
"learning_rate": 2.1085949060360654e-06,
|
|
"loss": 0.3456,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 2.3691400632729365,
|
|
"grad_norm": 0.3017966260338068,
|
|
"learning_rate": 2.1039724522268436e-06,
|
|
"loss": 0.3701,
|
|
"step": 16475
|
|
},
|
|
{
|
|
"epoch": 2.369859073914294,
|
|
"grad_norm": 0.2990197771685328,
|
|
"learning_rate": 2.0993544748961524e-06,
|
|
"loss": 0.3559,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 2.3705780845556514,
|
|
"grad_norm": 0.31138638604362306,
|
|
"learning_rate": 2.0947409766620562e-06,
|
|
"loss": 0.3469,
|
|
"step": 16485
|
|
},
|
|
{
|
|
"epoch": 2.3712970951970087,
|
|
"grad_norm": 0.31228122688384496,
|
|
"learning_rate": 2.0901319601400772e-06,
|
|
"loss": 0.3624,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 2.3720161058383664,
|
|
"grad_norm": 0.313733282980535,
|
|
"learning_rate": 2.0855274279431914e-06,
|
|
"loss": 0.3574,
|
|
"step": 16495
|
|
},
|
|
{
|
|
"epoch": 2.372735116479724,
|
|
"grad_norm": 0.28752112482067166,
|
|
"learning_rate": 2.080927382681841e-06,
|
|
"loss": 0.3386,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 2.3734541271210814,
|
|
"grad_norm": 0.3122161854370265,
|
|
"learning_rate": 2.0763318269639175e-06,
|
|
"loss": 0.3562,
|
|
"step": 16505
|
|
},
|
|
{
|
|
"epoch": 2.3741731377624387,
|
|
"grad_norm": 0.30338082102032904,
|
|
"learning_rate": 2.0717407633947683e-06,
|
|
"loss": 0.3626,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 2.3748921484037964,
|
|
"grad_norm": 0.31256480793426256,
|
|
"learning_rate": 2.0671541945772e-06,
|
|
"loss": 0.3673,
|
|
"step": 16515
|
|
},
|
|
{
|
|
"epoch": 2.375611159045154,
|
|
"grad_norm": 0.3022378542581336,
|
|
"learning_rate": 2.0625721231114638e-06,
|
|
"loss": 0.3554,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 2.3763301696865113,
|
|
"grad_norm": 0.3091646292671584,
|
|
"learning_rate": 2.0579945515952616e-06,
|
|
"loss": 0.3495,
|
|
"step": 16525
|
|
},
|
|
{
|
|
"epoch": 2.3770491803278686,
|
|
"grad_norm": 0.3052049807170149,
|
|
"learning_rate": 2.0534214826237486e-06,
|
|
"loss": 0.3541,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 2.3777681909692263,
|
|
"grad_norm": 0.29774851165940125,
|
|
"learning_rate": 2.048852918789529e-06,
|
|
"loss": 0.375,
|
|
"step": 16535
|
|
},
|
|
{
|
|
"epoch": 2.378487201610584,
|
|
"grad_norm": 0.31071838766471116,
|
|
"learning_rate": 2.044288862682643e-06,
|
|
"loss": 0.3557,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 2.3792062122519413,
|
|
"grad_norm": 0.3015799692278713,
|
|
"learning_rate": 2.0397293168905876e-06,
|
|
"loss": 0.3457,
|
|
"step": 16545
|
|
},
|
|
{
|
|
"epoch": 2.379925222893299,
|
|
"grad_norm": 0.3171182651740354,
|
|
"learning_rate": 2.0351742839982936e-06,
|
|
"loss": 0.3715,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 2.3806442335346563,
|
|
"grad_norm": 0.30145775610388537,
|
|
"learning_rate": 2.0306237665881336e-06,
|
|
"loss": 0.3438,
|
|
"step": 16555
|
|
},
|
|
{
|
|
"epoch": 2.381363244176014,
|
|
"grad_norm": 0.29783895885463646,
|
|
"learning_rate": 2.026077767239928e-06,
|
|
"loss": 0.3513,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 2.3820822548173712,
|
|
"grad_norm": 0.3083372562288726,
|
|
"learning_rate": 2.0215362885309253e-06,
|
|
"loss": 0.3653,
|
|
"step": 16565
|
|
},
|
|
{
|
|
"epoch": 2.382801265458729,
|
|
"grad_norm": 0.30894133256713824,
|
|
"learning_rate": 2.016999333035824e-06,
|
|
"loss": 0.365,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 2.383520276100086,
|
|
"grad_norm": 0.3100165022014743,
|
|
"learning_rate": 2.012466903326743e-06,
|
|
"loss": 0.3624,
|
|
"step": 16575
|
|
},
|
|
{
|
|
"epoch": 2.384239286741444,
|
|
"grad_norm": 0.3172582338793338,
|
|
"learning_rate": 2.007939001973249e-06,
|
|
"loss": 0.3632,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 2.384958297382801,
|
|
"grad_norm": 0.2993700156185241,
|
|
"learning_rate": 2.0034156315423325e-06,
|
|
"loss": 0.3411,
|
|
"step": 16585
|
|
},
|
|
{
|
|
"epoch": 2.385677308024159,
|
|
"grad_norm": 0.3067991007904251,
|
|
"learning_rate": 1.9988967945984216e-06,
|
|
"loss": 0.3765,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 2.386396318665516,
|
|
"grad_norm": 0.29813149304561987,
|
|
"learning_rate": 1.9943824937033675e-06,
|
|
"loss": 0.3673,
|
|
"step": 16595
|
|
},
|
|
{
|
|
"epoch": 2.387115329306874,
|
|
"grad_norm": 0.31150650600211865,
|
|
"learning_rate": 1.989872731416457e-06,
|
|
"loss": 0.3475,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 2.387834339948231,
|
|
"grad_norm": 0.2987440339189312,
|
|
"learning_rate": 1.985367510294398e-06,
|
|
"loss": 0.3473,
|
|
"step": 16605
|
|
},
|
|
{
|
|
"epoch": 2.388553350589589,
|
|
"grad_norm": 0.31135653279374875,
|
|
"learning_rate": 1.980866832891325e-06,
|
|
"loss": 0.3593,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 2.389272361230946,
|
|
"grad_norm": 0.345410883194383,
|
|
"learning_rate": 1.976370701758802e-06,
|
|
"loss": 0.3643,
|
|
"step": 16615
|
|
},
|
|
{
|
|
"epoch": 2.389991371872304,
|
|
"grad_norm": 0.29833245223834287,
|
|
"learning_rate": 1.9718791194458086e-06,
|
|
"loss": 0.3525,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 2.390710382513661,
|
|
"grad_norm": 0.3070446717095762,
|
|
"learning_rate": 1.9673920884987462e-06,
|
|
"loss": 0.3574,
|
|
"step": 16625
|
|
},
|
|
{
|
|
"epoch": 2.391429393155019,
|
|
"grad_norm": 0.3165435717253997,
|
|
"learning_rate": 1.96290961146144e-06,
|
|
"loss": 0.3602,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 2.392148403796376,
|
|
"grad_norm": 0.3061032201985541,
|
|
"learning_rate": 1.9584316908751334e-06,
|
|
"loss": 0.3575,
|
|
"step": 16635
|
|
},
|
|
{
|
|
"epoch": 2.3928674144377338,
|
|
"grad_norm": 0.33595527793917046,
|
|
"learning_rate": 1.9539583292784805e-06,
|
|
"loss": 0.3451,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 2.393586425079091,
|
|
"grad_norm": 0.31692637409251406,
|
|
"learning_rate": 1.94948952920756e-06,
|
|
"loss": 0.366,
|
|
"step": 16645
|
|
},
|
|
{
|
|
"epoch": 2.3943054357204487,
|
|
"grad_norm": 0.32106051722998524,
|
|
"learning_rate": 1.945025293195857e-06,
|
|
"loss": 0.3629,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 2.395024446361806,
|
|
"grad_norm": 0.31875715775304597,
|
|
"learning_rate": 1.9405656237742678e-06,
|
|
"loss": 0.3562,
|
|
"step": 16655
|
|
},
|
|
{
|
|
"epoch": 2.3957434570031637,
|
|
"grad_norm": 0.31159749223707356,
|
|
"learning_rate": 1.936110523471111e-06,
|
|
"loss": 0.3505,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 2.396462467644521,
|
|
"grad_norm": 0.31894280138883213,
|
|
"learning_rate": 1.9316599948121017e-06,
|
|
"loss": 0.3565,
|
|
"step": 16665
|
|
},
|
|
{
|
|
"epoch": 2.3971814782858787,
|
|
"grad_norm": 0.30113119455442694,
|
|
"learning_rate": 1.9272140403203687e-06,
|
|
"loss": 0.3394,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 2.397900488927236,
|
|
"grad_norm": 0.30222770577244146,
|
|
"learning_rate": 1.92277266251645e-06,
|
|
"loss": 0.3696,
|
|
"step": 16675
|
|
},
|
|
{
|
|
"epoch": 2.3986194995685937,
|
|
"grad_norm": 0.3197379106148971,
|
|
"learning_rate": 1.918335863918286e-06,
|
|
"loss": 0.3582,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 2.399338510209951,
|
|
"grad_norm": 0.30244520929727703,
|
|
"learning_rate": 1.913903647041224e-06,
|
|
"loss": 0.3435,
|
|
"step": 16685
|
|
},
|
|
{
|
|
"epoch": 2.4000575208513086,
|
|
"grad_norm": 0.30923941953194345,
|
|
"learning_rate": 1.9094760143980107e-06,
|
|
"loss": 0.3457,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 2.400776531492666,
|
|
"grad_norm": 0.3076681220598972,
|
|
"learning_rate": 1.9050529684987906e-06,
|
|
"loss": 0.3657,
|
|
"step": 16695
|
|
},
|
|
{
|
|
"epoch": 2.4014955421340236,
|
|
"grad_norm": 0.3063633406576403,
|
|
"learning_rate": 1.9006345118511171e-06,
|
|
"loss": 0.344,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 2.402214552775381,
|
|
"grad_norm": 0.31846373280083884,
|
|
"learning_rate": 1.8962206469599353e-06,
|
|
"loss": 0.3464,
|
|
"step": 16705
|
|
},
|
|
{
|
|
"epoch": 2.4029335634167386,
|
|
"grad_norm": 0.3067389528265799,
|
|
"learning_rate": 1.8918113763275847e-06,
|
|
"loss": 0.3622,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 2.4036525740580963,
|
|
"grad_norm": 0.3012500601430393,
|
|
"learning_rate": 1.887406702453809e-06,
|
|
"loss": 0.3537,
|
|
"step": 16715
|
|
},
|
|
{
|
|
"epoch": 2.4043715846994536,
|
|
"grad_norm": 0.3123994980644394,
|
|
"learning_rate": 1.8830066278357395e-06,
|
|
"loss": 0.3667,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 2.405090595340811,
|
|
"grad_norm": 0.3070931996515153,
|
|
"learning_rate": 1.8786111549678977e-06,
|
|
"loss": 0.3576,
|
|
"step": 16725
|
|
},
|
|
{
|
|
"epoch": 2.4058096059821685,
|
|
"grad_norm": 0.30152513549698934,
|
|
"learning_rate": 1.8742202863422033e-06,
|
|
"loss": 0.3582,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 2.4065286166235262,
|
|
"grad_norm": 0.31045639787630824,
|
|
"learning_rate": 1.869834024447964e-06,
|
|
"loss": 0.3627,
|
|
"step": 16735
|
|
},
|
|
{
|
|
"epoch": 2.4072476272648835,
|
|
"grad_norm": 0.313458442773504,
|
|
"learning_rate": 1.8654523717718697e-06,
|
|
"loss": 0.358,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 2.4079666379062408,
|
|
"grad_norm": 0.3121673881775161,
|
|
"learning_rate": 1.8610753307980068e-06,
|
|
"loss": 0.3422,
|
|
"step": 16745
|
|
},
|
|
{
|
|
"epoch": 2.4086856485475985,
|
|
"grad_norm": 0.3138157455498551,
|
|
"learning_rate": 1.85670290400784e-06,
|
|
"loss": 0.3514,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 2.409404659188956,
|
|
"grad_norm": 0.3062831057689979,
|
|
"learning_rate": 1.8523350938802165e-06,
|
|
"loss": 0.345,
|
|
"step": 16755
|
|
},
|
|
{
|
|
"epoch": 2.4101236698303135,
|
|
"grad_norm": 0.3088378047998563,
|
|
"learning_rate": 1.8479719028913746e-06,
|
|
"loss": 0.3428,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 2.410842680471671,
|
|
"grad_norm": 0.31155147696743823,
|
|
"learning_rate": 1.8436133335149276e-06,
|
|
"loss": 0.3702,
|
|
"step": 16765
|
|
},
|
|
{
|
|
"epoch": 2.4115616911130284,
|
|
"grad_norm": 0.31777544208044184,
|
|
"learning_rate": 1.839259388221868e-06,
|
|
"loss": 0.3589,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 2.412280701754386,
|
|
"grad_norm": 0.31445743776164564,
|
|
"learning_rate": 1.8349100694805711e-06,
|
|
"loss": 0.3543,
|
|
"step": 16775
|
|
},
|
|
{
|
|
"epoch": 2.4129997123957434,
|
|
"grad_norm": 0.33402574635843285,
|
|
"learning_rate": 1.8305653797567869e-06,
|
|
"loss": 0.3626,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 2.413718723037101,
|
|
"grad_norm": 0.30402025220962886,
|
|
"learning_rate": 1.8262253215136438e-06,
|
|
"loss": 0.3563,
|
|
"step": 16785
|
|
},
|
|
{
|
|
"epoch": 2.4144377336784584,
|
|
"grad_norm": 0.30518960338687,
|
|
"learning_rate": 1.8218898972116394e-06,
|
|
"loss": 0.3543,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 2.415156744319816,
|
|
"grad_norm": 0.3061444672734299,
|
|
"learning_rate": 1.8175591093086442e-06,
|
|
"loss": 0.3516,
|
|
"step": 16795
|
|
},
|
|
{
|
|
"epoch": 2.4158757549611733,
|
|
"grad_norm": 0.30806427596885644,
|
|
"learning_rate": 1.8132329602599097e-06,
|
|
"loss": 0.3648,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 2.416594765602531,
|
|
"grad_norm": 0.38849773803962784,
|
|
"learning_rate": 1.8089114525180451e-06,
|
|
"loss": 0.349,
|
|
"step": 16805
|
|
},
|
|
{
|
|
"epoch": 2.4173137762438883,
|
|
"grad_norm": 0.30563185571890733,
|
|
"learning_rate": 1.8045945885330341e-06,
|
|
"loss": 0.3537,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 2.418032786885246,
|
|
"grad_norm": 0.29673282061356554,
|
|
"learning_rate": 1.80028237075223e-06,
|
|
"loss": 0.3704,
|
|
"step": 16815
|
|
},
|
|
{
|
|
"epoch": 2.4187517975266033,
|
|
"grad_norm": 0.3070195687967652,
|
|
"learning_rate": 1.795974801620346e-06,
|
|
"loss": 0.3675,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 2.419470808167961,
|
|
"grad_norm": 0.30232957242307895,
|
|
"learning_rate": 1.791671883579469e-06,
|
|
"loss": 0.3489,
|
|
"step": 16825
|
|
},
|
|
{
|
|
"epoch": 2.4201898188093183,
|
|
"grad_norm": 0.31102097099611603,
|
|
"learning_rate": 1.787373619069036e-06,
|
|
"loss": 0.3619,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 2.420908829450676,
|
|
"grad_norm": 0.3613315161174587,
|
|
"learning_rate": 1.7830800105258605e-06,
|
|
"loss": 0.3602,
|
|
"step": 16835
|
|
},
|
|
{
|
|
"epoch": 2.4216278400920332,
|
|
"grad_norm": 0.3015445183271017,
|
|
"learning_rate": 1.778791060384104e-06,
|
|
"loss": 0.3492,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 2.422346850733391,
|
|
"grad_norm": 0.30762161087025014,
|
|
"learning_rate": 1.774506771075295e-06,
|
|
"loss": 0.3575,
|
|
"step": 16845
|
|
},
|
|
{
|
|
"epoch": 2.423065861374748,
|
|
"grad_norm": 0.3105962165052004,
|
|
"learning_rate": 1.770227145028316e-06,
|
|
"loss": 0.3519,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 2.423784872016106,
|
|
"grad_norm": 0.2996130875400581,
|
|
"learning_rate": 1.7659521846694039e-06,
|
|
"loss": 0.3611,
|
|
"step": 16855
|
|
},
|
|
{
|
|
"epoch": 2.424503882657463,
|
|
"grad_norm": 0.3030618387750977,
|
|
"learning_rate": 1.761681892422158e-06,
|
|
"loss": 0.3567,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 2.425222893298821,
|
|
"grad_norm": 0.3090647114349956,
|
|
"learning_rate": 1.7574162707075226e-06,
|
|
"loss": 0.3615,
|
|
"step": 16865
|
|
},
|
|
{
|
|
"epoch": 2.425941903940178,
|
|
"grad_norm": 0.3086330386760215,
|
|
"learning_rate": 1.753155321943797e-06,
|
|
"loss": 0.3697,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 2.426660914581536,
|
|
"grad_norm": 0.3076850664842922,
|
|
"learning_rate": 1.748899048546634e-06,
|
|
"loss": 0.3615,
|
|
"step": 16875
|
|
},
|
|
{
|
|
"epoch": 2.427379925222893,
|
|
"grad_norm": 0.2957884747974804,
|
|
"learning_rate": 1.7446474529290359e-06,
|
|
"loss": 0.3431,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 2.428098935864251,
|
|
"grad_norm": 0.3107362937467926,
|
|
"learning_rate": 1.7404005375013466e-06,
|
|
"loss": 0.3597,
|
|
"step": 16885
|
|
},
|
|
{
|
|
"epoch": 2.428817946505608,
|
|
"grad_norm": 0.312188974556344,
|
|
"learning_rate": 1.7361583046712649e-06,
|
|
"loss": 0.3715,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 2.429536957146966,
|
|
"grad_norm": 0.2967382756754711,
|
|
"learning_rate": 1.7319207568438278e-06,
|
|
"loss": 0.3599,
|
|
"step": 16895
|
|
},
|
|
{
|
|
"epoch": 2.430255967788323,
|
|
"grad_norm": 0.31529094661142887,
|
|
"learning_rate": 1.7276878964214227e-06,
|
|
"loss": 0.3403,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 2.430974978429681,
|
|
"grad_norm": 0.2986818445027334,
|
|
"learning_rate": 1.7234597258037756e-06,
|
|
"loss": 0.3519,
|
|
"step": 16905
|
|
},
|
|
{
|
|
"epoch": 2.431693989071038,
|
|
"grad_norm": 0.3125459971936233,
|
|
"learning_rate": 1.719236247387951e-06,
|
|
"loss": 0.3656,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 2.4324129997123958,
|
|
"grad_norm": 0.3717572805196429,
|
|
"learning_rate": 1.7150174635683615e-06,
|
|
"loss": 0.3642,
|
|
"step": 16915
|
|
},
|
|
{
|
|
"epoch": 2.433132010353753,
|
|
"grad_norm": 0.3059666316215835,
|
|
"learning_rate": 1.7108033767367494e-06,
|
|
"loss": 0.3725,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 2.4338510209951107,
|
|
"grad_norm": 0.3196174908517126,
|
|
"learning_rate": 1.7065939892821992e-06,
|
|
"loss": 0.3495,
|
|
"step": 16925
|
|
},
|
|
{
|
|
"epoch": 2.4345700316364685,
|
|
"grad_norm": 0.30571009156213,
|
|
"learning_rate": 1.7023893035911355e-06,
|
|
"loss": 0.3706,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 2.4352890422778257,
|
|
"grad_norm": 0.30831064132873515,
|
|
"learning_rate": 1.6981893220473067e-06,
|
|
"loss": 0.3394,
|
|
"step": 16935
|
|
},
|
|
{
|
|
"epoch": 2.436008052919183,
|
|
"grad_norm": 0.33104396855694757,
|
|
"learning_rate": 1.6939940470317984e-06,
|
|
"loss": 0.3537,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 2.4367270635605407,
|
|
"grad_norm": 0.31128059060130037,
|
|
"learning_rate": 1.6898034809230334e-06,
|
|
"loss": 0.3753,
|
|
"step": 16945
|
|
},
|
|
{
|
|
"epoch": 2.4374460742018984,
|
|
"grad_norm": 0.3148376623822208,
|
|
"learning_rate": 1.6856176260967593e-06,
|
|
"loss": 0.3574,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 2.4381650848432557,
|
|
"grad_norm": 0.31121691153326964,
|
|
"learning_rate": 1.681436484926051e-06,
|
|
"loss": 0.349,
|
|
"step": 16955
|
|
},
|
|
{
|
|
"epoch": 2.438884095484613,
|
|
"grad_norm": 0.3097706154767799,
|
|
"learning_rate": 1.6772600597813194e-06,
|
|
"loss": 0.3545,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 2.4396031061259706,
|
|
"grad_norm": 0.3138394742795372,
|
|
"learning_rate": 1.673088353030291e-06,
|
|
"loss": 0.3583,
|
|
"step": 16965
|
|
},
|
|
{
|
|
"epoch": 2.4403221167673284,
|
|
"grad_norm": 0.3009795543198072,
|
|
"learning_rate": 1.668921367038029e-06,
|
|
"loss": 0.3557,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 2.4410411274086856,
|
|
"grad_norm": 0.3058900019055182,
|
|
"learning_rate": 1.6647591041669076e-06,
|
|
"loss": 0.3662,
|
|
"step": 16975
|
|
},
|
|
{
|
|
"epoch": 2.4417601380500433,
|
|
"grad_norm": 0.30977411122929144,
|
|
"learning_rate": 1.6606015667766362e-06,
|
|
"loss": 0.3404,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 2.4424791486914006,
|
|
"grad_norm": 0.3086532344833748,
|
|
"learning_rate": 1.6564487572242338e-06,
|
|
"loss": 0.3634,
|
|
"step": 16985
|
|
},
|
|
{
|
|
"epoch": 2.4431981593327583,
|
|
"grad_norm": 0.30919037973353314,
|
|
"learning_rate": 1.6523006778640472e-06,
|
|
"loss": 0.345,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 2.4439171699741156,
|
|
"grad_norm": 0.3350256855598322,
|
|
"learning_rate": 1.6481573310477384e-06,
|
|
"loss": 0.3553,
|
|
"step": 16995
|
|
},
|
|
{
|
|
"epoch": 2.4446361806154733,
|
|
"grad_norm": 0.3399771416992865,
|
|
"learning_rate": 1.644018719124283e-06,
|
|
"loss": 0.3373,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 2.4453551912568305,
|
|
"grad_norm": 0.31607703923232544,
|
|
"learning_rate": 1.6398848444399794e-06,
|
|
"loss": 0.3586,
|
|
"step": 17005
|
|
},
|
|
{
|
|
"epoch": 2.4460742018981882,
|
|
"grad_norm": 0.3201203019090241,
|
|
"learning_rate": 1.6357557093384335e-06,
|
|
"loss": 0.3595,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 2.4467932125395455,
|
|
"grad_norm": 0.3002187152760896,
|
|
"learning_rate": 1.6316313161605723e-06,
|
|
"loss": 0.3457,
|
|
"step": 17015
|
|
},
|
|
{
|
|
"epoch": 2.4475122231809032,
|
|
"grad_norm": 0.3243764298092394,
|
|
"learning_rate": 1.6275116672446235e-06,
|
|
"loss": 0.3576,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 2.4482312338222605,
|
|
"grad_norm": 0.3277410455544984,
|
|
"learning_rate": 1.6233967649261328e-06,
|
|
"loss": 0.362,
|
|
"step": 17025
|
|
},
|
|
{
|
|
"epoch": 2.448950244463618,
|
|
"grad_norm": 0.31461864856915484,
|
|
"learning_rate": 1.619286611537958e-06,
|
|
"loss": 0.3579,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 2.4496692551049755,
|
|
"grad_norm": 0.3160979685375077,
|
|
"learning_rate": 1.6151812094102548e-06,
|
|
"loss": 0.3611,
|
|
"step": 17035
|
|
},
|
|
{
|
|
"epoch": 2.450388265746333,
|
|
"grad_norm": 0.3040492263650492,
|
|
"learning_rate": 1.6110805608704904e-06,
|
|
"loss": 0.3596,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 2.4511072763876904,
|
|
"grad_norm": 0.31725131938443385,
|
|
"learning_rate": 1.606984668243441e-06,
|
|
"loss": 0.3631,
|
|
"step": 17045
|
|
},
|
|
{
|
|
"epoch": 2.451826287029048,
|
|
"grad_norm": 0.30864474580795526,
|
|
"learning_rate": 1.6028935338511786e-06,
|
|
"loss": 0.3338,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 2.4525452976704054,
|
|
"grad_norm": 0.3001447069907468,
|
|
"learning_rate": 1.5988071600130805e-06,
|
|
"loss": 0.3397,
|
|
"step": 17055
|
|
},
|
|
{
|
|
"epoch": 2.453264308311763,
|
|
"grad_norm": 0.3165638007878571,
|
|
"learning_rate": 1.5947255490458312e-06,
|
|
"loss": 0.3606,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 2.4539833189531204,
|
|
"grad_norm": 0.32135725369278234,
|
|
"learning_rate": 1.5906487032634055e-06,
|
|
"loss": 0.359,
|
|
"step": 17065
|
|
},
|
|
{
|
|
"epoch": 2.454702329594478,
|
|
"grad_norm": 0.3266234948475013,
|
|
"learning_rate": 1.586576624977082e-06,
|
|
"loss": 0.3553,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 2.4554213402358354,
|
|
"grad_norm": 0.31119789748060417,
|
|
"learning_rate": 1.5825093164954387e-06,
|
|
"loss": 0.3501,
|
|
"step": 17075
|
|
},
|
|
{
|
|
"epoch": 2.456140350877193,
|
|
"grad_norm": 0.29703370983787136,
|
|
"learning_rate": 1.578446780124344e-06,
|
|
"loss": 0.3546,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 2.4568593615185503,
|
|
"grad_norm": 0.31650867374625985,
|
|
"learning_rate": 1.5743890181669607e-06,
|
|
"loss": 0.342,
|
|
"step": 17085
|
|
},
|
|
{
|
|
"epoch": 2.457578372159908,
|
|
"grad_norm": 0.3250380853925492,
|
|
"learning_rate": 1.5703360329237526e-06,
|
|
"loss": 0.3555,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 2.4582973828012653,
|
|
"grad_norm": 0.31577974895217814,
|
|
"learning_rate": 1.5662878266924675e-06,
|
|
"loss": 0.362,
|
|
"step": 17095
|
|
},
|
|
{
|
|
"epoch": 2.459016393442623,
|
|
"grad_norm": 0.29951875548924073,
|
|
"learning_rate": 1.5622444017681438e-06,
|
|
"loss": 0.3471,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 2.4597354040839803,
|
|
"grad_norm": 0.308005757258318,
|
|
"learning_rate": 1.5582057604431178e-06,
|
|
"loss": 0.3643,
|
|
"step": 17105
|
|
},
|
|
{
|
|
"epoch": 2.460454414725338,
|
|
"grad_norm": 0.30455057643136085,
|
|
"learning_rate": 1.5541719050070026e-06,
|
|
"loss": 0.352,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 2.4611734253666953,
|
|
"grad_norm": 0.3057923328411924,
|
|
"learning_rate": 1.5501428377467087e-06,
|
|
"loss": 0.3462,
|
|
"step": 17115
|
|
},
|
|
{
|
|
"epoch": 2.461892436008053,
|
|
"grad_norm": 0.30086318438027765,
|
|
"learning_rate": 1.5461185609464214e-06,
|
|
"loss": 0.3556,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 2.4626114466494102,
|
|
"grad_norm": 0.3099221276873916,
|
|
"learning_rate": 1.5420990768876175e-06,
|
|
"loss": 0.3562,
|
|
"step": 17125
|
|
},
|
|
{
|
|
"epoch": 2.463330457290768,
|
|
"grad_norm": 0.32262263906273925,
|
|
"learning_rate": 1.5380843878490592e-06,
|
|
"loss": 0.3659,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 2.464049467932125,
|
|
"grad_norm": 0.33418684176700825,
|
|
"learning_rate": 1.5340744961067821e-06,
|
|
"loss": 0.3462,
|
|
"step": 17135
|
|
},
|
|
{
|
|
"epoch": 2.464768478573483,
|
|
"grad_norm": 0.3097293293337475,
|
|
"learning_rate": 1.5300694039341035e-06,
|
|
"loss": 0.353,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 2.4654874892148406,
|
|
"grad_norm": 0.298674900463793,
|
|
"learning_rate": 1.526069113601627e-06,
|
|
"loss": 0.3484,
|
|
"step": 17145
|
|
},
|
|
{
|
|
"epoch": 2.466206499856198,
|
|
"grad_norm": 0.3158087180370093,
|
|
"learning_rate": 1.5220736273772263e-06,
|
|
"loss": 0.3517,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 2.466925510497555,
|
|
"grad_norm": 0.31834404918792936,
|
|
"learning_rate": 1.5180829475260517e-06,
|
|
"loss": 0.3744,
|
|
"step": 17155
|
|
},
|
|
{
|
|
"epoch": 2.467644521138913,
|
|
"grad_norm": 0.31709258011300673,
|
|
"learning_rate": 1.5140970763105356e-06,
|
|
"loss": 0.3544,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 2.4683635317802706,
|
|
"grad_norm": 0.3120763334555011,
|
|
"learning_rate": 1.510116015990376e-06,
|
|
"loss": 0.3513,
|
|
"step": 17165
|
|
},
|
|
{
|
|
"epoch": 2.469082542421628,
|
|
"grad_norm": 0.3162692296179125,
|
|
"learning_rate": 1.5061397688225477e-06,
|
|
"loss": 0.3557,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 2.469801553062985,
|
|
"grad_norm": 0.31157358107536154,
|
|
"learning_rate": 1.5021683370613017e-06,
|
|
"loss": 0.3685,
|
|
"step": 17175
|
|
},
|
|
{
|
|
"epoch": 2.470520563704343,
|
|
"grad_norm": 0.2954469245495106,
|
|
"learning_rate": 1.498201722958148e-06,
|
|
"loss": 0.3482,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 2.4712395743457005,
|
|
"grad_norm": 0.3118611389765953,
|
|
"learning_rate": 1.494239928761869e-06,
|
|
"loss": 0.3651,
|
|
"step": 17185
|
|
},
|
|
{
|
|
"epoch": 2.471958584987058,
|
|
"grad_norm": 0.3153770084694792,
|
|
"learning_rate": 1.490282956718524e-06,
|
|
"loss": 0.3539,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 2.4726775956284155,
|
|
"grad_norm": 0.30868214955442846,
|
|
"learning_rate": 1.4863308090714258e-06,
|
|
"loss": 0.3473,
|
|
"step": 17195
|
|
},
|
|
{
|
|
"epoch": 2.4733966062697728,
|
|
"grad_norm": 0.30759194940633955,
|
|
"learning_rate": 1.4823834880611554e-06,
|
|
"loss": 0.3507,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 2.4741156169111305,
|
|
"grad_norm": 0.3402795028334639,
|
|
"learning_rate": 1.4784409959255642e-06,
|
|
"loss": 0.3567,
|
|
"step": 17205
|
|
},
|
|
{
|
|
"epoch": 2.4748346275524877,
|
|
"grad_norm": 0.3175717638334829,
|
|
"learning_rate": 1.4745033348997572e-06,
|
|
"loss": 0.3613,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 2.4755536381938454,
|
|
"grad_norm": 0.3074937262153812,
|
|
"learning_rate": 1.470570507216108e-06,
|
|
"loss": 0.3522,
|
|
"step": 17215
|
|
},
|
|
{
|
|
"epoch": 2.4762726488352027,
|
|
"grad_norm": 0.3125326237358324,
|
|
"learning_rate": 1.4666425151042429e-06,
|
|
"loss": 0.3458,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 2.4769916594765604,
|
|
"grad_norm": 0.30608668052185395,
|
|
"learning_rate": 1.4627193607910516e-06,
|
|
"loss": 0.353,
|
|
"step": 17225
|
|
},
|
|
{
|
|
"epoch": 2.4777106701179177,
|
|
"grad_norm": 0.318666532375494,
|
|
"learning_rate": 1.458801046500683e-06,
|
|
"loss": 0.3549,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 2.4784296807592754,
|
|
"grad_norm": 0.3142834316070068,
|
|
"learning_rate": 1.4548875744545366e-06,
|
|
"loss": 0.367,
|
|
"step": 17235
|
|
},
|
|
{
|
|
"epoch": 2.4791486914006327,
|
|
"grad_norm": 0.30397581194824813,
|
|
"learning_rate": 1.4509789468712653e-06,
|
|
"loss": 0.3575,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 2.4798677020419904,
|
|
"grad_norm": 0.3057273682661973,
|
|
"learning_rate": 1.4470751659667849e-06,
|
|
"loss": 0.3443,
|
|
"step": 17245
|
|
},
|
|
{
|
|
"epoch": 2.4805867126833476,
|
|
"grad_norm": 0.32534194389958865,
|
|
"learning_rate": 1.4431762339542553e-06,
|
|
"loss": 0.3561,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 2.4813057233247053,
|
|
"grad_norm": 0.3160287954987181,
|
|
"learning_rate": 1.4392821530440882e-06,
|
|
"loss": 0.3516,
|
|
"step": 17255
|
|
},
|
|
{
|
|
"epoch": 2.4820247339660626,
|
|
"grad_norm": 0.3136330939409183,
|
|
"learning_rate": 1.4353929254439502e-06,
|
|
"loss": 0.3556,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 2.4827437446074203,
|
|
"grad_norm": 0.3215116924328709,
|
|
"learning_rate": 1.4315085533587502e-06,
|
|
"loss": 0.3562,
|
|
"step": 17265
|
|
},
|
|
{
|
|
"epoch": 2.4834627552487776,
|
|
"grad_norm": 0.31491915843149676,
|
|
"learning_rate": 1.4276290389906478e-06,
|
|
"loss": 0.342,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 2.4841817658901353,
|
|
"grad_norm": 0.3090368767273289,
|
|
"learning_rate": 1.423754384539051e-06,
|
|
"loss": 0.3492,
|
|
"step": 17275
|
|
},
|
|
{
|
|
"epoch": 2.4849007765314925,
|
|
"grad_norm": 0.3009441303694336,
|
|
"learning_rate": 1.419884592200609e-06,
|
|
"loss": 0.3484,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 2.4856197871728503,
|
|
"grad_norm": 0.32423942758517144,
|
|
"learning_rate": 1.4160196641692093e-06,
|
|
"loss": 0.3685,
|
|
"step": 17285
|
|
},
|
|
{
|
|
"epoch": 2.4863387978142075,
|
|
"grad_norm": 0.3032972586724873,
|
|
"learning_rate": 1.4121596026359951e-06,
|
|
"loss": 0.3579,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 2.4870578084555652,
|
|
"grad_norm": 0.3027485270411955,
|
|
"learning_rate": 1.4083044097893396e-06,
|
|
"loss": 0.3451,
|
|
"step": 17295
|
|
},
|
|
{
|
|
"epoch": 2.4877768190969225,
|
|
"grad_norm": 0.31783825112011593,
|
|
"learning_rate": 1.4044540878148572e-06,
|
|
"loss": 0.3567,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 2.48849582973828,
|
|
"grad_norm": 0.3157336204160479,
|
|
"learning_rate": 1.4006086388954066e-06,
|
|
"loss": 0.3693,
|
|
"step": 17305
|
|
},
|
|
{
|
|
"epoch": 2.4892148403796375,
|
|
"grad_norm": 0.30291168506729627,
|
|
"learning_rate": 1.3967680652110783e-06,
|
|
"loss": 0.3733,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 2.489933851020995,
|
|
"grad_norm": 0.32324881197648486,
|
|
"learning_rate": 1.3929323689391994e-06,
|
|
"loss": 0.3605,
|
|
"step": 17315
|
|
},
|
|
{
|
|
"epoch": 2.4906528616623524,
|
|
"grad_norm": 0.29364732713138836,
|
|
"learning_rate": 1.3891015522543382e-06,
|
|
"loss": 0.3464,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 2.49137187230371,
|
|
"grad_norm": 0.3261749345456971,
|
|
"learning_rate": 1.3852756173282889e-06,
|
|
"loss": 0.365,
|
|
"step": 17325
|
|
},
|
|
{
|
|
"epoch": 2.4920908829450674,
|
|
"grad_norm": 0.2997107742358322,
|
|
"learning_rate": 1.3814545663300783e-06,
|
|
"loss": 0.3591,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 2.492809893586425,
|
|
"grad_norm": 0.307601657265674,
|
|
"learning_rate": 1.3776384014259714e-06,
|
|
"loss": 0.3512,
|
|
"step": 17335
|
|
},
|
|
{
|
|
"epoch": 2.4935289042277824,
|
|
"grad_norm": 0.3163799337854286,
|
|
"learning_rate": 1.3738271247794533e-06,
|
|
"loss": 0.3467,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 2.49424791486914,
|
|
"grad_norm": 0.3190152944508011,
|
|
"learning_rate": 1.3700207385512497e-06,
|
|
"loss": 0.3561,
|
|
"step": 17345
|
|
},
|
|
{
|
|
"epoch": 2.4949669255104974,
|
|
"grad_norm": 0.3130223226974614,
|
|
"learning_rate": 1.3662192448993028e-06,
|
|
"loss": 0.3467,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 2.495685936151855,
|
|
"grad_norm": 0.306490021257793,
|
|
"learning_rate": 1.3624226459787849e-06,
|
|
"loss": 0.3517,
|
|
"step": 17355
|
|
},
|
|
{
|
|
"epoch": 2.496404946793213,
|
|
"grad_norm": 0.3229106798668172,
|
|
"learning_rate": 1.3586309439420985e-06,
|
|
"loss": 0.3484,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 2.49712395743457,
|
|
"grad_norm": 0.3088760623768454,
|
|
"learning_rate": 1.3548441409388591e-06,
|
|
"loss": 0.3536,
|
|
"step": 17365
|
|
},
|
|
{
|
|
"epoch": 2.4978429680759273,
|
|
"grad_norm": 0.3157699187140034,
|
|
"learning_rate": 1.3510622391159156e-06,
|
|
"loss": 0.3631,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 2.498561978717285,
|
|
"grad_norm": 0.3111580111211083,
|
|
"learning_rate": 1.3472852406173342e-06,
|
|
"loss": 0.3382,
|
|
"step": 17375
|
|
},
|
|
{
|
|
"epoch": 2.4992809893586427,
|
|
"grad_norm": 0.30354069525747657,
|
|
"learning_rate": 1.3435131475843988e-06,
|
|
"loss": 0.3717,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.3196523648789475,
|
|
"learning_rate": 1.339745962155613e-06,
|
|
"loss": 0.3676,
|
|
"step": 17385
|
|
},
|
|
{
|
|
"epoch": 2.5007190106413573,
|
|
"grad_norm": 0.29607395991693086,
|
|
"learning_rate": 1.3359836864667043e-06,
|
|
"loss": 0.3413,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 2.501438021282715,
|
|
"grad_norm": 0.30646191943253703,
|
|
"learning_rate": 1.3322263226506072e-06,
|
|
"loss": 0.3753,
|
|
"step": 17395
|
|
},
|
|
{
|
|
"epoch": 2.5021570319240727,
|
|
"grad_norm": 0.29913069531853337,
|
|
"learning_rate": 1.3284738728374769e-06,
|
|
"loss": 0.3618,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 2.50287604256543,
|
|
"grad_norm": 0.30682008904914354,
|
|
"learning_rate": 1.3247263391546838e-06,
|
|
"loss": 0.3453,
|
|
"step": 17405
|
|
},
|
|
{
|
|
"epoch": 2.503595053206787,
|
|
"grad_norm": 0.3068404197607742,
|
|
"learning_rate": 1.3209837237268075e-06,
|
|
"loss": 0.3588,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 2.504314063848145,
|
|
"grad_norm": 0.30055187674162964,
|
|
"learning_rate": 1.3172460286756417e-06,
|
|
"loss": 0.3468,
|
|
"step": 17415
|
|
},
|
|
{
|
|
"epoch": 2.5050330744895026,
|
|
"grad_norm": 0.3043986575852982,
|
|
"learning_rate": 1.3135132561201925e-06,
|
|
"loss": 0.3541,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 2.50575208513086,
|
|
"grad_norm": 0.3029808494772463,
|
|
"learning_rate": 1.3097854081766715e-06,
|
|
"loss": 0.3579,
|
|
"step": 17425
|
|
},
|
|
{
|
|
"epoch": 2.506471095772217,
|
|
"grad_norm": 0.3063283709079658,
|
|
"learning_rate": 1.3060624869584959e-06,
|
|
"loss": 0.35,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 2.507190106413575,
|
|
"grad_norm": 0.3156085285867541,
|
|
"learning_rate": 1.3023444945762997e-06,
|
|
"loss": 0.3539,
|
|
"step": 17435
|
|
},
|
|
{
|
|
"epoch": 2.5079091170549326,
|
|
"grad_norm": 0.30120145554191713,
|
|
"learning_rate": 1.2986314331379147e-06,
|
|
"loss": 0.3527,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 2.50862812769629,
|
|
"grad_norm": 0.32084302624344235,
|
|
"learning_rate": 1.2949233047483756e-06,
|
|
"loss": 0.3541,
|
|
"step": 17445
|
|
},
|
|
{
|
|
"epoch": 2.5093471383376476,
|
|
"grad_norm": 0.31549607511388816,
|
|
"learning_rate": 1.29122011150993e-06,
|
|
"loss": 0.3478,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 2.510066148979005,
|
|
"grad_norm": 0.3061931307570909,
|
|
"learning_rate": 1.287521855522015e-06,
|
|
"loss": 0.3386,
|
|
"step": 17455
|
|
},
|
|
{
|
|
"epoch": 2.5107851596203625,
|
|
"grad_norm": 0.30550206028882093,
|
|
"learning_rate": 1.2838285388812788e-06,
|
|
"loss": 0.3597,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 2.51150417026172,
|
|
"grad_norm": 0.3115625018502871,
|
|
"learning_rate": 1.280140163681568e-06,
|
|
"loss": 0.3615,
|
|
"step": 17465
|
|
},
|
|
{
|
|
"epoch": 2.5122231809030775,
|
|
"grad_norm": 0.31812462882374754,
|
|
"learning_rate": 1.276456732013921e-06,
|
|
"loss": 0.3598,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 2.5129421915444348,
|
|
"grad_norm": 0.3179610690815218,
|
|
"learning_rate": 1.2727782459665816e-06,
|
|
"loss": 0.352,
|
|
"step": 17475
|
|
},
|
|
{
|
|
"epoch": 2.5136612021857925,
|
|
"grad_norm": 0.3089280360690141,
|
|
"learning_rate": 1.2691047076249852e-06,
|
|
"loss": 0.3478,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 2.5143802128271497,
|
|
"grad_norm": 0.3064307988896098,
|
|
"learning_rate": 1.26543611907176e-06,
|
|
"loss": 0.3581,
|
|
"step": 17485
|
|
},
|
|
{
|
|
"epoch": 2.5150992234685075,
|
|
"grad_norm": 0.3217412579727273,
|
|
"learning_rate": 1.2617724823867373e-06,
|
|
"loss": 0.3721,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 2.5158182341098647,
|
|
"grad_norm": 0.3215853577403647,
|
|
"learning_rate": 1.2581137996469306e-06,
|
|
"loss": 0.3672,
|
|
"step": 17495
|
|
},
|
|
{
|
|
"epoch": 2.5165372447512224,
|
|
"grad_norm": 0.3098704726577232,
|
|
"learning_rate": 1.2544600729265499e-06,
|
|
"loss": 0.3458,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 2.5172562553925797,
|
|
"grad_norm": 0.30909301288355273,
|
|
"learning_rate": 1.2508113042969972e-06,
|
|
"loss": 0.3637,
|
|
"step": 17505
|
|
},
|
|
{
|
|
"epoch": 2.5179752660339374,
|
|
"grad_norm": 0.3082915578651469,
|
|
"learning_rate": 1.2471674958268564e-06,
|
|
"loss": 0.3459,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 2.5186942766752947,
|
|
"grad_norm": 0.3054486820572545,
|
|
"learning_rate": 1.2435286495819088e-06,
|
|
"loss": 0.3626,
|
|
"step": 17515
|
|
},
|
|
{
|
|
"epoch": 2.5194132873166524,
|
|
"grad_norm": 0.32736410711487907,
|
|
"learning_rate": 1.2398947676251194e-06,
|
|
"loss": 0.3559,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 2.5201322979580096,
|
|
"grad_norm": 0.30674435608235473,
|
|
"learning_rate": 1.2362658520166348e-06,
|
|
"loss": 0.3599,
|
|
"step": 17525
|
|
},
|
|
{
|
|
"epoch": 2.5208513085993673,
|
|
"grad_norm": 0.30842284080151283,
|
|
"learning_rate": 1.232641904813785e-06,
|
|
"loss": 0.3498,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 2.5215703192407246,
|
|
"grad_norm": 0.3096068528572147,
|
|
"learning_rate": 1.2290229280710942e-06,
|
|
"loss": 0.3452,
|
|
"step": 17535
|
|
},
|
|
{
|
|
"epoch": 2.5222893298820823,
|
|
"grad_norm": 0.35087830788333857,
|
|
"learning_rate": 1.2254089238402567e-06,
|
|
"loss": 0.3536,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 2.5230083405234396,
|
|
"grad_norm": 0.3047042168981482,
|
|
"learning_rate": 1.2217998941701515e-06,
|
|
"loss": 0.3575,
|
|
"step": 17545
|
|
},
|
|
{
|
|
"epoch": 2.5237273511647973,
|
|
"grad_norm": 0.3151491714584883,
|
|
"learning_rate": 1.218195841106843e-06,
|
|
"loss": 0.3601,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 2.524446361806155,
|
|
"grad_norm": 0.31762041317369133,
|
|
"learning_rate": 1.2145967666935632e-06,
|
|
"loss": 0.3471,
|
|
"step": 17555
|
|
},
|
|
{
|
|
"epoch": 2.5251653724475123,
|
|
"grad_norm": 0.30093015661914774,
|
|
"learning_rate": 1.2110026729707325e-06,
|
|
"loss": 0.3583,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 2.5258843830888695,
|
|
"grad_norm": 0.32399559788590615,
|
|
"learning_rate": 1.2074135619759431e-06,
|
|
"loss": 0.356,
|
|
"step": 17565
|
|
},
|
|
{
|
|
"epoch": 2.5266033937302272,
|
|
"grad_norm": 0.30586256467197237,
|
|
"learning_rate": 1.2038294357439596e-06,
|
|
"loss": 0.3464,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 2.527322404371585,
|
|
"grad_norm": 0.30796096457299954,
|
|
"learning_rate": 1.2002502963067274e-06,
|
|
"loss": 0.3658,
|
|
"step": 17575
|
|
},
|
|
{
|
|
"epoch": 2.528041415012942,
|
|
"grad_norm": 0.30947016052734266,
|
|
"learning_rate": 1.1966761456933573e-06,
|
|
"loss": 0.3598,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 2.5287604256542995,
|
|
"grad_norm": 0.29855342196192786,
|
|
"learning_rate": 1.1931069859301335e-06,
|
|
"loss": 0.3493,
|
|
"step": 17585
|
|
},
|
|
{
|
|
"epoch": 2.529479436295657,
|
|
"grad_norm": 0.30982323067125345,
|
|
"learning_rate": 1.1895428190405168e-06,
|
|
"loss": 0.3545,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 2.530198446937015,
|
|
"grad_norm": 0.30672308153682704,
|
|
"learning_rate": 1.1859836470451314e-06,
|
|
"loss": 0.3546,
|
|
"step": 17595
|
|
},
|
|
{
|
|
"epoch": 2.530917457578372,
|
|
"grad_norm": 0.31941929039113764,
|
|
"learning_rate": 1.182429471961768e-06,
|
|
"loss": 0.3557,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 2.5316364682197294,
|
|
"grad_norm": 0.3083870835576022,
|
|
"learning_rate": 1.1788802958053924e-06,
|
|
"loss": 0.3569,
|
|
"step": 17605
|
|
},
|
|
{
|
|
"epoch": 2.532355478861087,
|
|
"grad_norm": 0.3163874409785299,
|
|
"learning_rate": 1.1753361205881275e-06,
|
|
"loss": 0.3535,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 2.533074489502445,
|
|
"grad_norm": 0.3133733691437593,
|
|
"learning_rate": 1.1717969483192671e-06,
|
|
"loss": 0.3573,
|
|
"step": 17615
|
|
},
|
|
{
|
|
"epoch": 2.533793500143802,
|
|
"grad_norm": 0.308815961752189,
|
|
"learning_rate": 1.1682627810052693e-06,
|
|
"loss": 0.3459,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 2.5345125107851594,
|
|
"grad_norm": 0.30696375516601637,
|
|
"learning_rate": 1.1647336206497505e-06,
|
|
"loss": 0.3695,
|
|
"step": 17625
|
|
},
|
|
{
|
|
"epoch": 2.535231521426517,
|
|
"grad_norm": 0.3374303060855002,
|
|
"learning_rate": 1.161209469253487e-06,
|
|
"loss": 0.3521,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 2.535950532067875,
|
|
"grad_norm": 0.3135294382429081,
|
|
"learning_rate": 1.1576903288144237e-06,
|
|
"loss": 0.3688,
|
|
"step": 17635
|
|
},
|
|
{
|
|
"epoch": 2.536669542709232,
|
|
"grad_norm": 0.31574909250794675,
|
|
"learning_rate": 1.154176201327658e-06,
|
|
"loss": 0.3549,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 2.5373885533505893,
|
|
"grad_norm": 0.3171119631374329,
|
|
"learning_rate": 1.1506670887854432e-06,
|
|
"loss": 0.3611,
|
|
"step": 17645
|
|
},
|
|
{
|
|
"epoch": 2.538107563991947,
|
|
"grad_norm": 0.3381488113116444,
|
|
"learning_rate": 1.1471629931771988e-06,
|
|
"loss": 0.3626,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 2.5388265746333047,
|
|
"grad_norm": 0.3127780772949529,
|
|
"learning_rate": 1.1436639164894893e-06,
|
|
"loss": 0.3521,
|
|
"step": 17655
|
|
},
|
|
{
|
|
"epoch": 2.539545585274662,
|
|
"grad_norm": 0.3041307380485427,
|
|
"learning_rate": 1.1401698607060418e-06,
|
|
"loss": 0.3536,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 2.5402645959160197,
|
|
"grad_norm": 0.31128470056928004,
|
|
"learning_rate": 1.1366808278077368e-06,
|
|
"loss": 0.3631,
|
|
"step": 17665
|
|
},
|
|
{
|
|
"epoch": 2.540983606557377,
|
|
"grad_norm": 0.311601683935439,
|
|
"learning_rate": 1.1331968197725985e-06,
|
|
"loss": 0.3599,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 2.5417026171987347,
|
|
"grad_norm": 0.2928710699884809,
|
|
"learning_rate": 1.1297178385758146e-06,
|
|
"loss": 0.3679,
|
|
"step": 17675
|
|
},
|
|
{
|
|
"epoch": 2.542421627840092,
|
|
"grad_norm": 0.31033503445344474,
|
|
"learning_rate": 1.1262438861897117e-06,
|
|
"loss": 0.3461,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 2.5431406384814497,
|
|
"grad_norm": 0.3102263513449576,
|
|
"learning_rate": 1.1227749645837716e-06,
|
|
"loss": 0.3545,
|
|
"step": 17685
|
|
},
|
|
{
|
|
"epoch": 2.543859649122807,
|
|
"grad_norm": 0.3093251646618003,
|
|
"learning_rate": 1.1193110757246251e-06,
|
|
"loss": 0.345,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 2.5445786597641646,
|
|
"grad_norm": 0.31033525496857073,
|
|
"learning_rate": 1.115852221576047e-06,
|
|
"loss": 0.3604,
|
|
"step": 17695
|
|
},
|
|
{
|
|
"epoch": 2.545297670405522,
|
|
"grad_norm": 0.32487944012429926,
|
|
"learning_rate": 1.1123984040989532e-06,
|
|
"loss": 0.3446,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 2.5460166810468796,
|
|
"grad_norm": 0.30611626755303806,
|
|
"learning_rate": 1.1089496252514153e-06,
|
|
"loss": 0.3573,
|
|
"step": 17705
|
|
},
|
|
{
|
|
"epoch": 2.546735691688237,
|
|
"grad_norm": 0.31201259122817254,
|
|
"learning_rate": 1.1055058869886414e-06,
|
|
"loss": 0.3578,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 2.5474547023295946,
|
|
"grad_norm": 0.36987983935988966,
|
|
"learning_rate": 1.10206719126298e-06,
|
|
"loss": 0.3304,
|
|
"step": 17715
|
|
},
|
|
{
|
|
"epoch": 2.548173712970952,
|
|
"grad_norm": 0.30973641081492104,
|
|
"learning_rate": 1.0986335400239268e-06,
|
|
"loss": 0.3676,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 2.5488927236123096,
|
|
"grad_norm": 0.3063446783456226,
|
|
"learning_rate": 1.095204935218115e-06,
|
|
"loss": 0.3595,
|
|
"step": 17725
|
|
},
|
|
{
|
|
"epoch": 2.549611734253667,
|
|
"grad_norm": 0.2982906314716578,
|
|
"learning_rate": 1.0917813787893118e-06,
|
|
"loss": 0.3407,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 2.5503307448950245,
|
|
"grad_norm": 0.33960199106630035,
|
|
"learning_rate": 1.0883628726784323e-06,
|
|
"loss": 0.3699,
|
|
"step": 17735
|
|
},
|
|
{
|
|
"epoch": 2.551049755536382,
|
|
"grad_norm": 0.31833517392182115,
|
|
"learning_rate": 1.0849494188235198e-06,
|
|
"loss": 0.3476,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 2.5517687661777395,
|
|
"grad_norm": 0.3122112574473752,
|
|
"learning_rate": 1.0815410191597563e-06,
|
|
"loss": 0.3544,
|
|
"step": 17745
|
|
},
|
|
{
|
|
"epoch": 2.5524877768190968,
|
|
"grad_norm": 0.3128498719695118,
|
|
"learning_rate": 1.0781376756194628e-06,
|
|
"loss": 0.3553,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 2.5532067874604545,
|
|
"grad_norm": 0.30965344017042834,
|
|
"learning_rate": 1.0747393901320836e-06,
|
|
"loss": 0.3453,
|
|
"step": 17755
|
|
},
|
|
{
|
|
"epoch": 2.5539257981018118,
|
|
"grad_norm": 0.30062785024974664,
|
|
"learning_rate": 1.0713461646242063e-06,
|
|
"loss": 0.3557,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 2.5546448087431695,
|
|
"grad_norm": 0.3085812113907845,
|
|
"learning_rate": 1.0679580010195444e-06,
|
|
"loss": 0.3599,
|
|
"step": 17765
|
|
},
|
|
{
|
|
"epoch": 2.5553638193845267,
|
|
"grad_norm": 0.30933192998939213,
|
|
"learning_rate": 1.0645749012389438e-06,
|
|
"loss": 0.3653,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 2.5560828300258844,
|
|
"grad_norm": 0.3140084895452016,
|
|
"learning_rate": 1.0611968672003735e-06,
|
|
"loss": 0.3482,
|
|
"step": 17775
|
|
},
|
|
{
|
|
"epoch": 2.5568018406672417,
|
|
"grad_norm": 0.29400669684850556,
|
|
"learning_rate": 1.0578239008189406e-06,
|
|
"loss": 0.3525,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 2.5575208513085994,
|
|
"grad_norm": 0.31072248256953744,
|
|
"learning_rate": 1.0544560040068697e-06,
|
|
"loss": 0.3672,
|
|
"step": 17785
|
|
},
|
|
{
|
|
"epoch": 2.558239861949957,
|
|
"grad_norm": 0.30413205651252373,
|
|
"learning_rate": 1.0510931786735191e-06,
|
|
"loss": 0.3541,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 2.5589588725913144,
|
|
"grad_norm": 0.3209217544752419,
|
|
"learning_rate": 1.047735426725368e-06,
|
|
"loss": 0.3412,
|
|
"step": 17795
|
|
},
|
|
{
|
|
"epoch": 2.5596778832326716,
|
|
"grad_norm": 0.31029080755246485,
|
|
"learning_rate": 1.0443827500660152e-06,
|
|
"loss": 0.352,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 2.5603968938740294,
|
|
"grad_norm": 0.31861603957763635,
|
|
"learning_rate": 1.0410351505961912e-06,
|
|
"loss": 0.3636,
|
|
"step": 17805
|
|
},
|
|
{
|
|
"epoch": 2.561115904515387,
|
|
"grad_norm": 0.3134869954025055,
|
|
"learning_rate": 1.0376926302137435e-06,
|
|
"loss": 0.3471,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 2.5618349151567443,
|
|
"grad_norm": 0.31756572789531323,
|
|
"learning_rate": 1.0343551908136385e-06,
|
|
"loss": 0.3498,
|
|
"step": 17815
|
|
},
|
|
{
|
|
"epoch": 2.5625539257981016,
|
|
"grad_norm": 0.3161529464224881,
|
|
"learning_rate": 1.0310228342879658e-06,
|
|
"loss": 0.3523,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 2.5632729364394593,
|
|
"grad_norm": 0.30553748286795546,
|
|
"learning_rate": 1.0276955625259299e-06,
|
|
"loss": 0.3565,
|
|
"step": 17825
|
|
},
|
|
{
|
|
"epoch": 2.563991947080817,
|
|
"grad_norm": 0.3232474732656798,
|
|
"learning_rate": 1.024373377413853e-06,
|
|
"loss": 0.3724,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 2.5647109577221743,
|
|
"grad_norm": 0.2966854239027476,
|
|
"learning_rate": 1.0210562808351775e-06,
|
|
"loss": 0.369,
|
|
"step": 17835
|
|
},
|
|
{
|
|
"epoch": 2.5654299683635315,
|
|
"grad_norm": 0.31398220428883766,
|
|
"learning_rate": 1.017744274670457e-06,
|
|
"loss": 0.3637,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 2.5661489790048893,
|
|
"grad_norm": 0.3070092928631495,
|
|
"learning_rate": 1.0144373607973578e-06,
|
|
"loss": 0.3656,
|
|
"step": 17845
|
|
},
|
|
{
|
|
"epoch": 2.566867989646247,
|
|
"grad_norm": 0.30754228214286794,
|
|
"learning_rate": 1.0111355410906632e-06,
|
|
"loss": 0.3617,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 2.5675870002876042,
|
|
"grad_norm": 0.3079138574317495,
|
|
"learning_rate": 1.0078388174222696e-06,
|
|
"loss": 0.3558,
|
|
"step": 17855
|
|
},
|
|
{
|
|
"epoch": 2.5683060109289615,
|
|
"grad_norm": 0.2951486391053495,
|
|
"learning_rate": 1.004547191661178e-06,
|
|
"loss": 0.3581,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 2.569025021570319,
|
|
"grad_norm": 0.30423514391994716,
|
|
"learning_rate": 1.001260665673508e-06,
|
|
"loss": 0.3716,
|
|
"step": 17865
|
|
},
|
|
{
|
|
"epoch": 2.569744032211677,
|
|
"grad_norm": 0.31846350303726495,
|
|
"learning_rate": 9.979792413224775e-07,
|
|
"loss": 0.3706,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 2.570463042853034,
|
|
"grad_norm": 0.31488956701231063,
|
|
"learning_rate": 9.94702920468419e-07,
|
|
"loss": 0.3692,
|
|
"step": 17875
|
|
},
|
|
{
|
|
"epoch": 2.5711820534943914,
|
|
"grad_norm": 0.3092619322032862,
|
|
"learning_rate": 9.914317049687727e-07,
|
|
"loss": 0.3547,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 2.571901064135749,
|
|
"grad_norm": 0.3172533533313283,
|
|
"learning_rate": 9.88165596678079e-07,
|
|
"loss": 0.3549,
|
|
"step": 17885
|
|
},
|
|
{
|
|
"epoch": 2.572620074777107,
|
|
"grad_norm": 0.31089167574958426,
|
|
"learning_rate": 9.849045974479887e-07,
|
|
"loss": 0.3579,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 2.573339085418464,
|
|
"grad_norm": 0.3086333598820324,
|
|
"learning_rate": 9.81648709127252e-07,
|
|
"loss": 0.3663,
|
|
"step": 17895
|
|
},
|
|
{
|
|
"epoch": 2.574058096059822,
|
|
"grad_norm": 0.4560442882955573,
|
|
"learning_rate": 9.7839793356172e-07,
|
|
"loss": 0.3523,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 2.574777106701179,
|
|
"grad_norm": 0.3149997741880437,
|
|
"learning_rate": 9.751522725943519e-07,
|
|
"loss": 0.3577,
|
|
"step": 17905
|
|
},
|
|
{
|
|
"epoch": 2.575496117342537,
|
|
"grad_norm": 0.31545564164529466,
|
|
"learning_rate": 9.719117280652045e-07,
|
|
"loss": 0.3659,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 2.576215127983894,
|
|
"grad_norm": 0.29418506531220967,
|
|
"learning_rate": 9.686763018114299e-07,
|
|
"loss": 0.3609,
|
|
"step": 17915
|
|
},
|
|
{
|
|
"epoch": 2.576934138625252,
|
|
"grad_norm": 0.3551091360630454,
|
|
"learning_rate": 9.654459956672834e-07,
|
|
"loss": 0.3506,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 2.577653149266609,
|
|
"grad_norm": 0.30902365189976183,
|
|
"learning_rate": 9.622208114641163e-07,
|
|
"loss": 0.3554,
|
|
"step": 17925
|
|
},
|
|
{
|
|
"epoch": 2.5783721599079668,
|
|
"grad_norm": 0.31418474404733315,
|
|
"learning_rate": 9.590007510303711e-07,
|
|
"loss": 0.3663,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 2.579091170549324,
|
|
"grad_norm": 0.3012377127454265,
|
|
"learning_rate": 9.557858161915968e-07,
|
|
"loss": 0.3634,
|
|
"step": 17935
|
|
},
|
|
{
|
|
"epoch": 2.5798101811906817,
|
|
"grad_norm": 0.2982718072566914,
|
|
"learning_rate": 9.525760087704261e-07,
|
|
"loss": 0.3449,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 2.580529191832039,
|
|
"grad_norm": 0.31614761632902033,
|
|
"learning_rate": 9.493713305865859e-07,
|
|
"loss": 0.3554,
|
|
"step": 17945
|
|
},
|
|
{
|
|
"epoch": 2.5812482024733967,
|
|
"grad_norm": 0.31841954472989525,
|
|
"learning_rate": 9.461717834569007e-07,
|
|
"loss": 0.3593,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 2.581967213114754,
|
|
"grad_norm": 0.3125590065874319,
|
|
"learning_rate": 9.42977369195286e-07,
|
|
"loss": 0.3381,
|
|
"step": 17955
|
|
},
|
|
{
|
|
"epoch": 2.5826862237561117,
|
|
"grad_norm": 0.30646306122852585,
|
|
"learning_rate": 9.397880896127387e-07,
|
|
"loss": 0.3668,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 2.583405234397469,
|
|
"grad_norm": 0.31857745113245123,
|
|
"learning_rate": 9.366039465173549e-07,
|
|
"loss": 0.3409,
|
|
"step": 17965
|
|
},
|
|
{
|
|
"epoch": 2.5841242450388267,
|
|
"grad_norm": 0.30420519814884317,
|
|
"learning_rate": 9.334249417143126e-07,
|
|
"loss": 0.3542,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 2.584843255680184,
|
|
"grad_norm": 0.3189126112868735,
|
|
"learning_rate": 9.30251077005877e-07,
|
|
"loss": 0.3379,
|
|
"step": 17975
|
|
},
|
|
{
|
|
"epoch": 2.5855622663215416,
|
|
"grad_norm": 0.3112465994030675,
|
|
"learning_rate": 9.270823541914031e-07,
|
|
"loss": 0.3548,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 2.586281276962899,
|
|
"grad_norm": 0.3135723637127105,
|
|
"learning_rate": 9.239187750673284e-07,
|
|
"loss": 0.3598,
|
|
"step": 17985
|
|
},
|
|
{
|
|
"epoch": 2.5870002876042566,
|
|
"grad_norm": 0.3168485875913864,
|
|
"learning_rate": 9.207603414271704e-07,
|
|
"loss": 0.3442,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 2.587719298245614,
|
|
"grad_norm": 0.3130767429698904,
|
|
"learning_rate": 9.176070550615379e-07,
|
|
"loss": 0.3586,
|
|
"step": 17995
|
|
},
|
|
{
|
|
"epoch": 2.5884383088869716,
|
|
"grad_norm": 0.2986598627803862,
|
|
"learning_rate": 9.144589177581132e-07,
|
|
"loss": 0.3504,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 2.5891573195283293,
|
|
"grad_norm": 0.30468076985726344,
|
|
"learning_rate": 9.113159313016662e-07,
|
|
"loss": 0.3553,
|
|
"step": 18005
|
|
},
|
|
{
|
|
"epoch": 2.5898763301696865,
|
|
"grad_norm": 0.29062379652839426,
|
|
"learning_rate": 9.08178097474044e-07,
|
|
"loss": 0.3596,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 2.590595340811044,
|
|
"grad_norm": 0.31065467582213546,
|
|
"learning_rate": 9.050454180541679e-07,
|
|
"loss": 0.3576,
|
|
"step": 18015
|
|
},
|
|
{
|
|
"epoch": 2.5913143514524015,
|
|
"grad_norm": 0.31633070892778303,
|
|
"learning_rate": 9.019178948180474e-07,
|
|
"loss": 0.3548,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 2.5920333620937592,
|
|
"grad_norm": 0.3173632886828699,
|
|
"learning_rate": 8.987955295387596e-07,
|
|
"loss": 0.3699,
|
|
"step": 18025
|
|
},
|
|
{
|
|
"epoch": 2.5927523727351165,
|
|
"grad_norm": 0.29854965996870914,
|
|
"learning_rate": 8.956783239864586e-07,
|
|
"loss": 0.3514,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 2.5934713833764738,
|
|
"grad_norm": 0.31656456590031207,
|
|
"learning_rate": 8.925662799283797e-07,
|
|
"loss": 0.3668,
|
|
"step": 18035
|
|
},
|
|
{
|
|
"epoch": 2.5941903940178315,
|
|
"grad_norm": 0.5433124842010192,
|
|
"learning_rate": 8.894593991288259e-07,
|
|
"loss": 0.3555,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 2.594909404659189,
|
|
"grad_norm": 0.3222805926226404,
|
|
"learning_rate": 8.863576833491705e-07,
|
|
"loss": 0.348,
|
|
"step": 18045
|
|
},
|
|
{
|
|
"epoch": 2.5956284153005464,
|
|
"grad_norm": 0.3575555047987363,
|
|
"learning_rate": 8.832611343478681e-07,
|
|
"loss": 0.3617,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 2.5963474259419037,
|
|
"grad_norm": 0.3154306626253122,
|
|
"learning_rate": 8.801697538804377e-07,
|
|
"loss": 0.3497,
|
|
"step": 18055
|
|
},
|
|
{
|
|
"epoch": 2.5970664365832614,
|
|
"grad_norm": 0.2985330395295317,
|
|
"learning_rate": 8.770835436994674e-07,
|
|
"loss": 0.3508,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 2.597785447224619,
|
|
"grad_norm": 0.3031063965380999,
|
|
"learning_rate": 8.740025055546186e-07,
|
|
"loss": 0.3624,
|
|
"step": 18065
|
|
},
|
|
{
|
|
"epoch": 2.5985044578659764,
|
|
"grad_norm": 0.3214116364593015,
|
|
"learning_rate": 8.709266411926165e-07,
|
|
"loss": 0.3539,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 2.5992234685073337,
|
|
"grad_norm": 0.30763234128501515,
|
|
"learning_rate": 8.678559523572527e-07,
|
|
"loss": 0.3553,
|
|
"step": 18075
|
|
},
|
|
{
|
|
"epoch": 2.5999424791486914,
|
|
"grad_norm": 0.3001909852834488,
|
|
"learning_rate": 8.647904407893904e-07,
|
|
"loss": 0.3656,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 2.600661489790049,
|
|
"grad_norm": 0.30630237394703397,
|
|
"learning_rate": 8.617301082269514e-07,
|
|
"loss": 0.3554,
|
|
"step": 18085
|
|
},
|
|
{
|
|
"epoch": 2.6013805004314063,
|
|
"grad_norm": 0.31033057872110525,
|
|
"learning_rate": 8.586749564049223e-07,
|
|
"loss": 0.3544,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 2.6020995110727636,
|
|
"grad_norm": 0.3247236658783421,
|
|
"learning_rate": 8.556249870553546e-07,
|
|
"loss": 0.3477,
|
|
"step": 18095
|
|
},
|
|
{
|
|
"epoch": 2.6028185217141213,
|
|
"grad_norm": 0.32313911455388106,
|
|
"learning_rate": 8.525802019073647e-07,
|
|
"loss": 0.3568,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 2.603537532355479,
|
|
"grad_norm": 0.30326882259229654,
|
|
"learning_rate": 8.495406026871212e-07,
|
|
"loss": 0.3539,
|
|
"step": 18105
|
|
},
|
|
{
|
|
"epoch": 2.6042565429968363,
|
|
"grad_norm": 0.33029483328489695,
|
|
"learning_rate": 8.465061911178619e-07,
|
|
"loss": 0.3507,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 2.604975553638194,
|
|
"grad_norm": 0.31693139615258986,
|
|
"learning_rate": 8.434769689198763e-07,
|
|
"loss": 0.3484,
|
|
"step": 18115
|
|
},
|
|
{
|
|
"epoch": 2.6056945642795513,
|
|
"grad_norm": 0.30435444065947753,
|
|
"learning_rate": 8.404529378105186e-07,
|
|
"loss": 0.3524,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 2.606413574920909,
|
|
"grad_norm": 0.3145248094633032,
|
|
"learning_rate": 8.374340995041941e-07,
|
|
"loss": 0.3507,
|
|
"step": 18125
|
|
},
|
|
{
|
|
"epoch": 2.6071325855622662,
|
|
"grad_norm": 0.3027205535143405,
|
|
"learning_rate": 8.344204557123648e-07,
|
|
"loss": 0.3517,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 2.607851596203624,
|
|
"grad_norm": 0.3031565366950298,
|
|
"learning_rate": 8.314120081435539e-07,
|
|
"loss": 0.3615,
|
|
"step": 18135
|
|
},
|
|
{
|
|
"epoch": 2.608570606844981,
|
|
"grad_norm": 0.3123310425463929,
|
|
"learning_rate": 8.284087585033329e-07,
|
|
"loss": 0.3455,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 2.609289617486339,
|
|
"grad_norm": 0.3153991945372381,
|
|
"learning_rate": 8.254107084943241e-07,
|
|
"loss": 0.3657,
|
|
"step": 18145
|
|
},
|
|
{
|
|
"epoch": 2.610008628127696,
|
|
"grad_norm": 0.31486864682643856,
|
|
"learning_rate": 8.224178598162091e-07,
|
|
"loss": 0.3526,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 2.610727638769054,
|
|
"grad_norm": 0.30974477297603165,
|
|
"learning_rate": 8.194302141657185e-07,
|
|
"loss": 0.3504,
|
|
"step": 18155
|
|
},
|
|
{
|
|
"epoch": 2.611446649410411,
|
|
"grad_norm": 0.30736985502698316,
|
|
"learning_rate": 8.164477732366294e-07,
|
|
"loss": 0.3559,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 2.612165660051769,
|
|
"grad_norm": 0.31725108094571464,
|
|
"learning_rate": 8.134705387197728e-07,
|
|
"loss": 0.3564,
|
|
"step": 18165
|
|
},
|
|
{
|
|
"epoch": 2.612884670693126,
|
|
"grad_norm": 0.3200454927042961,
|
|
"learning_rate": 8.104985123030263e-07,
|
|
"loss": 0.3673,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 2.613603681334484,
|
|
"grad_norm": 0.30975804083250985,
|
|
"learning_rate": 8.075316956713119e-07,
|
|
"loss": 0.3436,
|
|
"step": 18175
|
|
},
|
|
{
|
|
"epoch": 2.614322691975841,
|
|
"grad_norm": 0.30489268103297146,
|
|
"learning_rate": 8.045700905066034e-07,
|
|
"loss": 0.3392,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 2.615041702617199,
|
|
"grad_norm": 0.3133491432561115,
|
|
"learning_rate": 8.016136984879175e-07,
|
|
"loss": 0.3717,
|
|
"step": 18185
|
|
},
|
|
{
|
|
"epoch": 2.615760713258556,
|
|
"grad_norm": 0.31546749604628177,
|
|
"learning_rate": 7.986625212913124e-07,
|
|
"loss": 0.3575,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 2.616479723899914,
|
|
"grad_norm": 0.30925664697615834,
|
|
"learning_rate": 7.957165605898964e-07,
|
|
"loss": 0.3481,
|
|
"step": 18195
|
|
},
|
|
{
|
|
"epoch": 2.617198734541271,
|
|
"grad_norm": 0.297558200940703,
|
|
"learning_rate": 7.927758180538158e-07,
|
|
"loss": 0.3432,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 2.6179177451826288,
|
|
"grad_norm": 0.3206407749722453,
|
|
"learning_rate": 7.898402953502582e-07,
|
|
"loss": 0.3409,
|
|
"step": 18205
|
|
},
|
|
{
|
|
"epoch": 2.618636755823986,
|
|
"grad_norm": 0.3243519309879927,
|
|
"learning_rate": 7.869099941434565e-07,
|
|
"loss": 0.3472,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 2.6193557664653437,
|
|
"grad_norm": 0.3168554065725209,
|
|
"learning_rate": 7.839849160946766e-07,
|
|
"loss": 0.3479,
|
|
"step": 18215
|
|
},
|
|
{
|
|
"epoch": 2.6200747771067014,
|
|
"grad_norm": 0.32068591660149026,
|
|
"learning_rate": 7.810650628622308e-07,
|
|
"loss": 0.3604,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 2.6207937877480587,
|
|
"grad_norm": 0.31186403526473955,
|
|
"learning_rate": 7.781504361014635e-07,
|
|
"loss": 0.3579,
|
|
"step": 18225
|
|
},
|
|
{
|
|
"epoch": 2.621512798389416,
|
|
"grad_norm": 0.3160010911992544,
|
|
"learning_rate": 7.752410374647557e-07,
|
|
"loss": 0.3615,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 2.6222318090307737,
|
|
"grad_norm": 0.31075518910777417,
|
|
"learning_rate": 7.723368686015309e-07,
|
|
"loss": 0.354,
|
|
"step": 18235
|
|
},
|
|
{
|
|
"epoch": 2.6229508196721314,
|
|
"grad_norm": 0.30430970186633305,
|
|
"learning_rate": 7.694379311582401e-07,
|
|
"loss": 0.3495,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 2.6236698303134887,
|
|
"grad_norm": 0.31048597447448645,
|
|
"learning_rate": 7.665442267783741e-07,
|
|
"loss": 0.3574,
|
|
"step": 18245
|
|
},
|
|
{
|
|
"epoch": 2.624388840954846,
|
|
"grad_norm": 0.309648764495263,
|
|
"learning_rate": 7.636557571024528e-07,
|
|
"loss": 0.3367,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 2.6251078515962036,
|
|
"grad_norm": 0.30943657694739996,
|
|
"learning_rate": 7.607725237680342e-07,
|
|
"loss": 0.3622,
|
|
"step": 18255
|
|
},
|
|
{
|
|
"epoch": 2.6258268622375613,
|
|
"grad_norm": 0.32244387001738317,
|
|
"learning_rate": 7.578945284096983e-07,
|
|
"loss": 0.354,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 2.6265458728789186,
|
|
"grad_norm": 0.3386284710455727,
|
|
"learning_rate": 7.550217726590658e-07,
|
|
"loss": 0.3562,
|
|
"step": 18265
|
|
},
|
|
{
|
|
"epoch": 2.627264883520276,
|
|
"grad_norm": 0.31781993035567807,
|
|
"learning_rate": 7.521542581447804e-07,
|
|
"loss": 0.3578,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 2.6279838941616336,
|
|
"grad_norm": 0.3027853958633486,
|
|
"learning_rate": 7.492919864925153e-07,
|
|
"loss": 0.3533,
|
|
"step": 18275
|
|
},
|
|
{
|
|
"epoch": 2.6287029048029913,
|
|
"grad_norm": 0.3042117835743225,
|
|
"learning_rate": 7.464349593249731e-07,
|
|
"loss": 0.3533,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 2.6294219154443486,
|
|
"grad_norm": 0.29235389032971715,
|
|
"learning_rate": 7.435831782618829e-07,
|
|
"loss": 0.3416,
|
|
"step": 18285
|
|
},
|
|
{
|
|
"epoch": 2.630140926085706,
|
|
"grad_norm": 0.30215436536724255,
|
|
"learning_rate": 7.407366449199959e-07,
|
|
"loss": 0.3579,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 2.6308599367270635,
|
|
"grad_norm": 0.3168111913387137,
|
|
"learning_rate": 7.378953609130946e-07,
|
|
"loss": 0.3599,
|
|
"step": 18295
|
|
},
|
|
{
|
|
"epoch": 2.6315789473684212,
|
|
"grad_norm": 0.37295092331405927,
|
|
"learning_rate": 7.350593278519824e-07,
|
|
"loss": 0.3532,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 2.6322979580097785,
|
|
"grad_norm": 0.32834135943493065,
|
|
"learning_rate": 7.322285473444835e-07,
|
|
"loss": 0.3683,
|
|
"step": 18305
|
|
},
|
|
{
|
|
"epoch": 2.6330169686511358,
|
|
"grad_norm": 0.3368581794847914,
|
|
"learning_rate": 7.294030209954494e-07,
|
|
"loss": 0.3553,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 2.6337359792924935,
|
|
"grad_norm": 0.3038531205271354,
|
|
"learning_rate": 7.265827504067479e-07,
|
|
"loss": 0.3599,
|
|
"step": 18315
|
|
},
|
|
{
|
|
"epoch": 2.634454989933851,
|
|
"grad_norm": 0.3133142852428987,
|
|
"learning_rate": 7.237677371772667e-07,
|
|
"loss": 0.353,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 2.6351740005752085,
|
|
"grad_norm": 0.31187021864545034,
|
|
"learning_rate": 7.209579829029211e-07,
|
|
"loss": 0.3354,
|
|
"step": 18325
|
|
},
|
|
{
|
|
"epoch": 2.635893011216566,
|
|
"grad_norm": 0.3087799419620964,
|
|
"learning_rate": 7.181534891766329e-07,
|
|
"loss": 0.3586,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 2.6366120218579234,
|
|
"grad_norm": 0.3013821823288862,
|
|
"learning_rate": 7.153542575883543e-07,
|
|
"loss": 0.3437,
|
|
"step": 18335
|
|
},
|
|
{
|
|
"epoch": 2.637331032499281,
|
|
"grad_norm": 0.31452374304676034,
|
|
"learning_rate": 7.125602897250427e-07,
|
|
"loss": 0.3544,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 2.6380500431406384,
|
|
"grad_norm": 0.3059157386158428,
|
|
"learning_rate": 7.097715871706778e-07,
|
|
"loss": 0.3714,
|
|
"step": 18345
|
|
},
|
|
{
|
|
"epoch": 2.638769053781996,
|
|
"grad_norm": 0.3128598559179821,
|
|
"learning_rate": 7.06988151506256e-07,
|
|
"loss": 0.3653,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 2.6394880644233534,
|
|
"grad_norm": 0.309303362265719,
|
|
"learning_rate": 7.042099843097827e-07,
|
|
"loss": 0.3426,
|
|
"step": 18355
|
|
},
|
|
{
|
|
"epoch": 2.640207075064711,
|
|
"grad_norm": 0.3038414633493157,
|
|
"learning_rate": 7.014370871562759e-07,
|
|
"loss": 0.354,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 2.6409260857060683,
|
|
"grad_norm": 0.31176618525619093,
|
|
"learning_rate": 6.986694616177736e-07,
|
|
"loss": 0.3691,
|
|
"step": 18365
|
|
},
|
|
{
|
|
"epoch": 2.641645096347426,
|
|
"grad_norm": 0.30105168726294435,
|
|
"learning_rate": 6.959071092633163e-07,
|
|
"loss": 0.3556,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 2.6423641069887833,
|
|
"grad_norm": 0.29985646196713983,
|
|
"learning_rate": 6.931500316589578e-07,
|
|
"loss": 0.351,
|
|
"step": 18375
|
|
},
|
|
{
|
|
"epoch": 2.643083117630141,
|
|
"grad_norm": 0.29909723415454065,
|
|
"learning_rate": 6.903982303677659e-07,
|
|
"loss": 0.348,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 2.6438021282714983,
|
|
"grad_norm": 0.2989167855128057,
|
|
"learning_rate": 6.876517069498123e-07,
|
|
"loss": 0.351,
|
|
"step": 18385
|
|
},
|
|
{
|
|
"epoch": 2.644521138912856,
|
|
"grad_norm": 0.30636967525330233,
|
|
"learning_rate": 6.84910462962175e-07,
|
|
"loss": 0.361,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 2.6452401495542133,
|
|
"grad_norm": 0.3426303376526915,
|
|
"learning_rate": 6.821744999589452e-07,
|
|
"loss": 0.3575,
|
|
"step": 18395
|
|
},
|
|
{
|
|
"epoch": 2.645959160195571,
|
|
"grad_norm": 0.3084242076335015,
|
|
"learning_rate": 6.794438194912168e-07,
|
|
"loss": 0.3355,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 2.6466781708369282,
|
|
"grad_norm": 0.3194661800181507,
|
|
"learning_rate": 6.767184231070855e-07,
|
|
"loss": 0.3658,
|
|
"step": 18405
|
|
},
|
|
{
|
|
"epoch": 2.647397181478286,
|
|
"grad_norm": 0.30745927021889674,
|
|
"learning_rate": 6.739983123516591e-07,
|
|
"loss": 0.3486,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 2.648116192119643,
|
|
"grad_norm": 0.3071331898673472,
|
|
"learning_rate": 6.712834887670417e-07,
|
|
"loss": 0.3545,
|
|
"step": 18415
|
|
},
|
|
{
|
|
"epoch": 2.648835202761001,
|
|
"grad_norm": 0.3124056453002358,
|
|
"learning_rate": 6.685739538923419e-07,
|
|
"loss": 0.3738,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 2.649554213402358,
|
|
"grad_norm": 0.3055154897012926,
|
|
"learning_rate": 6.658697092636735e-07,
|
|
"loss": 0.3396,
|
|
"step": 18425
|
|
},
|
|
{
|
|
"epoch": 2.650273224043716,
|
|
"grad_norm": 0.3171116831309729,
|
|
"learning_rate": 6.631707564141454e-07,
|
|
"loss": 0.351,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 2.6509922346850736,
|
|
"grad_norm": 0.31313833552359555,
|
|
"learning_rate": 6.604770968738705e-07,
|
|
"loss": 0.3673,
|
|
"step": 18435
|
|
},
|
|
{
|
|
"epoch": 2.651711245326431,
|
|
"grad_norm": 0.314753899908688,
|
|
"learning_rate": 6.577887321699583e-07,
|
|
"loss": 0.3766,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 2.652430255967788,
|
|
"grad_norm": 0.3044781194749951,
|
|
"learning_rate": 6.551056638265208e-07,
|
|
"loss": 0.3582,
|
|
"step": 18445
|
|
},
|
|
{
|
|
"epoch": 2.653149266609146,
|
|
"grad_norm": 0.30834514659184414,
|
|
"learning_rate": 6.524278933646633e-07,
|
|
"loss": 0.3409,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 2.6538682772505036,
|
|
"grad_norm": 0.2953324976439212,
|
|
"learning_rate": 6.497554223024883e-07,
|
|
"loss": 0.3643,
|
|
"step": 18455
|
|
},
|
|
{
|
|
"epoch": 2.654587287891861,
|
|
"grad_norm": 0.31342055968388427,
|
|
"learning_rate": 6.470882521550914e-07,
|
|
"loss": 0.3388,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 2.655306298533218,
|
|
"grad_norm": 0.30962123859836094,
|
|
"learning_rate": 6.44426384434571e-07,
|
|
"loss": 0.3533,
|
|
"step": 18465
|
|
},
|
|
{
|
|
"epoch": 2.656025309174576,
|
|
"grad_norm": 0.2992654887729307,
|
|
"learning_rate": 6.417698206500123e-07,
|
|
"loss": 0.375,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 2.6567443198159335,
|
|
"grad_norm": 0.30336724108173907,
|
|
"learning_rate": 6.391185623074935e-07,
|
|
"loss": 0.3558,
|
|
"step": 18475
|
|
},
|
|
{
|
|
"epoch": 2.6574633304572908,
|
|
"grad_norm": 0.31580569462875396,
|
|
"learning_rate": 6.364726109100894e-07,
|
|
"loss": 0.3579,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 2.658182341098648,
|
|
"grad_norm": 0.30753080592729337,
|
|
"learning_rate": 6.338319679578619e-07,
|
|
"loss": 0.3444,
|
|
"step": 18485
|
|
},
|
|
{
|
|
"epoch": 2.6589013517400057,
|
|
"grad_norm": 0.30236913583372577,
|
|
"learning_rate": 6.311966349478671e-07,
|
|
"loss": 0.3552,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 2.6596203623813635,
|
|
"grad_norm": 0.30749247481492153,
|
|
"learning_rate": 6.285666133741463e-07,
|
|
"loss": 0.3707,
|
|
"step": 18495
|
|
},
|
|
{
|
|
"epoch": 2.6603393730227207,
|
|
"grad_norm": 0.32415010648046794,
|
|
"learning_rate": 6.25941904727736e-07,
|
|
"loss": 0.3373,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 2.661058383664078,
|
|
"grad_norm": 0.2989538667429938,
|
|
"learning_rate": 6.233225104966534e-07,
|
|
"loss": 0.3389,
|
|
"step": 18505
|
|
},
|
|
{
|
|
"epoch": 2.6617773943054357,
|
|
"grad_norm": 0.31222880480938014,
|
|
"learning_rate": 6.207084321659085e-07,
|
|
"loss": 0.3556,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 2.6624964049467934,
|
|
"grad_norm": 0.3107654979460972,
|
|
"learning_rate": 6.180996712174936e-07,
|
|
"loss": 0.3428,
|
|
"step": 18515
|
|
},
|
|
{
|
|
"epoch": 2.6632154155881507,
|
|
"grad_norm": 0.29323243544712446,
|
|
"learning_rate": 6.15496229130389e-07,
|
|
"loss": 0.3582,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 2.663934426229508,
|
|
"grad_norm": 0.30046781829391606,
|
|
"learning_rate": 6.128981073805585e-07,
|
|
"loss": 0.3563,
|
|
"step": 18525
|
|
},
|
|
{
|
|
"epoch": 2.6646534368708656,
|
|
"grad_norm": 0.31216589986443405,
|
|
"learning_rate": 6.103053074409515e-07,
|
|
"loss": 0.3473,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 2.6653724475122234,
|
|
"grad_norm": 0.30703745582960706,
|
|
"learning_rate": 6.077178307814946e-07,
|
|
"loss": 0.3644,
|
|
"step": 18535
|
|
},
|
|
{
|
|
"epoch": 2.6660914581535806,
|
|
"grad_norm": 0.31184638928636954,
|
|
"learning_rate": 6.051356788691032e-07,
|
|
"loss": 0.3564,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 2.6668104687949383,
|
|
"grad_norm": 0.32313849767841774,
|
|
"learning_rate": 6.025588531676719e-07,
|
|
"loss": 0.3751,
|
|
"step": 18545
|
|
},
|
|
{
|
|
"epoch": 2.6675294794362956,
|
|
"grad_norm": 0.3294691163967009,
|
|
"learning_rate": 5.999873551380753e-07,
|
|
"loss": 0.3478,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 2.6682484900776533,
|
|
"grad_norm": 0.32674976118151594,
|
|
"learning_rate": 5.974211862381673e-07,
|
|
"loss": 0.3488,
|
|
"step": 18555
|
|
},
|
|
{
|
|
"epoch": 2.6689675007190106,
|
|
"grad_norm": 0.31470234216041487,
|
|
"learning_rate": 5.948603479227777e-07,
|
|
"loss": 0.3561,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 2.6696865113603683,
|
|
"grad_norm": 0.3300101863212417,
|
|
"learning_rate": 5.923048416437215e-07,
|
|
"loss": 0.3509,
|
|
"step": 18565
|
|
},
|
|
{
|
|
"epoch": 2.6704055220017255,
|
|
"grad_norm": 0.31705094998007205,
|
|
"learning_rate": 5.897546688497857e-07,
|
|
"loss": 0.3671,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 2.6711245326430832,
|
|
"grad_norm": 0.30827868767097877,
|
|
"learning_rate": 5.872098309867314e-07,
|
|
"loss": 0.3593,
|
|
"step": 18575
|
|
},
|
|
{
|
|
"epoch": 2.6718435432844405,
|
|
"grad_norm": 0.31382404359434163,
|
|
"learning_rate": 5.84670329497301e-07,
|
|
"loss": 0.3579,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 2.6725625539257982,
|
|
"grad_norm": 0.31692913487461893,
|
|
"learning_rate": 5.821361658212077e-07,
|
|
"loss": 0.3561,
|
|
"step": 18585
|
|
},
|
|
{
|
|
"epoch": 2.6732815645671555,
|
|
"grad_norm": 0.3022309763466634,
|
|
"learning_rate": 5.796073413951398e-07,
|
|
"loss": 0.3601,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 2.674000575208513,
|
|
"grad_norm": 0.30453373089008384,
|
|
"learning_rate": 5.770838576527604e-07,
|
|
"loss": 0.3567,
|
|
"step": 18595
|
|
},
|
|
{
|
|
"epoch": 2.6747195858498705,
|
|
"grad_norm": 0.31273564745374643,
|
|
"learning_rate": 5.74565716024702e-07,
|
|
"loss": 0.3525,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 2.675438596491228,
|
|
"grad_norm": 0.3188454358204983,
|
|
"learning_rate": 5.720529179385659e-07,
|
|
"loss": 0.3626,
|
|
"step": 18605
|
|
},
|
|
{
|
|
"epoch": 2.6761576071325854,
|
|
"grad_norm": 0.3144833786152819,
|
|
"learning_rate": 5.695454648189336e-07,
|
|
"loss": 0.3489,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 2.676876617773943,
|
|
"grad_norm": 0.30619781719720385,
|
|
"learning_rate": 5.670433580873458e-07,
|
|
"loss": 0.3625,
|
|
"step": 18615
|
|
},
|
|
{
|
|
"epoch": 2.6775956284153004,
|
|
"grad_norm": 0.3035857668552543,
|
|
"learning_rate": 5.645465991623167e-07,
|
|
"loss": 0.3523,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 2.678314639056658,
|
|
"grad_norm": 0.31449667720556634,
|
|
"learning_rate": 5.620551894593318e-07,
|
|
"loss": 0.3473,
|
|
"step": 18625
|
|
},
|
|
{
|
|
"epoch": 2.6790336496980154,
|
|
"grad_norm": 0.3142448634332198,
|
|
"learning_rate": 5.595691303908368e-07,
|
|
"loss": 0.3458,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 2.679752660339373,
|
|
"grad_norm": 0.30135205870349274,
|
|
"learning_rate": 5.570884233662521e-07,
|
|
"loss": 0.3552,
|
|
"step": 18635
|
|
},
|
|
{
|
|
"epoch": 2.6804716709807304,
|
|
"grad_norm": 0.30971611305814795,
|
|
"learning_rate": 5.54613069791956e-07,
|
|
"loss": 0.3545,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 2.681190681622088,
|
|
"grad_norm": 0.3033914862578706,
|
|
"learning_rate": 5.521430710712994e-07,
|
|
"loss": 0.3495,
|
|
"step": 18645
|
|
},
|
|
{
|
|
"epoch": 2.6819096922634453,
|
|
"grad_norm": 0.30910133743457535,
|
|
"learning_rate": 5.496784286045898e-07,
|
|
"loss": 0.3553,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 2.682628702904803,
|
|
"grad_norm": 0.3221062115483798,
|
|
"learning_rate": 5.47219143789105e-07,
|
|
"loss": 0.3524,
|
|
"step": 18655
|
|
},
|
|
{
|
|
"epoch": 2.6833477135461603,
|
|
"grad_norm": 0.31435498970811887,
|
|
"learning_rate": 5.447652180190799e-07,
|
|
"loss": 0.3554,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 2.684066724187518,
|
|
"grad_norm": 0.3086658790808085,
|
|
"learning_rate": 5.42316652685716e-07,
|
|
"loss": 0.3533,
|
|
"step": 18665
|
|
},
|
|
{
|
|
"epoch": 2.6847857348288757,
|
|
"grad_norm": 0.3130566647412001,
|
|
"learning_rate": 5.398734491771718e-07,
|
|
"loss": 0.3338,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 2.685504745470233,
|
|
"grad_norm": 0.3141328453505302,
|
|
"learning_rate": 5.374356088785659e-07,
|
|
"loss": 0.3438,
|
|
"step": 18675
|
|
},
|
|
{
|
|
"epoch": 2.6862237561115903,
|
|
"grad_norm": 0.3196956028730137,
|
|
"learning_rate": 5.350031331719818e-07,
|
|
"loss": 0.36,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 2.686942766752948,
|
|
"grad_norm": 0.29995725083229124,
|
|
"learning_rate": 5.325760234364541e-07,
|
|
"loss": 0.3523,
|
|
"step": 18685
|
|
},
|
|
{
|
|
"epoch": 2.6876617773943057,
|
|
"grad_norm": 0.3084889722071449,
|
|
"learning_rate": 5.301542810479809e-07,
|
|
"loss": 0.3379,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 2.688380788035663,
|
|
"grad_norm": 0.2988514783129927,
|
|
"learning_rate": 5.277379073795175e-07,
|
|
"loss": 0.3523,
|
|
"step": 18695
|
|
},
|
|
{
|
|
"epoch": 2.68909979867702,
|
|
"grad_norm": 0.3201023826593751,
|
|
"learning_rate": 5.253269038009711e-07,
|
|
"loss": 0.3625,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 2.689818809318378,
|
|
"grad_norm": 0.31833673612930263,
|
|
"learning_rate": 5.229212716792065e-07,
|
|
"loss": 0.3449,
|
|
"step": 18705
|
|
},
|
|
{
|
|
"epoch": 2.6905378199597356,
|
|
"grad_norm": 0.3170454396678282,
|
|
"learning_rate": 5.205210123780468e-07,
|
|
"loss": 0.3753,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 2.691256830601093,
|
|
"grad_norm": 0.30917530066201315,
|
|
"learning_rate": 5.181261272582638e-07,
|
|
"loss": 0.3579,
|
|
"step": 18715
|
|
},
|
|
{
|
|
"epoch": 2.69197584124245,
|
|
"grad_norm": 0.3104175867900994,
|
|
"learning_rate": 5.157366176775835e-07,
|
|
"loss": 0.3562,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 2.692694851883808,
|
|
"grad_norm": 0.31521969091333557,
|
|
"learning_rate": 5.13352484990689e-07,
|
|
"loss": 0.3516,
|
|
"step": 18725
|
|
},
|
|
{
|
|
"epoch": 2.6934138625251656,
|
|
"grad_norm": 0.3154907210066356,
|
|
"learning_rate": 5.10973730549208e-07,
|
|
"loss": 0.353,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 2.694132873166523,
|
|
"grad_norm": 0.31154418289279073,
|
|
"learning_rate": 5.08600355701725e-07,
|
|
"loss": 0.352,
|
|
"step": 18735
|
|
},
|
|
{
|
|
"epoch": 2.69485188380788,
|
|
"grad_norm": 0.3218114148291255,
|
|
"learning_rate": 5.062323617937736e-07,
|
|
"loss": 0.3671,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 2.695570894449238,
|
|
"grad_norm": 0.30783533205590813,
|
|
"learning_rate": 5.038697501678336e-07,
|
|
"loss": 0.3639,
|
|
"step": 18745
|
|
},
|
|
{
|
|
"epoch": 2.6962899050905955,
|
|
"grad_norm": 0.32083279526223407,
|
|
"learning_rate": 5.015125221633355e-07,
|
|
"loss": 0.3592,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 2.697008915731953,
|
|
"grad_norm": 0.31814805964899057,
|
|
"learning_rate": 4.991606791166592e-07,
|
|
"loss": 0.3467,
|
|
"step": 18755
|
|
},
|
|
{
|
|
"epoch": 2.69772792637331,
|
|
"grad_norm": 0.3238850743949208,
|
|
"learning_rate": 4.968142223611306e-07,
|
|
"loss": 0.3682,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 2.6984469370146678,
|
|
"grad_norm": 0.29977506526631764,
|
|
"learning_rate": 4.944731532270175e-07,
|
|
"loss": 0.3479,
|
|
"step": 18765
|
|
},
|
|
{
|
|
"epoch": 2.6991659476560255,
|
|
"grad_norm": 0.3094979116924287,
|
|
"learning_rate": 4.921374730415418e-07,
|
|
"loss": 0.3532,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 2.6998849582973827,
|
|
"grad_norm": 0.3136896957980207,
|
|
"learning_rate": 4.898071831288631e-07,
|
|
"loss": 0.3531,
|
|
"step": 18775
|
|
},
|
|
{
|
|
"epoch": 2.7006039689387404,
|
|
"grad_norm": 0.300332682624589,
|
|
"learning_rate": 4.874822848100902e-07,
|
|
"loss": 0.3456,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 2.7013229795800977,
|
|
"grad_norm": 0.30510090318461636,
|
|
"learning_rate": 4.851627794032709e-07,
|
|
"loss": 0.3552,
|
|
"step": 18785
|
|
},
|
|
{
|
|
"epoch": 2.7020419902214554,
|
|
"grad_norm": 0.3053803908202146,
|
|
"learning_rate": 4.82848668223398e-07,
|
|
"loss": 0.3505,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 2.7027610008628127,
|
|
"grad_norm": 0.3145419940435831,
|
|
"learning_rate": 4.805399525824072e-07,
|
|
"loss": 0.3526,
|
|
"step": 18795
|
|
},
|
|
{
|
|
"epoch": 2.7034800115041704,
|
|
"grad_norm": 0.30657817865780707,
|
|
"learning_rate": 4.78236633789173e-07,
|
|
"loss": 0.329,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 2.7041990221455277,
|
|
"grad_norm": 0.30153501861176296,
|
|
"learning_rate": 4.759387131495097e-07,
|
|
"loss": 0.3476,
|
|
"step": 18805
|
|
},
|
|
{
|
|
"epoch": 2.7049180327868854,
|
|
"grad_norm": 0.3200553213274552,
|
|
"learning_rate": 4.73646191966175e-07,
|
|
"loss": 0.3509,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 2.7056370434282426,
|
|
"grad_norm": 0.32163306304030215,
|
|
"learning_rate": 4.7135907153886163e-07,
|
|
"loss": 0.3553,
|
|
"step": 18815
|
|
},
|
|
{
|
|
"epoch": 2.7063560540696003,
|
|
"grad_norm": 0.3101983446467971,
|
|
"learning_rate": 4.690773531642023e-07,
|
|
"loss": 0.3489,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 2.7070750647109576,
|
|
"grad_norm": 0.3045462102486363,
|
|
"learning_rate": 4.668010381357679e-07,
|
|
"loss": 0.3647,
|
|
"step": 18825
|
|
},
|
|
{
|
|
"epoch": 2.7077940753523153,
|
|
"grad_norm": 0.37845373384606695,
|
|
"learning_rate": 4.6453012774406283e-07,
|
|
"loss": 0.351,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 2.7085130859936726,
|
|
"grad_norm": 0.3085350917759782,
|
|
"learning_rate": 4.622646232765304e-07,
|
|
"loss": 0.3349,
|
|
"step": 18835
|
|
},
|
|
{
|
|
"epoch": 2.7092320966350303,
|
|
"grad_norm": 0.3136971389236542,
|
|
"learning_rate": 4.600045260175512e-07,
|
|
"loss": 0.3368,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 2.7099511072763875,
|
|
"grad_norm": 0.3124886404752603,
|
|
"learning_rate": 4.577498372484346e-07,
|
|
"loss": 0.3704,
|
|
"step": 18845
|
|
},
|
|
{
|
|
"epoch": 2.7106701179177453,
|
|
"grad_norm": 0.3111384819969063,
|
|
"learning_rate": 4.555005582474259e-07,
|
|
"loss": 0.3569,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 2.7113891285591025,
|
|
"grad_norm": 0.28801618774079224,
|
|
"learning_rate": 4.532566902897062e-07,
|
|
"loss": 0.3563,
|
|
"step": 18855
|
|
},
|
|
{
|
|
"epoch": 2.7121081392004602,
|
|
"grad_norm": 0.2971301894165523,
|
|
"learning_rate": 4.5101823464738683e-07,
|
|
"loss": 0.3438,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 2.7128271498418175,
|
|
"grad_norm": 0.31455805491954564,
|
|
"learning_rate": 4.4878519258950927e-07,
|
|
"loss": 0.3746,
|
|
"step": 18865
|
|
},
|
|
{
|
|
"epoch": 2.713546160483175,
|
|
"grad_norm": 0.3065010227643302,
|
|
"learning_rate": 4.4655756538204977e-07,
|
|
"loss": 0.339,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 2.7142651711245325,
|
|
"grad_norm": 0.3015148779282554,
|
|
"learning_rate": 4.443353542879092e-07,
|
|
"loss": 0.3555,
|
|
"step": 18875
|
|
},
|
|
{
|
|
"epoch": 2.71498418176589,
|
|
"grad_norm": 0.3211160666636949,
|
|
"learning_rate": 4.4211856056692424e-07,
|
|
"loss": 0.3699,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 2.715703192407248,
|
|
"grad_norm": 0.3102988134950522,
|
|
"learning_rate": 4.399071854758541e-07,
|
|
"loss": 0.3593,
|
|
"step": 18885
|
|
},
|
|
{
|
|
"epoch": 2.716422203048605,
|
|
"grad_norm": 0.3171698575154616,
|
|
"learning_rate": 4.377012302683914e-07,
|
|
"loss": 0.3732,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 2.7171412136899624,
|
|
"grad_norm": 0.31623857776697184,
|
|
"learning_rate": 4.3550069619515357e-07,
|
|
"loss": 0.3388,
|
|
"step": 18895
|
|
},
|
|
{
|
|
"epoch": 2.71786022433132,
|
|
"grad_norm": 0.29798574840923625,
|
|
"learning_rate": 4.33305584503686e-07,
|
|
"loss": 0.3484,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 2.718579234972678,
|
|
"grad_norm": 0.30965254627004185,
|
|
"learning_rate": 4.311158964384543e-07,
|
|
"loss": 0.3489,
|
|
"step": 18905
|
|
},
|
|
{
|
|
"epoch": 2.719298245614035,
|
|
"grad_norm": 0.3090538492119933,
|
|
"learning_rate": 4.2893163324085886e-07,
|
|
"loss": 0.3511,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 2.7200172562553924,
|
|
"grad_norm": 0.2998415100524849,
|
|
"learning_rate": 4.2675279614921683e-07,
|
|
"loss": 0.3453,
|
|
"step": 18915
|
|
},
|
|
{
|
|
"epoch": 2.72073626689675,
|
|
"grad_norm": 0.32312935380484853,
|
|
"learning_rate": 4.2457938639877126e-07,
|
|
"loss": 0.3573,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 2.721455277538108,
|
|
"grad_norm": 0.30687821717128466,
|
|
"learning_rate": 4.22411405221691e-07,
|
|
"loss": 0.3572,
|
|
"step": 18925
|
|
},
|
|
{
|
|
"epoch": 2.722174288179465,
|
|
"grad_norm": 0.3094724714007833,
|
|
"learning_rate": 4.202488538470628e-07,
|
|
"loss": 0.3552,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 2.7228932988208223,
|
|
"grad_norm": 0.30483700125558255,
|
|
"learning_rate": 4.180917335008994e-07,
|
|
"loss": 0.3512,
|
|
"step": 18935
|
|
},
|
|
{
|
|
"epoch": 2.72361230946218,
|
|
"grad_norm": 0.317142667617482,
|
|
"learning_rate": 4.159400454061324e-07,
|
|
"loss": 0.3608,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 2.7243313201035377,
|
|
"grad_norm": 0.30437333279705286,
|
|
"learning_rate": 4.1379379078261285e-07,
|
|
"loss": 0.3461,
|
|
"step": 18945
|
|
},
|
|
{
|
|
"epoch": 2.725050330744895,
|
|
"grad_norm": 0.315368021904134,
|
|
"learning_rate": 4.1165297084711176e-07,
|
|
"loss": 0.3539,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 2.7257693413862523,
|
|
"grad_norm": 0.2951030237927974,
|
|
"learning_rate": 4.095175868133228e-07,
|
|
"loss": 0.3325,
|
|
"step": 18955
|
|
},
|
|
{
|
|
"epoch": 2.72648835202761,
|
|
"grad_norm": 0.32123425416763773,
|
|
"learning_rate": 4.073876398918519e-07,
|
|
"loss": 0.3659,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 2.7272073626689677,
|
|
"grad_norm": 0.3138156016388089,
|
|
"learning_rate": 4.0526313129022556e-07,
|
|
"loss": 0.3643,
|
|
"step": 18965
|
|
},
|
|
{
|
|
"epoch": 2.727926373310325,
|
|
"grad_norm": 0.31792892655190597,
|
|
"learning_rate": 4.0314406221288904e-07,
|
|
"loss": 0.3548,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 2.728645383951682,
|
|
"grad_norm": 0.31750579980553373,
|
|
"learning_rate": 4.0103043386120034e-07,
|
|
"loss": 0.3534,
|
|
"step": 18975
|
|
},
|
|
{
|
|
"epoch": 2.72936439459304,
|
|
"grad_norm": 0.3127828508352821,
|
|
"learning_rate": 3.989222474334331e-07,
|
|
"loss": 0.3552,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 2.7300834052343976,
|
|
"grad_norm": 0.31537497523927804,
|
|
"learning_rate": 3.968195041247813e-07,
|
|
"loss": 0.3583,
|
|
"step": 18985
|
|
},
|
|
{
|
|
"epoch": 2.730802415875755,
|
|
"grad_norm": 0.31938161691099504,
|
|
"learning_rate": 3.947222051273436e-07,
|
|
"loss": 0.3501,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 2.7315214265171126,
|
|
"grad_norm": 0.31652148422752785,
|
|
"learning_rate": 3.9263035163014216e-07,
|
|
"loss": 0.3444,
|
|
"step": 18995
|
|
},
|
|
{
|
|
"epoch": 2.73224043715847,
|
|
"grad_norm": 0.30126188868707954,
|
|
"learning_rate": 3.9054394481910507e-07,
|
|
"loss": 0.3586,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 2.7329594477998276,
|
|
"grad_norm": 0.3102429634472407,
|
|
"learning_rate": 3.8846298587707276e-07,
|
|
"loss": 0.3558,
|
|
"step": 19005
|
|
},
|
|
{
|
|
"epoch": 2.733678458441185,
|
|
"grad_norm": 0.32717685832361776,
|
|
"learning_rate": 3.863874759838027e-07,
|
|
"loss": 0.342,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 2.7343974690825426,
|
|
"grad_norm": 0.3122104808693987,
|
|
"learning_rate": 3.8431741631595577e-07,
|
|
"loss": 0.351,
|
|
"step": 19015
|
|
},
|
|
{
|
|
"epoch": 2.7351164797239,
|
|
"grad_norm": 0.309312955399503,
|
|
"learning_rate": 3.8225280804710884e-07,
|
|
"loss": 0.3453,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 2.7358354903652575,
|
|
"grad_norm": 0.31322827510848456,
|
|
"learning_rate": 3.8019365234774565e-07,
|
|
"loss": 0.351,
|
|
"step": 19025
|
|
},
|
|
{
|
|
"epoch": 2.736554501006615,
|
|
"grad_norm": 0.30609678361472104,
|
|
"learning_rate": 3.7813995038525785e-07,
|
|
"loss": 0.3467,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 2.7372735116479725,
|
|
"grad_norm": 0.3036416135499562,
|
|
"learning_rate": 3.760917033239475e-07,
|
|
"loss": 0.3696,
|
|
"step": 19035
|
|
},
|
|
{
|
|
"epoch": 2.7379925222893298,
|
|
"grad_norm": 0.3127183110799516,
|
|
"learning_rate": 3.740489123250246e-07,
|
|
"loss": 0.335,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 2.7387115329306875,
|
|
"grad_norm": 0.31472999885863273,
|
|
"learning_rate": 3.7201157854660276e-07,
|
|
"loss": 0.3531,
|
|
"step": 19045
|
|
},
|
|
{
|
|
"epoch": 2.7394305435720447,
|
|
"grad_norm": 0.3060140112019616,
|
|
"learning_rate": 3.6997970314370244e-07,
|
|
"loss": 0.333,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 2.7401495542134024,
|
|
"grad_norm": 0.31436707159088967,
|
|
"learning_rate": 3.679532872682523e-07,
|
|
"loss": 0.3564,
|
|
"step": 19055
|
|
},
|
|
{
|
|
"epoch": 2.7408685648547597,
|
|
"grad_norm": 0.2973837776497081,
|
|
"learning_rate": 3.659323320690833e-07,
|
|
"loss": 0.3583,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 2.7415875754961174,
|
|
"grad_norm": 0.31471514171052006,
|
|
"learning_rate": 3.6391683869193005e-07,
|
|
"loss": 0.3572,
|
|
"step": 19065
|
|
},
|
|
{
|
|
"epoch": 2.7423065861374747,
|
|
"grad_norm": 0.31183057464625613,
|
|
"learning_rate": 3.619068082794353e-07,
|
|
"loss": 0.3585,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 2.7430255967788324,
|
|
"grad_norm": 0.3187039125707166,
|
|
"learning_rate": 3.5990224197113843e-07,
|
|
"loss": 0.3604,
|
|
"step": 19075
|
|
},
|
|
{
|
|
"epoch": 2.7437446074201897,
|
|
"grad_norm": 0.32225379075816213,
|
|
"learning_rate": 3.579031409034839e-07,
|
|
"loss": 0.3545,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 2.7444636180615474,
|
|
"grad_norm": 0.31170249003966904,
|
|
"learning_rate": 3.559095062098217e-07,
|
|
"loss": 0.3418,
|
|
"step": 19085
|
|
},
|
|
{
|
|
"epoch": 2.7451826287029046,
|
|
"grad_norm": 0.3076672510727331,
|
|
"learning_rate": 3.5392133902039663e-07,
|
|
"loss": 0.3519,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 2.7459016393442623,
|
|
"grad_norm": 0.32914504991219135,
|
|
"learning_rate": 3.5193864046235373e-07,
|
|
"loss": 0.3479,
|
|
"step": 19095
|
|
},
|
|
{
|
|
"epoch": 2.74662064998562,
|
|
"grad_norm": 0.2894981089596311,
|
|
"learning_rate": 3.4996141165974494e-07,
|
|
"loss": 0.3551,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 2.7473396606269773,
|
|
"grad_norm": 0.3291202963850717,
|
|
"learning_rate": 3.479896537335126e-07,
|
|
"loss": 0.345,
|
|
"step": 19105
|
|
},
|
|
{
|
|
"epoch": 2.7480586712683346,
|
|
"grad_norm": 0.3156365442267992,
|
|
"learning_rate": 3.4602336780150345e-07,
|
|
"loss": 0.3368,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 2.7487776819096923,
|
|
"grad_norm": 0.3131468049975896,
|
|
"learning_rate": 3.440625549784604e-07,
|
|
"loss": 0.3651,
|
|
"step": 19115
|
|
},
|
|
{
|
|
"epoch": 2.74949669255105,
|
|
"grad_norm": 0.32355695103390114,
|
|
"learning_rate": 3.4210721637601973e-07,
|
|
"loss": 0.3485,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 2.7502157031924073,
|
|
"grad_norm": 0.3265074768625927,
|
|
"learning_rate": 3.4015735310272024e-07,
|
|
"loss": 0.3545,
|
|
"step": 19125
|
|
},
|
|
{
|
|
"epoch": 2.7509347138337645,
|
|
"grad_norm": 0.31465707754685873,
|
|
"learning_rate": 3.3821296626399436e-07,
|
|
"loss": 0.336,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 2.7516537244751222,
|
|
"grad_norm": 0.3164483446457853,
|
|
"learning_rate": 3.36274056962167e-07,
|
|
"loss": 0.3563,
|
|
"step": 19135
|
|
},
|
|
{
|
|
"epoch": 2.75237273511648,
|
|
"grad_norm": 0.2971233101419614,
|
|
"learning_rate": 3.343406262964621e-07,
|
|
"loss": 0.3439,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 2.753091745757837,
|
|
"grad_norm": 0.3035407471454841,
|
|
"learning_rate": 3.3241267536299524e-07,
|
|
"loss": 0.3623,
|
|
"step": 19145
|
|
},
|
|
{
|
|
"epoch": 2.7538107563991945,
|
|
"grad_norm": 0.30526484121954384,
|
|
"learning_rate": 3.3049020525477316e-07,
|
|
"loss": 0.3393,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 2.754529767040552,
|
|
"grad_norm": 0.3024804150027958,
|
|
"learning_rate": 3.2857321706170175e-07,
|
|
"loss": 0.3508,
|
|
"step": 19155
|
|
},
|
|
{
|
|
"epoch": 2.75524877768191,
|
|
"grad_norm": 0.3124146280953073,
|
|
"learning_rate": 3.2666171187057284e-07,
|
|
"loss": 0.3588,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 2.755967788323267,
|
|
"grad_norm": 0.35628555495546016,
|
|
"learning_rate": 3.2475569076507064e-07,
|
|
"loss": 0.3479,
|
|
"step": 19165
|
|
},
|
|
{
|
|
"epoch": 2.7566867989646244,
|
|
"grad_norm": 0.31606148166597947,
|
|
"learning_rate": 3.2285515482577524e-07,
|
|
"loss": 0.3464,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 2.757405809605982,
|
|
"grad_norm": 0.3141344280963558,
|
|
"learning_rate": 3.209601051301503e-07,
|
|
"loss": 0.342,
|
|
"step": 19175
|
|
},
|
|
{
|
|
"epoch": 2.75812482024734,
|
|
"grad_norm": 0.3032858758815662,
|
|
"learning_rate": 3.190705427525542e-07,
|
|
"loss": 0.357,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 2.758843830888697,
|
|
"grad_norm": 0.316002321061407,
|
|
"learning_rate": 3.171864687642334e-07,
|
|
"loss": 0.3501,
|
|
"step": 19185
|
|
},
|
|
{
|
|
"epoch": 2.7595628415300544,
|
|
"grad_norm": 0.3096518923870791,
|
|
"learning_rate": 3.1530788423332124e-07,
|
|
"loss": 0.3508,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 2.760281852171412,
|
|
"grad_norm": 0.3083885146318327,
|
|
"learning_rate": 3.1343479022483805e-07,
|
|
"loss": 0.3627,
|
|
"step": 19195
|
|
},
|
|
{
|
|
"epoch": 2.76100086281277,
|
|
"grad_norm": 0.3401606194708313,
|
|
"learning_rate": 3.115671878006965e-07,
|
|
"loss": 0.3619,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 2.761719873454127,
|
|
"grad_norm": 0.31816989117475597,
|
|
"learning_rate": 3.097050780196886e-07,
|
|
"loss": 0.3552,
|
|
"step": 19205
|
|
},
|
|
{
|
|
"epoch": 2.7624388840954848,
|
|
"grad_norm": 0.3215018520643212,
|
|
"learning_rate": 3.0784846193749995e-07,
|
|
"loss": 0.3632,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 2.763157894736842,
|
|
"grad_norm": 0.306117019547072,
|
|
"learning_rate": 3.059973406066963e-07,
|
|
"loss": 0.369,
|
|
"step": 19215
|
|
},
|
|
{
|
|
"epoch": 2.7638769053781997,
|
|
"grad_norm": 0.3075540707527376,
|
|
"learning_rate": 3.0415171507673034e-07,
|
|
"loss": 0.3615,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 2.764595916019557,
|
|
"grad_norm": 0.3167288278275876,
|
|
"learning_rate": 3.0231158639393744e-07,
|
|
"loss": 0.359,
|
|
"step": 19225
|
|
},
|
|
{
|
|
"epoch": 2.7653149266609147,
|
|
"grad_norm": 0.31235025822274415,
|
|
"learning_rate": 3.004769556015408e-07,
|
|
"loss": 0.3621,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 2.766033937302272,
|
|
"grad_norm": 0.31043606133104146,
|
|
"learning_rate": 2.9864782373964064e-07,
|
|
"loss": 0.3627,
|
|
"step": 19235
|
|
},
|
|
{
|
|
"epoch": 2.7667529479436297,
|
|
"grad_norm": 0.3061465253691321,
|
|
"learning_rate": 2.968241918452264e-07,
|
|
"loss": 0.3508,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 2.767471958584987,
|
|
"grad_norm": 0.31869793718769496,
|
|
"learning_rate": 2.9500606095216323e-07,
|
|
"loss": 0.3503,
|
|
"step": 19245
|
|
},
|
|
{
|
|
"epoch": 2.7681909692263447,
|
|
"grad_norm": 0.2998803905548194,
|
|
"learning_rate": 2.931934320912011e-07,
|
|
"loss": 0.3595,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 2.768909979867702,
|
|
"grad_norm": 0.29028144343980955,
|
|
"learning_rate": 2.913863062899702e-07,
|
|
"loss": 0.3542,
|
|
"step": 19255
|
|
},
|
|
{
|
|
"epoch": 2.7696289905090596,
|
|
"grad_norm": 0.32494600515578226,
|
|
"learning_rate": 2.8958468457297996e-07,
|
|
"loss": 0.3553,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 2.770348001150417,
|
|
"grad_norm": 0.3034610623098884,
|
|
"learning_rate": 2.8778856796161994e-07,
|
|
"loss": 0.3567,
|
|
"step": 19265
|
|
},
|
|
{
|
|
"epoch": 2.7710670117917746,
|
|
"grad_norm": 0.3069232780605492,
|
|
"learning_rate": 2.859979574741589e-07,
|
|
"loss": 0.3572,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 2.771786022433132,
|
|
"grad_norm": 0.31245632549477803,
|
|
"learning_rate": 2.8421285412574607e-07,
|
|
"loss": 0.3592,
|
|
"step": 19275
|
|
},
|
|
{
|
|
"epoch": 2.7725050330744896,
|
|
"grad_norm": 0.3033053941899692,
|
|
"learning_rate": 2.824332589284029e-07,
|
|
"loss": 0.3417,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 2.773224043715847,
|
|
"grad_norm": 0.32735633280602244,
|
|
"learning_rate": 2.806591728910357e-07,
|
|
"loss": 0.3577,
|
|
"step": 19285
|
|
},
|
|
{
|
|
"epoch": 2.7739430543572046,
|
|
"grad_norm": 0.31162274414451313,
|
|
"learning_rate": 2.7889059701942e-07,
|
|
"loss": 0.3488,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 2.774662064998562,
|
|
"grad_norm": 0.307206554261849,
|
|
"learning_rate": 2.7712753231621036e-07,
|
|
"loss": 0.3561,
|
|
"step": 19295
|
|
},
|
|
{
|
|
"epoch": 2.7753810756399195,
|
|
"grad_norm": 0.31895368120415385,
|
|
"learning_rate": 2.753699797809406e-07,
|
|
"loss": 0.3605,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 2.776100086281277,
|
|
"grad_norm": 0.31873626300607877,
|
|
"learning_rate": 2.7361794041001474e-07,
|
|
"loss": 0.3524,
|
|
"step": 19305
|
|
},
|
|
{
|
|
"epoch": 2.7768190969226345,
|
|
"grad_norm": 0.31776333008390883,
|
|
"learning_rate": 2.7187141519671277e-07,
|
|
"loss": 0.3635,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 2.777538107563992,
|
|
"grad_norm": 0.30582751106832323,
|
|
"learning_rate": 2.7013040513118813e-07,
|
|
"loss": 0.3406,
|
|
"step": 19315
|
|
},
|
|
{
|
|
"epoch": 2.7782571182053495,
|
|
"grad_norm": 0.3108606741265266,
|
|
"learning_rate": 2.68394911200468e-07,
|
|
"loss": 0.3653,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 2.7789761288467068,
|
|
"grad_norm": 0.30765930009578635,
|
|
"learning_rate": 2.666649343884531e-07,
|
|
"loss": 0.3576,
|
|
"step": 19325
|
|
},
|
|
{
|
|
"epoch": 2.7796951394880645,
|
|
"grad_norm": 0.3095905947908685,
|
|
"learning_rate": 2.6494047567591664e-07,
|
|
"loss": 0.3711,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 2.780414150129422,
|
|
"grad_norm": 0.3026101419425275,
|
|
"learning_rate": 2.6322153604049994e-07,
|
|
"loss": 0.3516,
|
|
"step": 19335
|
|
},
|
|
{
|
|
"epoch": 2.7811331607707794,
|
|
"grad_norm": 0.3023031462941279,
|
|
"learning_rate": 2.61508116456719e-07,
|
|
"loss": 0.351,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 2.7818521714121367,
|
|
"grad_norm": 0.3175845507467177,
|
|
"learning_rate": 2.598002178959602e-07,
|
|
"loss": 0.3635,
|
|
"step": 19345
|
|
},
|
|
{
|
|
"epoch": 2.7825711820534944,
|
|
"grad_norm": 0.3023168961882304,
|
|
"learning_rate": 2.5809784132647786e-07,
|
|
"loss": 0.3511,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 2.783290192694852,
|
|
"grad_norm": 0.31426012236313855,
|
|
"learning_rate": 2.564009877133977e-07,
|
|
"loss": 0.3897,
|
|
"step": 19355
|
|
},
|
|
{
|
|
"epoch": 2.7840092033362094,
|
|
"grad_norm": 0.3233617148924042,
|
|
"learning_rate": 2.547096580187125e-07,
|
|
"loss": 0.3666,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 2.7847282139775666,
|
|
"grad_norm": 0.3085777263843596,
|
|
"learning_rate": 2.5302385320128295e-07,
|
|
"loss": 0.3374,
|
|
"step": 19365
|
|
},
|
|
{
|
|
"epoch": 2.7854472246189244,
|
|
"grad_norm": 0.315282681974723,
|
|
"learning_rate": 2.513435742168413e-07,
|
|
"loss": 0.3511,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 2.786166235260282,
|
|
"grad_norm": 0.3141889988453881,
|
|
"learning_rate": 2.4966882201798436e-07,
|
|
"loss": 0.3571,
|
|
"step": 19375
|
|
},
|
|
{
|
|
"epoch": 2.7868852459016393,
|
|
"grad_norm": 0.3239914217961741,
|
|
"learning_rate": 2.479995975541749e-07,
|
|
"loss": 0.3549,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 2.7876042565429966,
|
|
"grad_norm": 0.3011667215231966,
|
|
"learning_rate": 2.463359017717437e-07,
|
|
"loss": 0.3602,
|
|
"step": 19385
|
|
},
|
|
{
|
|
"epoch": 2.7883232671843543,
|
|
"grad_norm": 0.29901731608420934,
|
|
"learning_rate": 2.446777356138863e-07,
|
|
"loss": 0.3419,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 2.789042277825712,
|
|
"grad_norm": 0.38783833461175193,
|
|
"learning_rate": 2.430251000206618e-07,
|
|
"loss": 0.349,
|
|
"step": 19395
|
|
},
|
|
{
|
|
"epoch": 2.7897612884670693,
|
|
"grad_norm": 0.31196293453991447,
|
|
"learning_rate": 2.4137799592899857e-07,
|
|
"loss": 0.3711,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 2.7904802991084265,
|
|
"grad_norm": 0.31413563856842847,
|
|
"learning_rate": 2.3973642427268405e-07,
|
|
"loss": 0.3551,
|
|
"step": 19405
|
|
},
|
|
{
|
|
"epoch": 2.7911993097497843,
|
|
"grad_norm": 0.30669848987056847,
|
|
"learning_rate": 2.381003859823694e-07,
|
|
"loss": 0.3645,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 2.791918320391142,
|
|
"grad_norm": 0.3129950796498745,
|
|
"learning_rate": 2.3646988198557375e-07,
|
|
"loss": 0.3436,
|
|
"step": 19415
|
|
},
|
|
{
|
|
"epoch": 2.7926373310324992,
|
|
"grad_norm": 0.3020904742624162,
|
|
"learning_rate": 2.3484491320667324e-07,
|
|
"loss": 0.3515,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 2.793356341673857,
|
|
"grad_norm": 0.31348790424609296,
|
|
"learning_rate": 2.3322548056690763e-07,
|
|
"loss": 0.3411,
|
|
"step": 19425
|
|
},
|
|
{
|
|
"epoch": 2.794075352315214,
|
|
"grad_norm": 0.30315554360892016,
|
|
"learning_rate": 2.316115849843803e-07,
|
|
"loss": 0.3369,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 2.794794362956572,
|
|
"grad_norm": 0.3029707042387598,
|
|
"learning_rate": 2.3000322737405266e-07,
|
|
"loss": 0.345,
|
|
"step": 19435
|
|
},
|
|
{
|
|
"epoch": 2.795513373597929,
|
|
"grad_norm": 0.30827057562429894,
|
|
"learning_rate": 2.284004086477487e-07,
|
|
"loss": 0.3551,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 2.796232384239287,
|
|
"grad_norm": 0.31638290497312094,
|
|
"learning_rate": 2.268031297141504e-07,
|
|
"loss": 0.3552,
|
|
"step": 19445
|
|
},
|
|
{
|
|
"epoch": 2.796951394880644,
|
|
"grad_norm": 0.298083641375201,
|
|
"learning_rate": 2.252113914787979e-07,
|
|
"loss": 0.3601,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 2.797670405522002,
|
|
"grad_norm": 0.29388492637341807,
|
|
"learning_rate": 2.2362519484409484e-07,
|
|
"loss": 0.3582,
|
|
"step": 19455
|
|
},
|
|
{
|
|
"epoch": 2.798389416163359,
|
|
"grad_norm": 0.3074794572376518,
|
|
"learning_rate": 2.220445407092997e-07,
|
|
"loss": 0.3545,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 2.799108426804717,
|
|
"grad_norm": 0.33058963087328974,
|
|
"learning_rate": 2.20469429970529e-07,
|
|
"loss": 0.3657,
|
|
"step": 19465
|
|
},
|
|
{
|
|
"epoch": 2.799827437446074,
|
|
"grad_norm": 0.32059830509972015,
|
|
"learning_rate": 2.1889986352075621e-07,
|
|
"loss": 0.3598,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 2.800546448087432,
|
|
"grad_norm": 0.31344674596084215,
|
|
"learning_rate": 2.1733584224981396e-07,
|
|
"loss": 0.3576,
|
|
"step": 19475
|
|
},
|
|
{
|
|
"epoch": 2.801265458728789,
|
|
"grad_norm": 0.3138969602559058,
|
|
"learning_rate": 2.1577736704438746e-07,
|
|
"loss": 0.3523,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 2.801984469370147,
|
|
"grad_norm": 0.31218452641803646,
|
|
"learning_rate": 2.1422443878802323e-07,
|
|
"loss": 0.3504,
|
|
"step": 19485
|
|
},
|
|
{
|
|
"epoch": 2.802703480011504,
|
|
"grad_norm": 0.3103625493885958,
|
|
"learning_rate": 2.1267705836111708e-07,
|
|
"loss": 0.3481,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 2.8034224906528618,
|
|
"grad_norm": 0.3394743217767908,
|
|
"learning_rate": 2.1113522664092168e-07,
|
|
"loss": 0.3614,
|
|
"step": 19495
|
|
},
|
|
{
|
|
"epoch": 2.804141501294219,
|
|
"grad_norm": 0.3194563285631549,
|
|
"learning_rate": 2.0959894450154783e-07,
|
|
"loss": 0.3573,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 2.8048605119355767,
|
|
"grad_norm": 0.31943217420345055,
|
|
"learning_rate": 2.0806821281395328e-07,
|
|
"loss": 0.3645,
|
|
"step": 19505
|
|
},
|
|
{
|
|
"epoch": 2.805579522576934,
|
|
"grad_norm": 0.3215617418555956,
|
|
"learning_rate": 2.0654303244595274e-07,
|
|
"loss": 0.3506,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 2.8062985332182917,
|
|
"grad_norm": 0.3186694011640869,
|
|
"learning_rate": 2.0502340426221568e-07,
|
|
"loss": 0.3764,
|
|
"step": 19515
|
|
},
|
|
{
|
|
"epoch": 2.807017543859649,
|
|
"grad_norm": 0.30126128632485133,
|
|
"learning_rate": 2.035093291242607e-07,
|
|
"loss": 0.348,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 2.8077365545010067,
|
|
"grad_norm": 0.31837897587547037,
|
|
"learning_rate": 2.0200080789045895e-07,
|
|
"loss": 0.3509,
|
|
"step": 19525
|
|
},
|
|
{
|
|
"epoch": 2.808455565142364,
|
|
"grad_norm": 0.3049340670206854,
|
|
"learning_rate": 2.0049784141603525e-07,
|
|
"loss": 0.3732,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 2.8091745757837217,
|
|
"grad_norm": 0.3200739069071617,
|
|
"learning_rate": 1.9900043055306018e-07,
|
|
"loss": 0.3623,
|
|
"step": 19535
|
|
},
|
|
{
|
|
"epoch": 2.809893586425079,
|
|
"grad_norm": 0.30563109975452546,
|
|
"learning_rate": 1.9750857615045915e-07,
|
|
"loss": 0.3446,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 2.8106125970664366,
|
|
"grad_norm": 0.3222392767558234,
|
|
"learning_rate": 1.9602227905400673e-07,
|
|
"loss": 0.3498,
|
|
"step": 19545
|
|
},
|
|
{
|
|
"epoch": 2.8113316077077943,
|
|
"grad_norm": 0.30758975157613416,
|
|
"learning_rate": 1.9454154010632553e-07,
|
|
"loss": 0.3657,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 2.8120506183491516,
|
|
"grad_norm": 0.30649135149595613,
|
|
"learning_rate": 1.930663601468885e-07,
|
|
"loss": 0.3555,
|
|
"step": 19555
|
|
},
|
|
{
|
|
"epoch": 2.812769628990509,
|
|
"grad_norm": 0.3151315801542019,
|
|
"learning_rate": 1.9159674001201556e-07,
|
|
"loss": 0.3472,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 2.8134886396318666,
|
|
"grad_norm": 0.3016552975710565,
|
|
"learning_rate": 1.9013268053487465e-07,
|
|
"loss": 0.3536,
|
|
"step": 19565
|
|
},
|
|
{
|
|
"epoch": 2.8142076502732243,
|
|
"grad_norm": 0.31533068041057283,
|
|
"learning_rate": 1.8867418254548298e-07,
|
|
"loss": 0.3506,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 2.8149266609145815,
|
|
"grad_norm": 0.29714007702411543,
|
|
"learning_rate": 1.8722124687070574e-07,
|
|
"loss": 0.3403,
|
|
"step": 19575
|
|
},
|
|
{
|
|
"epoch": 2.815645671555939,
|
|
"grad_norm": 0.31169946439304297,
|
|
"learning_rate": 1.8577387433424854e-07,
|
|
"loss": 0.3583,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 2.8163646821972965,
|
|
"grad_norm": 0.30565219090359186,
|
|
"learning_rate": 1.8433206575667161e-07,
|
|
"loss": 0.348,
|
|
"step": 19585
|
|
},
|
|
{
|
|
"epoch": 2.8170836928386542,
|
|
"grad_norm": 0.309771796015684,
|
|
"learning_rate": 1.8289582195537337e-07,
|
|
"loss": 0.353,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 2.8178027034800115,
|
|
"grad_norm": 0.30738045526837676,
|
|
"learning_rate": 1.8146514374460134e-07,
|
|
"loss": 0.3427,
|
|
"step": 19595
|
|
},
|
|
{
|
|
"epoch": 2.8185217141213688,
|
|
"grad_norm": 0.32212616978895764,
|
|
"learning_rate": 1.8004003193544894e-07,
|
|
"loss": 0.3345,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 2.8192407247627265,
|
|
"grad_norm": 0.31935665499208105,
|
|
"learning_rate": 1.7862048733584882e-07,
|
|
"loss": 0.3436,
|
|
"step": 19605
|
|
},
|
|
{
|
|
"epoch": 2.819959735404084,
|
|
"grad_norm": 0.31011607261544066,
|
|
"learning_rate": 1.772065107505816e-07,
|
|
"loss": 0.3549,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 2.8206787460454414,
|
|
"grad_norm": 0.2963752944269382,
|
|
"learning_rate": 1.7579810298127054e-07,
|
|
"loss": 0.3637,
|
|
"step": 19615
|
|
},
|
|
{
|
|
"epoch": 2.8213977566867987,
|
|
"grad_norm": 0.303716726779769,
|
|
"learning_rate": 1.7439526482638136e-07,
|
|
"loss": 0.3603,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 2.8221167673281564,
|
|
"grad_norm": 0.3194904910954896,
|
|
"learning_rate": 1.7299799708122124e-07,
|
|
"loss": 0.3648,
|
|
"step": 19625
|
|
},
|
|
{
|
|
"epoch": 2.822835777969514,
|
|
"grad_norm": 0.32609672240121534,
|
|
"learning_rate": 1.7160630053794203e-07,
|
|
"loss": 0.3431,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 2.8235547886108714,
|
|
"grad_norm": 0.30902138416485625,
|
|
"learning_rate": 1.7022017598553376e-07,
|
|
"loss": 0.3488,
|
|
"step": 19635
|
|
},
|
|
{
|
|
"epoch": 2.8242737992522287,
|
|
"grad_norm": 0.32988805197025134,
|
|
"learning_rate": 1.6883962420982892e-07,
|
|
"loss": 0.3591,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 2.8249928098935864,
|
|
"grad_norm": 0.31791131140624473,
|
|
"learning_rate": 1.6746464599350253e-07,
|
|
"loss": 0.3705,
|
|
"step": 19645
|
|
},
|
|
{
|
|
"epoch": 2.825711820534944,
|
|
"grad_norm": 0.36161136472309374,
|
|
"learning_rate": 1.6609524211606666e-07,
|
|
"loss": 0.3498,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 2.8264308311763013,
|
|
"grad_norm": 0.3069902726420009,
|
|
"learning_rate": 1.6473141335387688e-07,
|
|
"loss": 0.3606,
|
|
"step": 19655
|
|
},
|
|
{
|
|
"epoch": 2.827149841817659,
|
|
"grad_norm": 0.30450847009491083,
|
|
"learning_rate": 1.6337316048012142e-07,
|
|
"loss": 0.3513,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 2.8278688524590163,
|
|
"grad_norm": 0.3090335390208858,
|
|
"learning_rate": 1.6202048426483652e-07,
|
|
"loss": 0.3593,
|
|
"step": 19665
|
|
},
|
|
{
|
|
"epoch": 2.828587863100374,
|
|
"grad_norm": 0.29473249703071286,
|
|
"learning_rate": 1.6067338547488875e-07,
|
|
"loss": 0.3409,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 2.8293068737417313,
|
|
"grad_norm": 0.30502673474783937,
|
|
"learning_rate": 1.5933186487398945e-07,
|
|
"loss": 0.3539,
|
|
"step": 19675
|
|
},
|
|
{
|
|
"epoch": 2.830025884383089,
|
|
"grad_norm": 0.3237522346303837,
|
|
"learning_rate": 1.579959232226802e-07,
|
|
"loss": 0.3565,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 2.8307448950244463,
|
|
"grad_norm": 0.29410601198753455,
|
|
"learning_rate": 1.566655612783452e-07,
|
|
"loss": 0.3494,
|
|
"step": 19685
|
|
},
|
|
{
|
|
"epoch": 2.831463905665804,
|
|
"grad_norm": 0.3201855612913448,
|
|
"learning_rate": 1.5534077979520558e-07,
|
|
"loss": 0.36,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 2.8321829163071612,
|
|
"grad_norm": 0.3176807634275686,
|
|
"learning_rate": 1.5402157952431385e-07,
|
|
"loss": 0.357,
|
|
"step": 19695
|
|
},
|
|
{
|
|
"epoch": 2.832901926948519,
|
|
"grad_norm": 0.32624540853994793,
|
|
"learning_rate": 1.5270796121356402e-07,
|
|
"loss": 0.3555,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 2.833620937589876,
|
|
"grad_norm": 0.30558565906106155,
|
|
"learning_rate": 1.5139992560768257e-07,
|
|
"loss": 0.3638,
|
|
"step": 19705
|
|
},
|
|
{
|
|
"epoch": 2.834339948231234,
|
|
"grad_norm": 0.3148425885106696,
|
|
"learning_rate": 1.5009747344822966e-07,
|
|
"loss": 0.3485,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 2.835058958872591,
|
|
"grad_norm": 0.3151890767325172,
|
|
"learning_rate": 1.488006054736024e-07,
|
|
"loss": 0.3486,
|
|
"step": 19715
|
|
},
|
|
{
|
|
"epoch": 2.835777969513949,
|
|
"grad_norm": 0.2990771173282484,
|
|
"learning_rate": 1.4750932241903382e-07,
|
|
"loss": 0.3583,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 2.836496980155306,
|
|
"grad_norm": 0.3092892688996735,
|
|
"learning_rate": 1.4622362501658495e-07,
|
|
"loss": 0.3478,
|
|
"step": 19725
|
|
},
|
|
{
|
|
"epoch": 2.837215990796664,
|
|
"grad_norm": 0.31105647203704995,
|
|
"learning_rate": 1.4494351399515604e-07,
|
|
"loss": 0.3742,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 2.837935001438021,
|
|
"grad_norm": 0.31333961454391107,
|
|
"learning_rate": 1.4366899008047774e-07,
|
|
"loss": 0.3457,
|
|
"step": 19735
|
|
},
|
|
{
|
|
"epoch": 2.838654012079379,
|
|
"grad_norm": 0.3098972660151477,
|
|
"learning_rate": 1.4240005399511091e-07,
|
|
"loss": 0.3445,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 2.839373022720736,
|
|
"grad_norm": 0.29904952311822824,
|
|
"learning_rate": 1.4113670645845345e-07,
|
|
"loss": 0.3674,
|
|
"step": 19745
|
|
},
|
|
{
|
|
"epoch": 2.840092033362094,
|
|
"grad_norm": 0.30724864773465854,
|
|
"learning_rate": 1.398789481867313e-07,
|
|
"loss": 0.3683,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 2.840811044003451,
|
|
"grad_norm": 0.3098518253670769,
|
|
"learning_rate": 1.3862677989300188e-07,
|
|
"loss": 0.3427,
|
|
"step": 19755
|
|
},
|
|
{
|
|
"epoch": 2.841530054644809,
|
|
"grad_norm": 0.3007055183846121,
|
|
"learning_rate": 1.373802022871551e-07,
|
|
"loss": 0.3337,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 2.8422490652861665,
|
|
"grad_norm": 0.31362905222662657,
|
|
"learning_rate": 1.361392160759112e-07,
|
|
"loss": 0.3541,
|
|
"step": 19765
|
|
},
|
|
{
|
|
"epoch": 2.8429680759275238,
|
|
"grad_norm": 0.3086581602027274,
|
|
"learning_rate": 1.3490382196281959e-07,
|
|
"loss": 0.3366,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 2.843687086568881,
|
|
"grad_norm": 0.32186996399691165,
|
|
"learning_rate": 1.3367402064826007e-07,
|
|
"loss": 0.3569,
|
|
"step": 19775
|
|
},
|
|
{
|
|
"epoch": 2.8444060972102387,
|
|
"grad_norm": 0.30885699347696555,
|
|
"learning_rate": 1.3244981282944047e-07,
|
|
"loss": 0.3534,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 2.8451251078515964,
|
|
"grad_norm": 0.31284364176405044,
|
|
"learning_rate": 1.3123119920039894e-07,
|
|
"loss": 0.3544,
|
|
"step": 19785
|
|
},
|
|
{
|
|
"epoch": 2.8458441184929537,
|
|
"grad_norm": 0.3160906628771324,
|
|
"learning_rate": 1.3001818045200175e-07,
|
|
"loss": 0.3401,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 2.846563129134311,
|
|
"grad_norm": 0.32702047456560096,
|
|
"learning_rate": 1.2881075727194214e-07,
|
|
"loss": 0.3769,
|
|
"step": 19795
|
|
},
|
|
{
|
|
"epoch": 2.8472821397756687,
|
|
"grad_norm": 0.3090608205253169,
|
|
"learning_rate": 1.2760893034474254e-07,
|
|
"loss": 0.3499,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 2.8480011504170264,
|
|
"grad_norm": 0.3138204067749859,
|
|
"learning_rate": 1.2641270035175347e-07,
|
|
"loss": 0.3728,
|
|
"step": 19805
|
|
},
|
|
{
|
|
"epoch": 2.8487201610583837,
|
|
"grad_norm": 0.30587465368286,
|
|
"learning_rate": 1.25222067971148e-07,
|
|
"loss": 0.3591,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 2.849439171699741,
|
|
"grad_norm": 0.3151828226894727,
|
|
"learning_rate": 1.2403703387793176e-07,
|
|
"loss": 0.3524,
|
|
"step": 19815
|
|
},
|
|
{
|
|
"epoch": 2.8501581823410986,
|
|
"grad_norm": 0.31006606220294936,
|
|
"learning_rate": 1.228575987439329e-07,
|
|
"loss": 0.3546,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 2.8508771929824563,
|
|
"grad_norm": 0.29605394252109407,
|
|
"learning_rate": 1.2168376323780652e-07,
|
|
"loss": 0.356,
|
|
"step": 19825
|
|
},
|
|
{
|
|
"epoch": 2.8515962036238136,
|
|
"grad_norm": 0.31417656865686544,
|
|
"learning_rate": 1.205155280250314e-07,
|
|
"loss": 0.351,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 2.852315214265171,
|
|
"grad_norm": 0.30377191215236227,
|
|
"learning_rate": 1.193528937679145e-07,
|
|
"loss": 0.3546,
|
|
"step": 19835
|
|
},
|
|
{
|
|
"epoch": 2.8530342249065286,
|
|
"grad_norm": 0.3140116671650671,
|
|
"learning_rate": 1.1819586112558401e-07,
|
|
"loss": 0.3697,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 2.8537532355478863,
|
|
"grad_norm": 0.31720157777001484,
|
|
"learning_rate": 1.1704443075399418e-07,
|
|
"loss": 0.3534,
|
|
"step": 19845
|
|
},
|
|
{
|
|
"epoch": 2.8544722461892436,
|
|
"grad_norm": 0.2927661022495849,
|
|
"learning_rate": 1.1589860330592506e-07,
|
|
"loss": 0.3356,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 2.855191256830601,
|
|
"grad_norm": 0.3074670230872882,
|
|
"learning_rate": 1.147583794309759e-07,
|
|
"loss": 0.3433,
|
|
"step": 19855
|
|
},
|
|
{
|
|
"epoch": 2.8559102674719585,
|
|
"grad_norm": 0.31164255393992035,
|
|
"learning_rate": 1.1362375977557183e-07,
|
|
"loss": 0.3407,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 2.8566292781133162,
|
|
"grad_norm": 0.3144825590840295,
|
|
"learning_rate": 1.1249474498296053e-07,
|
|
"loss": 0.3461,
|
|
"step": 19865
|
|
},
|
|
{
|
|
"epoch": 2.8573482887546735,
|
|
"grad_norm": 0.315202884878571,
|
|
"learning_rate": 1.1137133569321335e-07,
|
|
"loss": 0.3491,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 2.858067299396031,
|
|
"grad_norm": 0.45033323533661335,
|
|
"learning_rate": 1.1025353254322191e-07,
|
|
"loss": 0.3529,
|
|
"step": 19875
|
|
},
|
|
{
|
|
"epoch": 2.8587863100373885,
|
|
"grad_norm": 0.3159910074980656,
|
|
"learning_rate": 1.0914133616669931e-07,
|
|
"loss": 0.3548,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 2.859505320678746,
|
|
"grad_norm": 0.31832852961200697,
|
|
"learning_rate": 1.0803474719418006e-07,
|
|
"loss": 0.3601,
|
|
"step": 19885
|
|
},
|
|
{
|
|
"epoch": 2.8602243313201035,
|
|
"grad_norm": 0.3227663752680385,
|
|
"learning_rate": 1.0693376625302232e-07,
|
|
"loss": 0.3533,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 2.860943341961461,
|
|
"grad_norm": 0.3201045100876346,
|
|
"learning_rate": 1.0583839396740126e-07,
|
|
"loss": 0.3418,
|
|
"step": 19895
|
|
},
|
|
{
|
|
"epoch": 2.8616623526028184,
|
|
"grad_norm": 0.3065616056971823,
|
|
"learning_rate": 1.0474863095831566e-07,
|
|
"loss": 0.3651,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 2.862381363244176,
|
|
"grad_norm": 0.3093595730538003,
|
|
"learning_rate": 1.0366447784358025e-07,
|
|
"loss": 0.3558,
|
|
"step": 19905
|
|
},
|
|
{
|
|
"epoch": 2.8631003738855334,
|
|
"grad_norm": 0.3127569775360688,
|
|
"learning_rate": 1.0258593523783444e-07,
|
|
"loss": 0.3487,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 2.863819384526891,
|
|
"grad_norm": 0.3127546755793686,
|
|
"learning_rate": 1.0151300375253138e-07,
|
|
"loss": 0.3515,
|
|
"step": 19915
|
|
},
|
|
{
|
|
"epoch": 2.8645383951682484,
|
|
"grad_norm": 0.3127556843332249,
|
|
"learning_rate": 1.0044568399594778e-07,
|
|
"loss": 0.3667,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 2.865257405809606,
|
|
"grad_norm": 0.3019032436054994,
|
|
"learning_rate": 9.938397657317633e-08,
|
|
"loss": 0.3417,
|
|
"step": 19925
|
|
},
|
|
{
|
|
"epoch": 2.8659764164509633,
|
|
"grad_norm": 0.32179642385988344,
|
|
"learning_rate": 9.832788208612998e-08,
|
|
"loss": 0.3506,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 2.866695427092321,
|
|
"grad_norm": 0.31825252312903735,
|
|
"learning_rate": 9.727740113353645e-08,
|
|
"loss": 0.3552,
|
|
"step": 19935
|
|
},
|
|
{
|
|
"epoch": 2.8674144377336783,
|
|
"grad_norm": 0.3059276048014507,
|
|
"learning_rate": 9.62325343109427e-08,
|
|
"loss": 0.3606,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 2.868133448375036,
|
|
"grad_norm": 0.31518111667148346,
|
|
"learning_rate": 9.519328221071378e-08,
|
|
"loss": 0.3538,
|
|
"step": 19945
|
|
},
|
|
{
|
|
"epoch": 2.8688524590163933,
|
|
"grad_norm": 0.33909437397438713,
|
|
"learning_rate": 9.415964542203059e-08,
|
|
"loss": 0.36,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 2.869571469657751,
|
|
"grad_norm": 0.3271387134786379,
|
|
"learning_rate": 9.313162453088997e-08,
|
|
"loss": 0.3628,
|
|
"step": 19955
|
|
},
|
|
{
|
|
"epoch": 2.8702904802991083,
|
|
"grad_norm": 0.3022971868672396,
|
|
"learning_rate": 9.210922012010681e-08,
|
|
"loss": 0.3524,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 2.871009490940466,
|
|
"grad_norm": 0.3032486265662045,
|
|
"learning_rate": 9.109243276930968e-08,
|
|
"loss": 0.3519,
|
|
"step": 19965
|
|
},
|
|
{
|
|
"epoch": 2.8717285015818232,
|
|
"grad_norm": 0.3166470766101174,
|
|
"learning_rate": 9.008126305494524e-08,
|
|
"loss": 0.3506,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 2.872447512223181,
|
|
"grad_norm": 0.30724298045971954,
|
|
"learning_rate": 8.907571155027272e-08,
|
|
"loss": 0.344,
|
|
"step": 19975
|
|
},
|
|
{
|
|
"epoch": 2.8731665228645387,
|
|
"grad_norm": 0.2998477886331141,
|
|
"learning_rate": 8.807577882536611e-08,
|
|
"loss": 0.3327,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 2.873885533505896,
|
|
"grad_norm": 0.31285567261052705,
|
|
"learning_rate": 8.708146544711749e-08,
|
|
"loss": 0.3558,
|
|
"step": 19985
|
|
},
|
|
{
|
|
"epoch": 2.874604544147253,
|
|
"grad_norm": 0.336148110439647,
|
|
"learning_rate": 8.609277197923038e-08,
|
|
"loss": 0.3574,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 2.875323554788611,
|
|
"grad_norm": 0.3142366128590145,
|
|
"learning_rate": 8.510969898222199e-08,
|
|
"loss": 0.3657,
|
|
"step": 19995
|
|
},
|
|
{
|
|
"epoch": 2.8760425654299686,
|
|
"grad_norm": 0.35590677060330256,
|
|
"learning_rate": 8.413224701342427e-08,
|
|
"loss": 0.336,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 2.876761576071326,
|
|
"grad_norm": 0.3155349391121181,
|
|
"learning_rate": 8.31604166269806e-08,
|
|
"loss": 0.3615,
|
|
"step": 20005
|
|
},
|
|
{
|
|
"epoch": 2.877480586712683,
|
|
"grad_norm": 0.3105909821116713,
|
|
"learning_rate": 8.219420837385139e-08,
|
|
"loss": 0.3558,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 2.878199597354041,
|
|
"grad_norm": 0.31943968035518244,
|
|
"learning_rate": 8.123362280180514e-08,
|
|
"loss": 0.3392,
|
|
"step": 20015
|
|
},
|
|
{
|
|
"epoch": 2.8789186079953986,
|
|
"grad_norm": 0.3188491758331916,
|
|
"learning_rate": 8.02786604554262e-08,
|
|
"loss": 0.3378,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 2.879637618636756,
|
|
"grad_norm": 0.31796106698426874,
|
|
"learning_rate": 7.93293218761071e-08,
|
|
"loss": 0.3755,
|
|
"step": 20025
|
|
},
|
|
{
|
|
"epoch": 2.880356629278113,
|
|
"grad_norm": 0.30798491903781205,
|
|
"learning_rate": 7.838560760205727e-08,
|
|
"loss": 0.3452,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 2.881075639919471,
|
|
"grad_norm": 0.3147825658639276,
|
|
"learning_rate": 7.74475181682921e-08,
|
|
"loss": 0.3589,
|
|
"step": 20035
|
|
},
|
|
{
|
|
"epoch": 2.8817946505608285,
|
|
"grad_norm": 0.30130707474401197,
|
|
"learning_rate": 7.651505410664284e-08,
|
|
"loss": 0.3433,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 2.8825136612021858,
|
|
"grad_norm": 0.3001199192213933,
|
|
"learning_rate": 7.558821594574773e-08,
|
|
"loss": 0.3523,
|
|
"step": 20045
|
|
},
|
|
{
|
|
"epoch": 2.883232671843543,
|
|
"grad_norm": 0.30609348693305766,
|
|
"learning_rate": 7.466700421105643e-08,
|
|
"loss": 0.3539,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 2.8839516824849007,
|
|
"grad_norm": 0.30529711807306814,
|
|
"learning_rate": 7.375141942483343e-08,
|
|
"loss": 0.3535,
|
|
"step": 20055
|
|
},
|
|
{
|
|
"epoch": 2.8846706931262585,
|
|
"grad_norm": 0.3085725150077207,
|
|
"learning_rate": 7.284146210614463e-08,
|
|
"loss": 0.3596,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 2.8853897037676157,
|
|
"grad_norm": 0.32379734232553514,
|
|
"learning_rate": 7.1937132770874e-08,
|
|
"loss": 0.3502,
|
|
"step": 20065
|
|
},
|
|
{
|
|
"epoch": 2.886108714408973,
|
|
"grad_norm": 0.30441899310242754,
|
|
"learning_rate": 7.103843193170924e-08,
|
|
"loss": 0.3643,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 2.8868277250503307,
|
|
"grad_norm": 0.30441945106316937,
|
|
"learning_rate": 7.014536009814943e-08,
|
|
"loss": 0.3623,
|
|
"step": 20075
|
|
},
|
|
{
|
|
"epoch": 2.8875467356916884,
|
|
"grad_norm": 0.307420928138311,
|
|
"learning_rate": 6.925791777650181e-08,
|
|
"loss": 0.3422,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 2.8882657463330457,
|
|
"grad_norm": 0.3133443991613013,
|
|
"learning_rate": 6.837610546988061e-08,
|
|
"loss": 0.3449,
|
|
"step": 20085
|
|
},
|
|
{
|
|
"epoch": 2.8889847569744034,
|
|
"grad_norm": 0.31544187213757585,
|
|
"learning_rate": 6.749992367821367e-08,
|
|
"loss": 0.362,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 2.8897037676157606,
|
|
"grad_norm": 0.3181257327525517,
|
|
"learning_rate": 6.662937289822924e-08,
|
|
"loss": 0.3524,
|
|
"step": 20095
|
|
},
|
|
{
|
|
"epoch": 2.8904227782571184,
|
|
"grad_norm": 0.31568453456892315,
|
|
"learning_rate": 6.576445362346917e-08,
|
|
"loss": 0.35,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 2.8911417888984756,
|
|
"grad_norm": 0.31390550340544804,
|
|
"learning_rate": 6.490516634427901e-08,
|
|
"loss": 0.3374,
|
|
"step": 20105
|
|
},
|
|
{
|
|
"epoch": 2.8918607995398333,
|
|
"grad_norm": 0.31081366225391455,
|
|
"learning_rate": 6.405151154781241e-08,
|
|
"loss": 0.3625,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 2.8925798101811906,
|
|
"grad_norm": 0.3177500145092479,
|
|
"learning_rate": 6.320348971803225e-08,
|
|
"loss": 0.3564,
|
|
"step": 20115
|
|
},
|
|
{
|
|
"epoch": 2.8932988208225483,
|
|
"grad_norm": 0.29677972139612535,
|
|
"learning_rate": 6.236110133570505e-08,
|
|
"loss": 0.3517,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 2.8940178314639056,
|
|
"grad_norm": 0.3206822403012554,
|
|
"learning_rate": 6.152434687840214e-08,
|
|
"loss": 0.3471,
|
|
"step": 20125
|
|
},
|
|
{
|
|
"epoch": 2.8947368421052633,
|
|
"grad_norm": 0.30565224332619295,
|
|
"learning_rate": 6.069322682050516e-08,
|
|
"loss": 0.359,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 2.8954558527466205,
|
|
"grad_norm": 0.2990491966371965,
|
|
"learning_rate": 5.986774163319942e-08,
|
|
"loss": 0.3442,
|
|
"step": 20135
|
|
},
|
|
{
|
|
"epoch": 2.8961748633879782,
|
|
"grad_norm": 0.328629335753113,
|
|
"learning_rate": 5.90478917844739e-08,
|
|
"loss": 0.3781,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 2.8968938740293355,
|
|
"grad_norm": 0.3011937892687381,
|
|
"learning_rate": 5.823367773912569e-08,
|
|
"loss": 0.3389,
|
|
"step": 20145
|
|
},
|
|
{
|
|
"epoch": 2.8976128846706932,
|
|
"grad_norm": 0.3068318241936056,
|
|
"learning_rate": 5.742509995875445e-08,
|
|
"loss": 0.3702,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 2.8983318953120505,
|
|
"grad_norm": 0.2999250357976834,
|
|
"learning_rate": 5.66221589017657e-08,
|
|
"loss": 0.3576,
|
|
"step": 20155
|
|
},
|
|
{
|
|
"epoch": 2.899050905953408,
|
|
"grad_norm": 0.30259364045053866,
|
|
"learning_rate": 5.582485502337087e-08,
|
|
"loss": 0.3578,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 2.8997699165947655,
|
|
"grad_norm": 0.31667873659095214,
|
|
"learning_rate": 5.503318877558172e-08,
|
|
"loss": 0.3567,
|
|
"step": 20165
|
|
},
|
|
{
|
|
"epoch": 2.900488927236123,
|
|
"grad_norm": 0.3115584463254306,
|
|
"learning_rate": 5.424716060721702e-08,
|
|
"loss": 0.3552,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 2.9012079378774804,
|
|
"grad_norm": 0.3079569355921638,
|
|
"learning_rate": 5.3466770963898074e-08,
|
|
"loss": 0.3515,
|
|
"step": 20175
|
|
},
|
|
{
|
|
"epoch": 2.901926948518838,
|
|
"grad_norm": 0.3130676722117026,
|
|
"learning_rate": 5.269202028804876e-08,
|
|
"loss": 0.3421,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 2.9026459591601954,
|
|
"grad_norm": 0.31657513998731357,
|
|
"learning_rate": 5.192290901889774e-08,
|
|
"loss": 0.3552,
|
|
"step": 20185
|
|
},
|
|
{
|
|
"epoch": 2.903364969801553,
|
|
"grad_norm": 0.3033146127249199,
|
|
"learning_rate": 5.11594375924751e-08,
|
|
"loss": 0.3312,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 2.904083980442911,
|
|
"grad_norm": 0.3090288154840797,
|
|
"learning_rate": 5.0401606441613515e-08,
|
|
"loss": 0.3829,
|
|
"step": 20195
|
|
},
|
|
{
|
|
"epoch": 2.904802991084268,
|
|
"grad_norm": 0.6597731135932114,
|
|
"learning_rate": 4.964941599595041e-08,
|
|
"loss": 0.3414,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 2.9055220017256254,
|
|
"grad_norm": 0.3039729546228988,
|
|
"learning_rate": 4.890286668192246e-08,
|
|
"loss": 0.3509,
|
|
"step": 20205
|
|
},
|
|
{
|
|
"epoch": 2.906241012366983,
|
|
"grad_norm": 0.31292629673882,
|
|
"learning_rate": 4.816195892276887e-08,
|
|
"loss": 0.3675,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 2.9069600230083408,
|
|
"grad_norm": 0.3091273065471905,
|
|
"learning_rate": 4.742669313853254e-08,
|
|
"loss": 0.3425,
|
|
"step": 20215
|
|
},
|
|
{
|
|
"epoch": 2.907679033649698,
|
|
"grad_norm": 0.31573160565098474,
|
|
"learning_rate": 4.669706974605559e-08,
|
|
"loss": 0.3676,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 2.9083980442910553,
|
|
"grad_norm": 0.32873206136837513,
|
|
"learning_rate": 4.5973089158980464e-08,
|
|
"loss": 0.3499,
|
|
"step": 20225
|
|
},
|
|
{
|
|
"epoch": 2.909117054932413,
|
|
"grad_norm": 0.3004924279055244,
|
|
"learning_rate": 4.5254751787753294e-08,
|
|
"loss": 0.3555,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 2.9098360655737707,
|
|
"grad_norm": 0.3169102105053304,
|
|
"learning_rate": 4.454205803961942e-08,
|
|
"loss": 0.3607,
|
|
"step": 20235
|
|
},
|
|
{
|
|
"epoch": 2.910555076215128,
|
|
"grad_norm": 0.3021816853831664,
|
|
"learning_rate": 4.383500831862342e-08,
|
|
"loss": 0.3476,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 2.9112740868564853,
|
|
"grad_norm": 0.3060949401927673,
|
|
"learning_rate": 4.3133603025614644e-08,
|
|
"loss": 0.3745,
|
|
"step": 20245
|
|
},
|
|
{
|
|
"epoch": 2.911993097497843,
|
|
"grad_norm": 0.3109649592297195,
|
|
"learning_rate": 4.243784255823613e-08,
|
|
"loss": 0.3462,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 2.9127121081392007,
|
|
"grad_norm": 0.32036028908374864,
|
|
"learning_rate": 4.1747727310935683e-08,
|
|
"loss": 0.3631,
|
|
"step": 20255
|
|
},
|
|
{
|
|
"epoch": 2.913431118780558,
|
|
"grad_norm": 0.31581149777093853,
|
|
"learning_rate": 4.106325767495811e-08,
|
|
"loss": 0.3466,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 2.914150129421915,
|
|
"grad_norm": 0.31533401382203285,
|
|
"learning_rate": 4.038443403834969e-08,
|
|
"loss": 0.3474,
|
|
"step": 20265
|
|
},
|
|
{
|
|
"epoch": 2.914869140063273,
|
|
"grad_norm": 0.30029568636648557,
|
|
"learning_rate": 3.9711256785953666e-08,
|
|
"loss": 0.335,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 2.9155881507046306,
|
|
"grad_norm": 0.30555415771095046,
|
|
"learning_rate": 3.9043726299412555e-08,
|
|
"loss": 0.3643,
|
|
"step": 20275
|
|
},
|
|
{
|
|
"epoch": 2.916307161345988,
|
|
"grad_norm": 0.31782358760179896,
|
|
"learning_rate": 3.838184295716807e-08,
|
|
"loss": 0.3509,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 2.917026171987345,
|
|
"grad_norm": 0.3076610569146311,
|
|
"learning_rate": 3.772560713446116e-08,
|
|
"loss": 0.3564,
|
|
"step": 20285
|
|
},
|
|
{
|
|
"epoch": 2.917745182628703,
|
|
"grad_norm": 0.29596165046549433,
|
|
"learning_rate": 3.7075019203329785e-08,
|
|
"loss": 0.3553,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 2.9184641932700606,
|
|
"grad_norm": 0.28946501766402427,
|
|
"learning_rate": 3.643007953261002e-08,
|
|
"loss": 0.3449,
|
|
"step": 20295
|
|
},
|
|
{
|
|
"epoch": 2.919183203911418,
|
|
"grad_norm": 0.3033439392793503,
|
|
"learning_rate": 3.579078848793605e-08,
|
|
"loss": 0.3634,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 2.9199022145527755,
|
|
"grad_norm": 0.32510429371217153,
|
|
"learning_rate": 3.5157146431741285e-08,
|
|
"loss": 0.3495,
|
|
"step": 20305
|
|
},
|
|
{
|
|
"epoch": 2.920621225194133,
|
|
"grad_norm": 0.31844070605679214,
|
|
"learning_rate": 3.452915372325394e-08,
|
|
"loss": 0.3543,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 2.9213402358354905,
|
|
"grad_norm": 0.31533466726486986,
|
|
"learning_rate": 3.390681071850033e-08,
|
|
"loss": 0.3672,
|
|
"step": 20315
|
|
},
|
|
{
|
|
"epoch": 2.922059246476848,
|
|
"grad_norm": 0.30434420248270794,
|
|
"learning_rate": 3.3290117770306e-08,
|
|
"loss": 0.3597,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 2.9227782571182055,
|
|
"grad_norm": 0.3566339511084951,
|
|
"learning_rate": 3.2679075228289056e-08,
|
|
"loss": 0.3476,
|
|
"step": 20325
|
|
},
|
|
{
|
|
"epoch": 2.9234972677595628,
|
|
"grad_norm": 0.3161302342664479,
|
|
"learning_rate": 3.2073683438866856e-08,
|
|
"loss": 0.3311,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 2.9242162784009205,
|
|
"grad_norm": 0.33681221067176464,
|
|
"learning_rate": 3.147394274525484e-08,
|
|
"loss": 0.3534,
|
|
"step": 20335
|
|
},
|
|
{
|
|
"epoch": 2.9249352890422777,
|
|
"grad_norm": 0.3183033251815415,
|
|
"learning_rate": 3.0879853487461034e-08,
|
|
"loss": 0.3531,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 2.9256542996836354,
|
|
"grad_norm": 0.31419493538006127,
|
|
"learning_rate": 3.029141600229157e-08,
|
|
"loss": 0.3563,
|
|
"step": 20345
|
|
},
|
|
{
|
|
"epoch": 2.9263733103249927,
|
|
"grad_norm": 0.32750253413973196,
|
|
"learning_rate": 2.97086306233485e-08,
|
|
"loss": 0.3617,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 2.9270923209663504,
|
|
"grad_norm": 0.3194802734755754,
|
|
"learning_rate": 2.913149768102752e-08,
|
|
"loss": 0.3698,
|
|
"step": 20355
|
|
},
|
|
{
|
|
"epoch": 2.9278113316077077,
|
|
"grad_norm": 0.3065275294013698,
|
|
"learning_rate": 2.8560017502524684e-08,
|
|
"loss": 0.3525,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 2.9285303422490654,
|
|
"grad_norm": 0.31590621149316106,
|
|
"learning_rate": 2.7994190411825272e-08,
|
|
"loss": 0.3543,
|
|
"step": 20365
|
|
},
|
|
{
|
|
"epoch": 2.9292493528904227,
|
|
"grad_norm": 0.2988018337298097,
|
|
"learning_rate": 2.7434016729712688e-08,
|
|
"loss": 0.3454,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 2.9299683635317804,
|
|
"grad_norm": 0.3111543446081254,
|
|
"learning_rate": 2.6879496773766223e-08,
|
|
"loss": 0.3492,
|
|
"step": 20375
|
|
},
|
|
{
|
|
"epoch": 2.9306873741731376,
|
|
"grad_norm": 0.3135170301967151,
|
|
"learning_rate": 2.6330630858358854e-08,
|
|
"loss": 0.3639,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 2.9314063848144953,
|
|
"grad_norm": 0.30293972283016546,
|
|
"learning_rate": 2.5787419294656113e-08,
|
|
"loss": 0.3563,
|
|
"step": 20385
|
|
},
|
|
{
|
|
"epoch": 2.9321253954558526,
|
|
"grad_norm": 0.3088210817503903,
|
|
"learning_rate": 2.524986239062166e-08,
|
|
"loss": 0.3533,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 2.9328444060972103,
|
|
"grad_norm": 0.3057889408584442,
|
|
"learning_rate": 2.4717960451010604e-08,
|
|
"loss": 0.3562,
|
|
"step": 20395
|
|
},
|
|
{
|
|
"epoch": 2.9335634167385676,
|
|
"grad_norm": 0.3140805414092627,
|
|
"learning_rate": 2.4191713777373947e-08,
|
|
"loss": 0.3419,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 2.9342824273799253,
|
|
"grad_norm": 0.3171695758890566,
|
|
"learning_rate": 2.3671122668054157e-08,
|
|
"loss": 0.3641,
|
|
"step": 20405
|
|
},
|
|
{
|
|
"epoch": 2.9350014380212825,
|
|
"grad_norm": 0.30623977691725907,
|
|
"learning_rate": 2.3156187418189592e-08,
|
|
"loss": 0.3439,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 2.9357204486626403,
|
|
"grad_norm": 0.3119455073162684,
|
|
"learning_rate": 2.264690831971228e-08,
|
|
"loss": 0.3542,
|
|
"step": 20415
|
|
},
|
|
{
|
|
"epoch": 2.9364394593039975,
|
|
"grad_norm": 0.31863085872319036,
|
|
"learning_rate": 2.2143285661345716e-08,
|
|
"loss": 0.3616,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 2.9371584699453552,
|
|
"grad_norm": 0.32105718207060935,
|
|
"learning_rate": 2.1645319728607063e-08,
|
|
"loss": 0.3602,
|
|
"step": 20425
|
|
},
|
|
{
|
|
"epoch": 2.937877480586713,
|
|
"grad_norm": 0.31514042136363335,
|
|
"learning_rate": 2.115301080380827e-08,
|
|
"loss": 0.3522,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 2.93859649122807,
|
|
"grad_norm": 0.3137089621665793,
|
|
"learning_rate": 2.066635916605386e-08,
|
|
"loss": 0.3518,
|
|
"step": 20435
|
|
},
|
|
{
|
|
"epoch": 2.9393155018694275,
|
|
"grad_norm": 0.31346497350839764,
|
|
"learning_rate": 2.0185365091237584e-08,
|
|
"loss": 0.3629,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 2.940034512510785,
|
|
"grad_norm": 0.31697462485362327,
|
|
"learning_rate": 1.971002885205131e-08,
|
|
"loss": 0.343,
|
|
"step": 20445
|
|
},
|
|
{
|
|
"epoch": 2.940753523152143,
|
|
"grad_norm": 0.30895496705501163,
|
|
"learning_rate": 1.924035071797392e-08,
|
|
"loss": 0.3546,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 2.9414725337935,
|
|
"grad_norm": 0.324785557475365,
|
|
"learning_rate": 1.87763309552802e-08,
|
|
"loss": 0.3589,
|
|
"step": 20455
|
|
},
|
|
{
|
|
"epoch": 2.9421915444348574,
|
|
"grad_norm": 0.3178029299759339,
|
|
"learning_rate": 1.8317969827036374e-08,
|
|
"loss": 0.3598,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 2.942910555076215,
|
|
"grad_norm": 0.31453703135147415,
|
|
"learning_rate": 1.7865267593099035e-08,
|
|
"loss": 0.3573,
|
|
"step": 20465
|
|
},
|
|
{
|
|
"epoch": 2.943629565717573,
|
|
"grad_norm": 0.31021776925122346,
|
|
"learning_rate": 1.741822451011954e-08,
|
|
"loss": 0.3607,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 2.94434857635893,
|
|
"grad_norm": 0.31773060517094964,
|
|
"learning_rate": 1.697684083153739e-08,
|
|
"loss": 0.3598,
|
|
"step": 20475
|
|
},
|
|
{
|
|
"epoch": 2.9450675870002874,
|
|
"grad_norm": 0.3242150798460791,
|
|
"learning_rate": 1.6541116807585746e-08,
|
|
"loss": 0.3556,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 2.945786597641645,
|
|
"grad_norm": 0.31054625988596773,
|
|
"learning_rate": 1.611105268528812e-08,
|
|
"loss": 0.3583,
|
|
"step": 20485
|
|
},
|
|
{
|
|
"epoch": 2.946505608283003,
|
|
"grad_norm": 0.3221031333992454,
|
|
"learning_rate": 1.5686648708461706e-08,
|
|
"loss": 0.358,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 2.94722461892436,
|
|
"grad_norm": 0.30824838943691657,
|
|
"learning_rate": 1.52679051177107e-08,
|
|
"loss": 0.3649,
|
|
"step": 20495
|
|
},
|
|
{
|
|
"epoch": 2.9479436295657173,
|
|
"grad_norm": 0.3032873167790253,
|
|
"learning_rate": 1.4854822150435211e-08,
|
|
"loss": 0.3365,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 2.948662640207075,
|
|
"grad_norm": 0.3096758621157358,
|
|
"learning_rate": 1.4447400040821236e-08,
|
|
"loss": 0.3488,
|
|
"step": 20505
|
|
},
|
|
{
|
|
"epoch": 2.9493816508484327,
|
|
"grad_norm": 0.3128423976533039,
|
|
"learning_rate": 1.4045639019848456e-08,
|
|
"loss": 0.3601,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 2.95010066148979,
|
|
"grad_norm": 0.31522955258712954,
|
|
"learning_rate": 1.3649539315285787e-08,
|
|
"loss": 0.3521,
|
|
"step": 20515
|
|
},
|
|
{
|
|
"epoch": 2.9508196721311473,
|
|
"grad_norm": 0.3011454778763948,
|
|
"learning_rate": 1.325910115169471e-08,
|
|
"loss": 0.3576,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 2.951538682772505,
|
|
"grad_norm": 0.3137639258571949,
|
|
"learning_rate": 1.2874324750424827e-08,
|
|
"loss": 0.3481,
|
|
"step": 20525
|
|
},
|
|
{
|
|
"epoch": 2.9522576934138627,
|
|
"grad_norm": 0.30701991466704626,
|
|
"learning_rate": 1.2495210329616091e-08,
|
|
"loss": 0.3481,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 2.95297670405522,
|
|
"grad_norm": 0.32093792697243734,
|
|
"learning_rate": 1.212175810419991e-08,
|
|
"loss": 0.3516,
|
|
"step": 20535
|
|
},
|
|
{
|
|
"epoch": 2.9536957146965777,
|
|
"grad_norm": 0.3314417785675213,
|
|
"learning_rate": 1.1753968285895812e-08,
|
|
"loss": 0.3637,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 2.954414725337935,
|
|
"grad_norm": 0.31720763388416395,
|
|
"learning_rate": 1.1391841083214783e-08,
|
|
"loss": 0.3375,
|
|
"step": 20545
|
|
},
|
|
{
|
|
"epoch": 2.9551337359792926,
|
|
"grad_norm": 0.3287030502099959,
|
|
"learning_rate": 1.1035376701457046e-08,
|
|
"loss": 0.3566,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 2.95585274662065,
|
|
"grad_norm": 0.3316071546997749,
|
|
"learning_rate": 1.0684575342710946e-08,
|
|
"loss": 0.3494,
|
|
"step": 20555
|
|
},
|
|
{
|
|
"epoch": 2.9565717572620076,
|
|
"grad_norm": 0.29597642878693026,
|
|
"learning_rate": 1.0339437205857395e-08,
|
|
"loss": 0.3406,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 2.957290767903365,
|
|
"grad_norm": 0.30733500521657,
|
|
"learning_rate": 9.999962486564319e-09,
|
|
"loss": 0.3464,
|
|
"step": 20565
|
|
},
|
|
{
|
|
"epoch": 2.9580097785447226,
|
|
"grad_norm": 0.31820148103238727,
|
|
"learning_rate": 9.666151377287768e-09,
|
|
"loss": 0.3521,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 2.95872878918608,
|
|
"grad_norm": 0.3099185849479619,
|
|
"learning_rate": 9.338004067277473e-09,
|
|
"loss": 0.3527,
|
|
"step": 20575
|
|
},
|
|
{
|
|
"epoch": 2.9594477998274376,
|
|
"grad_norm": 0.3173987262020995,
|
|
"learning_rate": 9.01552074256684e-09,
|
|
"loss": 0.3618,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 2.960166810468795,
|
|
"grad_norm": 0.32166638003526127,
|
|
"learning_rate": 8.69870158598074e-09,
|
|
"loss": 0.3605,
|
|
"step": 20585
|
|
},
|
|
{
|
|
"epoch": 2.9608858211101525,
|
|
"grad_norm": 0.3210814857400657,
|
|
"learning_rate": 8.387546777134382e-09,
|
|
"loss": 0.3503,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 2.96160483175151,
|
|
"grad_norm": 0.2969681480427528,
|
|
"learning_rate": 8.082056492428881e-09,
|
|
"loss": 0.3553,
|
|
"step": 20595
|
|
},
|
|
{
|
|
"epoch": 2.9623238423928675,
|
|
"grad_norm": 0.3288645255292419,
|
|
"learning_rate": 7.782230905055699e-09,
|
|
"loss": 0.355,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 2.9630428530342248,
|
|
"grad_norm": 0.3027674170191426,
|
|
"learning_rate": 7.488070184995532e-09,
|
|
"loss": 0.3662,
|
|
"step": 20605
|
|
},
|
|
{
|
|
"epoch": 2.9637618636755825,
|
|
"grad_norm": 0.3186717157693368,
|
|
"learning_rate": 7.1995744990138725e-09,
|
|
"loss": 0.3618,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 2.9644808743169397,
|
|
"grad_norm": 0.3236458510160991,
|
|
"learning_rate": 6.916744010667664e-09,
|
|
"loss": 0.3532,
|
|
"step": 20615
|
|
},
|
|
{
|
|
"epoch": 2.9651998849582974,
|
|
"grad_norm": 0.308800845677921,
|
|
"learning_rate": 6.639578880303088e-09,
|
|
"loss": 0.3481,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 2.9659188955996547,
|
|
"grad_norm": 0.31311455469072286,
|
|
"learning_rate": 6.3680792650511195e-09,
|
|
"loss": 0.3381,
|
|
"step": 20625
|
|
},
|
|
{
|
|
"epoch": 2.9666379062410124,
|
|
"grad_norm": 0.327509452324389,
|
|
"learning_rate": 6.102245318833078e-09,
|
|
"loss": 0.3658,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 2.9673569168823697,
|
|
"grad_norm": 0.3161914892201409,
|
|
"learning_rate": 5.842077192357298e-09,
|
|
"loss": 0.3611,
|
|
"step": 20635
|
|
},
|
|
{
|
|
"epoch": 2.9680759275237274,
|
|
"grad_norm": 0.30985081917277596,
|
|
"learning_rate": 5.587575033121351e-09,
|
|
"loss": 0.3539,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 2.968794938165085,
|
|
"grad_norm": 0.2981910052886259,
|
|
"learning_rate": 5.338738985407599e-09,
|
|
"loss": 0.3529,
|
|
"step": 20645
|
|
},
|
|
{
|
|
"epoch": 2.9695139488064424,
|
|
"grad_norm": 0.2944166779019275,
|
|
"learning_rate": 5.095569190290972e-09,
|
|
"loss": 0.3594,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 2.9702329594477996,
|
|
"grad_norm": 0.3085063397705311,
|
|
"learning_rate": 4.858065785627863e-09,
|
|
"loss": 0.3629,
|
|
"step": 20655
|
|
},
|
|
{
|
|
"epoch": 2.9709519700891573,
|
|
"grad_norm": 0.30295517426485213,
|
|
"learning_rate": 4.6262289060683414e-09,
|
|
"loss": 0.3572,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 2.971670980730515,
|
|
"grad_norm": 0.35923076109615476,
|
|
"learning_rate": 4.40005868304727e-09,
|
|
"loss": 0.3597,
|
|
"step": 20665
|
|
},
|
|
{
|
|
"epoch": 2.9723899913718723,
|
|
"grad_norm": 0.34893271399115416,
|
|
"learning_rate": 4.179555244784306e-09,
|
|
"loss": 0.3509,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 2.9731090020132296,
|
|
"grad_norm": 0.3143883926628604,
|
|
"learning_rate": 3.964718716291671e-09,
|
|
"loss": 0.3652,
|
|
"step": 20675
|
|
},
|
|
{
|
|
"epoch": 2.9738280126545873,
|
|
"grad_norm": 0.29830006241925805,
|
|
"learning_rate": 3.7555492193641626e-09,
|
|
"loss": 0.3589,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 2.974547023295945,
|
|
"grad_norm": 0.32414274647085906,
|
|
"learning_rate": 3.552046872586923e-09,
|
|
"loss": 0.3513,
|
|
"step": 20685
|
|
},
|
|
{
|
|
"epoch": 2.9752660339373023,
|
|
"grad_norm": 0.32909445069510174,
|
|
"learning_rate": 3.354211791330997e-09,
|
|
"loss": 0.3556,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 2.9759850445786595,
|
|
"grad_norm": 0.31026678511950995,
|
|
"learning_rate": 3.1620440877544455e-09,
|
|
"loss": 0.3606,
|
|
"step": 20695
|
|
},
|
|
{
|
|
"epoch": 2.9767040552200172,
|
|
"grad_norm": 0.305190412044465,
|
|
"learning_rate": 2.9755438708034545e-09,
|
|
"loss": 0.3598,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 2.977423065861375,
|
|
"grad_norm": 0.3194916805504214,
|
|
"learning_rate": 2.7947112462078928e-09,
|
|
"loss": 0.3553,
|
|
"step": 20705
|
|
},
|
|
{
|
|
"epoch": 2.978142076502732,
|
|
"grad_norm": 0.3057262686960243,
|
|
"learning_rate": 2.6195463164901956e-09,
|
|
"loss": 0.3426,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 2.9788610871440895,
|
|
"grad_norm": 0.30864903152168977,
|
|
"learning_rate": 2.4500491809531514e-09,
|
|
"loss": 0.3618,
|
|
"step": 20715
|
|
},
|
|
{
|
|
"epoch": 2.979580097785447,
|
|
"grad_norm": 0.3097525173144828,
|
|
"learning_rate": 2.286219935689893e-09,
|
|
"loss": 0.3465,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 2.980299108426805,
|
|
"grad_norm": 0.3069420211958998,
|
|
"learning_rate": 2.1280586735816787e-09,
|
|
"loss": 0.3487,
|
|
"step": 20725
|
|
},
|
|
{
|
|
"epoch": 2.981018119068162,
|
|
"grad_norm": 0.3177879063527259,
|
|
"learning_rate": 1.9755654842923413e-09,
|
|
"loss": 0.3618,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 2.9817371297095194,
|
|
"grad_norm": 0.3056246865363535,
|
|
"learning_rate": 1.8287404542771669e-09,
|
|
"loss": 0.3551,
|
|
"step": 20735
|
|
},
|
|
{
|
|
"epoch": 2.982456140350877,
|
|
"grad_norm": 0.3173333176806556,
|
|
"learning_rate": 1.6875836667729073e-09,
|
|
"loss": 0.3561,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 2.983175150992235,
|
|
"grad_norm": 0.30558313989198144,
|
|
"learning_rate": 1.5520952018055479e-09,
|
|
"loss": 0.3471,
|
|
"step": 20745
|
|
},
|
|
{
|
|
"epoch": 2.983894161633592,
|
|
"grad_norm": 0.3014106514790185,
|
|
"learning_rate": 1.4222751361880894e-09,
|
|
"loss": 0.345,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 2.98461317227495,
|
|
"grad_norm": 0.3088583558020864,
|
|
"learning_rate": 1.298123543519436e-09,
|
|
"loss": 0.365,
|
|
"step": 20755
|
|
},
|
|
{
|
|
"epoch": 2.985332182916307,
|
|
"grad_norm": 0.30252174119943026,
|
|
"learning_rate": 1.1796404941843975e-09,
|
|
"loss": 0.3653,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 2.986051193557665,
|
|
"grad_norm": 0.31549823464697346,
|
|
"learning_rate": 1.0668260553525767e-09,
|
|
"loss": 0.3644,
|
|
"step": 20765
|
|
},
|
|
{
|
|
"epoch": 2.986770204199022,
|
|
"grad_norm": 0.3013501884323165,
|
|
"learning_rate": 9.59680290983922e-10,
|
|
"loss": 0.3503,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 2.9874892148403798,
|
|
"grad_norm": 0.3121051698193485,
|
|
"learning_rate": 8.582032618220659e-10,
|
|
"loss": 0.3694,
|
|
"step": 20775
|
|
},
|
|
{
|
|
"epoch": 2.988208225481737,
|
|
"grad_norm": 0.31331924741224554,
|
|
"learning_rate": 7.62395025396545e-10,
|
|
"loss": 0.3695,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 2.9889272361230947,
|
|
"grad_norm": 0.30108692196709286,
|
|
"learning_rate": 6.722556360228006e-10,
|
|
"loss": 0.3546,
|
|
"step": 20785
|
|
},
|
|
{
|
|
"epoch": 2.989646246764452,
|
|
"grad_norm": 0.31417699206808497,
|
|
"learning_rate": 5.877851448055083e-10,
|
|
"loss": 0.3517,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 2.9903652574058097,
|
|
"grad_norm": 0.3198470224153279,
|
|
"learning_rate": 5.089835996319181e-10,
|
|
"loss": 0.3592,
|
|
"step": 20795
|
|
},
|
|
{
|
|
"epoch": 2.991084268047167,
|
|
"grad_norm": 0.3095277946847495,
|
|
"learning_rate": 4.3585104517629427e-10,
|
|
"loss": 0.3549,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 2.9918032786885247,
|
|
"grad_norm": 0.3064379816557907,
|
|
"learning_rate": 3.683875229010259e-10,
|
|
"loss": 0.3572,
|
|
"step": 20805
|
|
},
|
|
{
|
|
"epoch": 2.992522289329882,
|
|
"grad_norm": 0.31077957491930513,
|
|
"learning_rate": 3.0659307105218584e-10,
|
|
"loss": 0.3561,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 2.9932412999712397,
|
|
"grad_norm": 0.3094845779687916,
|
|
"learning_rate": 2.504677246628617e-10,
|
|
"loss": 0.3618,
|
|
"step": 20815
|
|
},
|
|
{
|
|
"epoch": 2.993960310612597,
|
|
"grad_norm": 0.30761750000478694,
|
|
"learning_rate": 2.0001151555315567e-10,
|
|
"loss": 0.3668,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 2.9946793212539546,
|
|
"grad_norm": 0.40069067232077094,
|
|
"learning_rate": 1.5522447232574345e-10,
|
|
"loss": 0.3679,
|
|
"step": 20825
|
|
},
|
|
{
|
|
"epoch": 2.995398331895312,
|
|
"grad_norm": 0.30539391121856113,
|
|
"learning_rate": 1.1610662037364607e-10,
|
|
"loss": 0.3552,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 2.9961173425366696,
|
|
"grad_norm": 0.30632811646513874,
|
|
"learning_rate": 8.265798187356844e-11,
|
|
"loss": 0.3503,
|
|
"step": 20835
|
|
},
|
|
{
|
|
"epoch": 2.996836353178027,
|
|
"grad_norm": 0.31324750295382386,
|
|
"learning_rate": 5.487857578811984e-11,
|
|
"loss": 0.3634,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 2.9975553638193846,
|
|
"grad_norm": 0.2974223358331177,
|
|
"learning_rate": 3.276841786581386e-11,
|
|
"loss": 0.3547,
|
|
"step": 20845
|
|
},
|
|
{
|
|
"epoch": 2.998274374460742,
|
|
"grad_norm": 0.3129318240396816,
|
|
"learning_rate": 1.6327520642178686e-11,
|
|
"loss": 0.3584,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 2.9989933851020996,
|
|
"grad_norm": 0.30680068198124816,
|
|
"learning_rate": 5.55589343864682e-12,
|
|
"loss": 0.3546,
|
|
"step": 20855
|
|
},
|
|
{
|
|
"epoch": 2.9997123957434573,
|
|
"grad_norm": 0.32602309806564345,
|
|
"learning_rate": 4.5354236033468e-13,
|
|
"loss": 0.3461,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_loss": 0.45375168323516846,
|
|
"eval_runtime": 0.5768,
|
|
"eval_samples_per_second": 43.343,
|
|
"eval_steps_per_second": 1.734,
|
|
"step": 20862
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 20862,
|
|
"total_flos": 2049636776804352.0,
|
|
"train_loss": 0.42822988295175696,
|
|
"train_runtime": 30718.8165,
|
|
"train_samples_per_second": 21.729,
|
|
"train_steps_per_second": 0.679
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 20862,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 2087,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2049636776804352.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|