{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 4971,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006035913686434284,
      "grad_norm": 13.465597639885102,
      "learning_rate": 1.8072289156626505e-07,
      "loss": 0.8887,
      "step": 10
    },
    {
      "epoch": 0.012071827372868568,
      "grad_norm": 12.414937705631793,
      "learning_rate": 3.8152610441767073e-07,
      "loss": 0.8871,
      "step": 20
    },
    {
      "epoch": 0.01810774105930285,
      "grad_norm": 5.604563580675332,
      "learning_rate": 5.823293172690764e-07,
      "loss": 0.7943,
      "step": 30
    },
    {
      "epoch": 0.024143654745737136,
      "grad_norm": 3.082127771205323,
      "learning_rate": 7.83132530120482e-07,
      "loss": 0.6989,
      "step": 40
    },
    {
      "epoch": 0.03017956843217142,
      "grad_norm": 1.9253363533227204,
      "learning_rate": 9.839357429718876e-07,
      "loss": 0.6283,
      "step": 50
    },
    {
      "epoch": 0.0362154821186057,
      "grad_norm": 1.2352792533570607,
      "learning_rate": 1.1847389558232934e-06,
      "loss": 0.5916,
      "step": 60
    },
    {
      "epoch": 0.04225139580503999,
      "grad_norm": 0.8094703225757798,
      "learning_rate": 1.385542168674699e-06,
      "loss": 0.5623,
      "step": 70
    },
    {
      "epoch": 0.04828730949147427,
      "grad_norm": 0.7924082712954621,
      "learning_rate": 1.5863453815261046e-06,
      "loss": 0.536,
      "step": 80
    },
    {
      "epoch": 0.05432322317790855,
      "grad_norm": 0.7765422489934142,
      "learning_rate": 1.7871485943775102e-06,
      "loss": 0.5246,
      "step": 90
    },
    {
      "epoch": 0.06035913686434284,
      "grad_norm": 1.2024335532490196,
      "learning_rate": 1.987951807228916e-06,
      "loss": 0.5112,
      "step": 100
    },
    {
      "epoch": 0.06639505055077713,
      "grad_norm": 0.7361271470838762,
      "learning_rate": 2.1887550200803216e-06,
      "loss": 0.4973,
      "step": 110
    },
    {
      "epoch": 0.0724309642372114,
      "grad_norm": 0.9636947285799289,
      "learning_rate": 2.389558232931727e-06,
      "loss": 0.4926,
      "step": 120
    },
    {
      "epoch": 0.07846687792364569,
      "grad_norm": 0.834221678860187,
      "learning_rate": 2.590361445783133e-06,
      "loss": 0.4868,
      "step": 130
    },
    {
      "epoch": 0.08450279161007998,
      "grad_norm": 0.721459089158014,
      "learning_rate": 2.791164658634538e-06,
      "loss": 0.4836,
      "step": 140
    },
    {
      "epoch": 0.09053870529651425,
      "grad_norm": 0.7680662169711512,
      "learning_rate": 2.991967871485944e-06,
      "loss": 0.4759,
      "step": 150
    },
    {
      "epoch": 0.09657461898294854,
      "grad_norm": 0.8074470103289187,
      "learning_rate": 3.1927710843373494e-06,
      "loss": 0.4753,
      "step": 160
    },
    {
      "epoch": 0.10261053266938283,
      "grad_norm": 0.7821569953929599,
      "learning_rate": 3.393574297188755e-06,
      "loss": 0.4689,
      "step": 170
    },
    {
      "epoch": 0.1086464463558171,
      "grad_norm": 0.8046561770221946,
      "learning_rate": 3.5943775100401606e-06,
      "loss": 0.4678,
      "step": 180
    },
    {
      "epoch": 0.1146823600422514,
      "grad_norm": 0.8787311236716008,
      "learning_rate": 3.7951807228915664e-06,
      "loss": 0.463,
      "step": 190
    },
    {
      "epoch": 0.12071827372868568,
      "grad_norm": 0.8810490177348705,
      "learning_rate": 3.995983935742972e-06,
      "loss": 0.4601,
      "step": 200
    },
    {
      "epoch": 0.12675418741511996,
      "grad_norm": 0.8889957032229883,
      "learning_rate": 4.196787148594378e-06,
      "loss": 0.4589,
      "step": 210
    },
    {
      "epoch": 0.13279010110155426,
      "grad_norm": 1.0047774511651133,
      "learning_rate": 4.397590361445783e-06,
      "loss": 0.4533,
      "step": 220
    },
    {
      "epoch": 0.13882601478798853,
      "grad_norm": 0.8382898002966074,
      "learning_rate": 4.598393574297189e-06,
      "loss": 0.4541,
      "step": 230
    },
    {
      "epoch": 0.1448619284744228,
      "grad_norm": 0.8867952107395614,
      "learning_rate": 4.799196787148594e-06,
      "loss": 0.4488,
      "step": 240
    },
    {
      "epoch": 0.1508978421608571,
      "grad_norm": 0.8306941422038123,
      "learning_rate": 5e-06,
      "loss": 0.4521,
      "step": 250
    },
    {
      "epoch": 0.15693375584729138,
      "grad_norm": 0.8874942497893604,
      "learning_rate": 5.200803212851407e-06,
      "loss": 0.4505,
      "step": 260
    },
    {
      "epoch": 0.16296966953372566,
      "grad_norm": 0.8911658824764517,
      "learning_rate": 5.401606425702812e-06,
      "loss": 0.446,
      "step": 270
    },
    {
      "epoch": 0.16900558322015996,
      "grad_norm": 0.8852855388263275,
      "learning_rate": 5.602409638554217e-06,
      "loss": 0.4452,
      "step": 280
    },
    {
      "epoch": 0.17504149690659423,
      "grad_norm": 0.9910318655855725,
      "learning_rate": 5.803212851405623e-06,
      "loss": 0.4413,
      "step": 290
    },
    {
      "epoch": 0.1810774105930285,
      "grad_norm": 1.0110708733608424,
      "learning_rate": 6.004016064257029e-06,
      "loss": 0.4397,
      "step": 300
    },
    {
      "epoch": 0.1871133242794628,
      "grad_norm": 0.9003864963841174,
      "learning_rate": 6.2048192771084344e-06,
      "loss": 0.4414,
      "step": 310
    },
    {
      "epoch": 0.19314923796589709,
      "grad_norm": 0.906399226331659,
      "learning_rate": 6.40562248995984e-06,
      "loss": 0.4372,
      "step": 320
    },
    {
      "epoch": 0.19918515165233136,
      "grad_norm": 1.071695971731785,
      "learning_rate": 6.606425702811245e-06,
      "loss": 0.4381,
      "step": 330
    },
    {
      "epoch": 0.20522106533876566,
      "grad_norm": 0.9761062070856111,
      "learning_rate": 6.8072289156626514e-06,
      "loss": 0.4343,
      "step": 340
    },
    {
      "epoch": 0.21125697902519994,
      "grad_norm": 0.9721838815300707,
      "learning_rate": 7.008032128514058e-06,
      "loss": 0.4373,
      "step": 350
    },
    {
      "epoch": 0.2172928927116342,
      "grad_norm": 0.8909414468062403,
      "learning_rate": 7.208835341365462e-06,
      "loss": 0.4336,
      "step": 360
    },
    {
      "epoch": 0.2233288063980685,
      "grad_norm": 0.964718436271309,
      "learning_rate": 7.4096385542168684e-06,
      "loss": 0.4348,
      "step": 370
    },
    {
      "epoch": 0.2293647200845028,
      "grad_norm": 1.025409695885071,
      "learning_rate": 7.610441767068274e-06,
      "loss": 0.4326,
      "step": 380
    },
    {
      "epoch": 0.23540063377093706,
      "grad_norm": 0.9270275542948012,
      "learning_rate": 7.81124497991968e-06,
      "loss": 0.4324,
      "step": 390
    },
    {
      "epoch": 0.24143654745737136,
      "grad_norm": 0.9390965093376761,
      "learning_rate": 8.012048192771085e-06,
      "loss": 0.4302,
      "step": 400
    },
    {
      "epoch": 0.24747246114380564,
      "grad_norm": 0.7707812298350031,
      "learning_rate": 8.21285140562249e-06,
      "loss": 0.4276,
      "step": 410
    },
    {
      "epoch": 0.2535083748302399,
      "grad_norm": 0.8215921682895242,
      "learning_rate": 8.413654618473896e-06,
      "loss": 0.4274,
      "step": 420
    },
    {
      "epoch": 0.2595442885166742,
      "grad_norm": 1.0290878620245738,
      "learning_rate": 8.614457831325302e-06,
      "loss": 0.427,
      "step": 430
    },
    {
      "epoch": 0.2655802022031085,
      "grad_norm": 0.9009095092288704,
      "learning_rate": 8.815261044176707e-06,
      "loss": 0.4232,
      "step": 440
    },
    {
      "epoch": 0.27161611588954276,
      "grad_norm": 0.9646916353387767,
      "learning_rate": 9.016064257028112e-06,
      "loss": 0.4235,
      "step": 450
    },
    {
      "epoch": 0.27765202957597707,
      "grad_norm": 0.8009669905789347,
      "learning_rate": 9.21686746987952e-06,
      "loss": 0.4248,
      "step": 460
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 2.57971922495045,
      "learning_rate": 9.417670682730925e-06,
      "loss": 0.4246,
      "step": 470
    },
    {
      "epoch": 0.2897238569488456,
      "grad_norm": 0.9225235875464007,
      "learning_rate": 9.61847389558233e-06,
      "loss": 0.4256,
      "step": 480
    },
    {
      "epoch": 0.2957597706352799,
      "grad_norm": 0.8937790567235143,
      "learning_rate": 9.819277108433736e-06,
      "loss": 0.4232,
      "step": 490
    },
    {
      "epoch": 0.3017956843217142,
      "grad_norm": 0.992661961364272,
      "learning_rate": 9.99999876677608e-06,
      "loss": 0.4236,
      "step": 500
    },
    {
      "epoch": 0.30783159800814847,
      "grad_norm": 1.1203639087859305,
      "learning_rate": 9.999850780641762e-06,
      "loss": 0.423,
      "step": 510
    },
    {
      "epoch": 0.31386751169458277,
      "grad_norm": 0.9504675074156581,
      "learning_rate": 9.999456158087994e-06,
      "loss": 0.4255,
      "step": 520
    },
    {
      "epoch": 0.31990342538101707,
      "grad_norm": 1.1526705071263037,
      "learning_rate": 9.998814918581017e-06,
      "loss": 0.4236,
      "step": 530
    },
    {
      "epoch": 0.3259393390674513,
      "grad_norm": 0.9400926329756719,
      "learning_rate": 9.99792709375238e-06,
      "loss": 0.4193,
      "step": 540
    },
    {
      "epoch": 0.3319752527538856,
      "grad_norm": 0.761979605644821,
      "learning_rate": 9.996792727397374e-06,
      "loss": 0.4178,
      "step": 550
    },
    {
      "epoch": 0.3380111664403199,
      "grad_norm": 0.7761858463434534,
      "learning_rate": 9.995411875472882e-06,
      "loss": 0.4172,
      "step": 560
    },
    {
      "epoch": 0.34404708012675417,
      "grad_norm": 0.8353265789234773,
      "learning_rate": 9.993784606094612e-06,
      "loss": 0.417,
      "step": 570
    },
    {
      "epoch": 0.35008299381318847,
      "grad_norm": 0.7921534241896437,
      "learning_rate": 9.991910999533739e-06,
      "loss": 0.4164,
      "step": 580
    },
    {
      "epoch": 0.35611890749962277,
      "grad_norm": 0.8368518529458858,
      "learning_rate": 9.98979114821294e-06,
      "loss": 0.4212,
      "step": 590
    },
    {
      "epoch": 0.362154821186057,
      "grad_norm": 0.8526689259731893,
      "learning_rate": 9.98742515670185e-06,
      "loss": 0.413,
      "step": 600
    },
    {
      "epoch": 0.3681907348724913,
      "grad_norm": 0.8691355689423315,
      "learning_rate": 9.98481314171188e-06,
      "loss": 0.4147,
      "step": 610
    },
    {
      "epoch": 0.3742266485589256,
      "grad_norm": 0.7413766525933784,
      "learning_rate": 9.981955232090484e-06,
      "loss": 0.4202,
      "step": 620
    },
    {
      "epoch": 0.38026256224535987,
      "grad_norm": 0.862826800304683,
      "learning_rate": 9.978851568814789e-06,
      "loss": 0.4144,
      "step": 630
    },
    {
      "epoch": 0.38629847593179417,
      "grad_norm": 0.852995884285724,
      "learning_rate": 9.975502304984643e-06,
      "loss": 0.4159,
      "step": 640
    },
    {
      "epoch": 0.3923343896182285,
      "grad_norm": 0.8190268708459463,
      "learning_rate": 9.971907605815065e-06,
      "loss": 0.4133,
      "step": 650
    },
    {
      "epoch": 0.3983703033046627,
      "grad_norm": 0.7826738241592833,
      "learning_rate": 9.968067648628092e-06,
      "loss": 0.417,
      "step": 660
    },
    {
      "epoch": 0.404406216991097,
      "grad_norm": 0.8234056482304477,
      "learning_rate": 9.963982622844037e-06,
      "loss": 0.4151,
      "step": 670
    },
    {
      "epoch": 0.4104421306775313,
      "grad_norm": 0.8389822495874198,
      "learning_rate": 9.959652729972138e-06,
      "loss": 0.4142,
      "step": 680
    },
    {
      "epoch": 0.41647804436396557,
      "grad_norm": 0.7530220222404655,
      "learning_rate": 9.955078183600626e-06,
      "loss": 0.4135,
      "step": 690
    },
    {
      "epoch": 0.4225139580503999,
      "grad_norm": 0.8094044727188283,
      "learning_rate": 9.950259209386182e-06,
      "loss": 0.4076,
      "step": 700
    },
    {
      "epoch": 0.4285498717368342,
      "grad_norm": 0.7704390882655109,
      "learning_rate": 9.945196045042812e-06,
      "loss": 0.41,
      "step": 710
    },
    {
      "epoch": 0.4345857854232684,
      "grad_norm": 0.9003987196323937,
      "learning_rate": 9.93988894033011e-06,
      "loss": 0.4114,
      "step": 720
    },
    {
      "epoch": 0.4406216991097027,
      "grad_norm": 0.8729571471009108,
      "learning_rate": 9.934338157040953e-06,
      "loss": 0.4128,
      "step": 730
    },
    {
      "epoch": 0.446657612796137,
      "grad_norm": 0.7801434856688376,
      "learning_rate": 9.928543968988576e-06,
      "loss": 0.4103,
      "step": 740
    },
    {
      "epoch": 0.4526935264825713,
      "grad_norm": 0.9417689284475159,
      "learning_rate": 9.922506661993067e-06,
      "loss": 0.4086,
      "step": 750
    },
    {
      "epoch": 0.4587294401690056,
      "grad_norm": 0.8877368018323296,
      "learning_rate": 9.91622653386727e-06,
      "loss": 0.4139,
      "step": 760
    },
    {
      "epoch": 0.4647653538554399,
      "grad_norm": 0.7960343939884429,
      "learning_rate": 9.909703894402093e-06,
      "loss": 0.4072,
      "step": 770
    },
    {
      "epoch": 0.4708012675418741,
      "grad_norm": 0.7142525800658928,
      "learning_rate": 9.90293906535123e-06,
      "loss": 0.4069,
      "step": 780
    },
    {
      "epoch": 0.4768371812283084,
      "grad_norm": 0.8168998091378754,
      "learning_rate": 9.895932380415277e-06,
      "loss": 0.4053,
      "step": 790
    },
    {
      "epoch": 0.48287309491474273,
      "grad_norm": 0.7851582099155968,
      "learning_rate": 9.888684185225291e-06,
      "loss": 0.4096,
      "step": 800
    },
    {
      "epoch": 0.48890900860117703,
      "grad_norm": 0.7313895363802666,
      "learning_rate": 9.881194837325722e-06,
      "loss": 0.4035,
      "step": 810
    },
    {
      "epoch": 0.4949449222876113,
      "grad_norm": 0.801599057157289,
      "learning_rate": 9.873464706156785e-06,
      "loss": 0.4082,
      "step": 820
    },
    {
      "epoch": 0.5009808359740455,
      "grad_norm": 0.7959824627607599,
      "learning_rate": 9.865494173036238e-06,
      "loss": 0.4086,
      "step": 830
    },
    {
      "epoch": 0.5070167496604798,
      "grad_norm": 0.7643194639900054,
      "learning_rate": 9.857283631140563e-06,
      "loss": 0.4097,
      "step": 840
    },
    {
      "epoch": 0.5130526633469141,
      "grad_norm": 0.8141162481887632,
      "learning_rate": 9.848833485485577e-06,
      "loss": 0.4068,
      "step": 850
    },
    {
      "epoch": 0.5190885770333484,
      "grad_norm": 0.7263606575446551,
      "learning_rate": 9.840144152906455e-06,
      "loss": 0.4052,
      "step": 860
    },
    {
      "epoch": 0.5251244907197827,
      "grad_norm": 0.7326820835121685,
      "learning_rate": 9.831216062037163e-06,
      "loss": 0.403,
      "step": 870
    },
    {
      "epoch": 0.531160404406217,
      "grad_norm": 0.7722145618849807,
      "learning_rate": 9.822049653289318e-06,
      "loss": 0.4041,
      "step": 880
    },
    {
      "epoch": 0.5371963180926512,
      "grad_norm": 0.7035970302521439,
      "learning_rate": 9.81264537883046e-06,
      "loss": 0.401,
      "step": 890
    },
    {
      "epoch": 0.5432322317790855,
      "grad_norm": 0.6580207236042055,
      "learning_rate": 9.803003702561753e-06,
      "loss": 0.4057,
      "step": 900
    },
    {
      "epoch": 0.5492681454655198,
      "grad_norm": 0.6960070468306416,
      "learning_rate": 9.79312510009509e-06,
      "loss": 0.4103,
      "step": 910
    },
    {
      "epoch": 0.5553040591519541,
      "grad_norm": 0.7088936549744779,
      "learning_rate": 9.783010058729644e-06,
      "loss": 0.4024,
      "step": 920
    },
    {
      "epoch": 0.5613399728383884,
      "grad_norm": 0.8173990374915286,
      "learning_rate": 9.772659077427824e-06,
      "loss": 0.3983,
      "step": 930
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 0.7248588219467303,
      "learning_rate": 9.762072666790658e-06,
      "loss": 0.4042,
      "step": 940
    },
    {
      "epoch": 0.5734118002112569,
      "grad_norm": 0.6953286894486166,
      "learning_rate": 9.751251349032615e-06,
      "loss": 0.4052,
      "step": 950
    },
    {
      "epoch": 0.5794477138976912,
      "grad_norm": 0.6805775618542874,
      "learning_rate": 9.74019565795584e-06,
      "loss": 0.4028,
      "step": 960
    },
    {
      "epoch": 0.5854836275841255,
      "grad_norm": 0.7073250522342893,
      "learning_rate": 9.728906138923823e-06,
      "loss": 0.4031,
      "step": 970
    },
    {
      "epoch": 0.5915195412705598,
      "grad_norm": 0.8161486510568995,
      "learning_rate": 9.71738334883449e-06,
      "loss": 0.4012,
      "step": 980
    },
    {
      "epoch": 0.5975554549569941,
      "grad_norm": 0.7478470587664012,
      "learning_rate": 9.705627856092743e-06,
      "loss": 0.4035,
      "step": 990
    },
    {
      "epoch": 0.6035913686434284,
      "grad_norm": 1.2181648223419725,
      "learning_rate": 9.69364024058242e-06,
      "loss": 0.3994,
      "step": 1000
    },
    {
      "epoch": 0.6096272823298626,
      "grad_norm": 0.724496170506016,
      "learning_rate": 9.681421093637677e-06,
      "loss": 0.4003,
      "step": 1010
    },
    {
      "epoch": 0.6156631960162969,
      "grad_norm": 0.7245373569956688,
      "learning_rate": 9.668971018013835e-06,
      "loss": 0.3993,
      "step": 1020
    },
    {
      "epoch": 0.6216991097027312,
      "grad_norm": 1.3707555561464966,
      "learning_rate": 9.656290627857638e-06,
      "loss": 0.4031,
      "step": 1030
    },
    {
      "epoch": 0.6277350233891655,
      "grad_norm": 0.8617205371794142,
      "learning_rate": 9.643380548676957e-06,
      "loss": 0.3989,
      "step": 1040
    },
    {
      "epoch": 0.6337709370755998,
      "grad_norm": 0.7218421707442351,
      "learning_rate": 9.63024141730994e-06,
      "loss": 0.4009,
      "step": 1050
    },
    {
      "epoch": 0.6398068507620341,
      "grad_norm": 0.7919863849580143,
      "learning_rate": 9.616873881893593e-06,
      "loss": 0.402,
      "step": 1060
    },
    {
      "epoch": 0.6458427644484683,
      "grad_norm": 0.7643496416415103,
      "learning_rate": 9.603278601831806e-06,
      "loss": 0.3966,
      "step": 1070
    },
    {
      "epoch": 0.6518786781349026,
      "grad_norm": 0.8387350986976135,
      "learning_rate": 9.58945624776284e-06,
      "loss": 0.3974,
      "step": 1080
    },
    {
      "epoch": 0.6579145918213369,
      "grad_norm": 0.7195707742464319,
      "learning_rate": 9.575407501526218e-06,
      "loss": 0.4033,
      "step": 1090
    },
    {
      "epoch": 0.6639505055077712,
      "grad_norm": 0.8948583587192116,
      "learning_rate": 9.561133056129122e-06,
      "loss": 0.4005,
      "step": 1100
    },
    {
      "epoch": 0.6699864191942055,
      "grad_norm": 0.7784558611785358,
      "learning_rate": 9.546633615712184e-06,
      "loss": 0.3969,
      "step": 1110
    },
    {
      "epoch": 0.6760223328806398,
      "grad_norm": 0.7279188084081983,
      "learning_rate": 9.531909895514766e-06,
      "loss": 0.3968,
      "step": 1120
    },
    {
      "epoch": 0.6820582465670741,
      "grad_norm": 0.7707824454002812,
      "learning_rate": 9.516962621839667e-06,
      "loss": 0.3941,
      "step": 1130
    },
    {
      "epoch": 0.6880941602535083,
      "grad_norm": 0.7559246242676043,
      "learning_rate": 9.501792532017304e-06,
      "loss": 0.3935,
      "step": 1140
    },
    {
      "epoch": 0.6941300739399426,
      "grad_norm": 0.7670492895949397,
      "learning_rate": 9.48640037436934e-06,
      "loss": 0.3962,
      "step": 1150
    },
    {
      "epoch": 0.7001659876263769,
      "grad_norm": 0.7574175499302432,
      "learning_rate": 9.470786908171761e-06,
      "loss": 0.396,
      "step": 1160
    },
    {
      "epoch": 0.7062019013128112,
      "grad_norm": 1.1364368407573255,
      "learning_rate": 9.454952903617434e-06,
      "loss": 0.3987,
      "step": 1170
    },
    {
      "epoch": 0.7122378149992455,
      "grad_norm": 0.6929517509246322,
      "learning_rate": 9.438899141778105e-06,
      "loss": 0.3959,
      "step": 1180
    },
    {
      "epoch": 0.7182737286856798,
      "grad_norm": 0.7239918001848392,
      "learning_rate": 9.42262641456588e-06,
      "loss": 0.3961,
      "step": 1190
    },
    {
      "epoch": 0.724309642372114,
      "grad_norm": 0.7351627240649914,
      "learning_rate": 9.406135524694146e-06,
      "loss": 0.3946,
      "step": 1200
    },
    {
      "epoch": 0.7303455560585483,
      "grad_norm": 0.7178193311197739,
      "learning_rate": 9.389427285637986e-06,
      "loss": 0.3934,
      "step": 1210
    },
    {
      "epoch": 0.7363814697449826,
      "grad_norm": 0.7197436378060236,
      "learning_rate": 9.372502521594052e-06,
      "loss": 0.3951,
      "step": 1220
    },
    {
      "epoch": 0.7424173834314169,
      "grad_norm": 0.7020942866993558,
      "learning_rate": 9.355362067439899e-06,
      "loss": 0.3953,
      "step": 1230
    },
    {
      "epoch": 0.7484532971178512,
      "grad_norm": 0.6493652144119091,
      "learning_rate": 9.338006768692807e-06,
      "loss": 0.3976,
      "step": 1240
    },
    {
      "epoch": 0.7544892108042855,
      "grad_norm": 0.7452091082245685,
      "learning_rate": 9.320437481468077e-06,
      "loss": 0.3947,
      "step": 1250
    },
    {
      "epoch": 0.7605251244907197,
      "grad_norm": 0.7211982596336295,
      "learning_rate": 9.302655072436789e-06,
      "loss": 0.3978,
      "step": 1260
    },
    {
      "epoch": 0.766561038177154,
      "grad_norm": 0.8069527677411222,
      "learning_rate": 9.284660418783064e-06,
      "loss": 0.3961,
      "step": 1270
    },
    {
      "epoch": 0.7725969518635883,
      "grad_norm": 0.6964974366663241,
      "learning_rate": 9.266454408160779e-06,
      "loss": 0.395,
      "step": 1280
    },
    {
      "epoch": 0.7786328655500226,
      "grad_norm": 0.6951835215600591,
      "learning_rate": 9.248037938649792e-06,
      "loss": 0.3918,
      "step": 1290
    },
    {
      "epoch": 0.784668779236457,
      "grad_norm": 0.7011033108204148,
      "learning_rate": 9.229411918711637e-06,
      "loss": 0.3911,
      "step": 1300
    },
    {
      "epoch": 0.7907046929228913,
      "grad_norm": 0.6699999752789259,
      "learning_rate": 9.210577267144703e-06,
      "loss": 0.3917,
      "step": 1310
    },
    {
      "epoch": 0.7967406066093254,
      "grad_norm": 0.7952469588442095,
      "learning_rate": 9.191534913038926e-06,
      "loss": 0.393,
      "step": 1320
    },
    {
      "epoch": 0.8027765202957597,
      "grad_norm": 0.7362949625214187,
      "learning_rate": 9.172285795729945e-06,
      "loss": 0.3916,
      "step": 1330
    },
    {
      "epoch": 0.808812433982194,
      "grad_norm": 0.777349182077021,
      "learning_rate": 9.152830864752773e-06,
      "loss": 0.396,
      "step": 1340
    },
    {
      "epoch": 0.8148483476686283,
      "grad_norm": 0.6858011231159463,
      "learning_rate": 9.133171079794952e-06,
      "loss": 0.3949,
      "step": 1350
    },
    {
      "epoch": 0.8208842613550626,
      "grad_norm": 0.8252893789848457,
      "learning_rate": 9.113307410649222e-06,
      "loss": 0.3951,
      "step": 1360
    },
    {
      "epoch": 0.826920175041497,
      "grad_norm": 0.742614174317752,
      "learning_rate": 9.093240837165668e-06,
      "loss": 0.3912,
      "step": 1370
    },
    {
      "epoch": 0.8329560887279311,
      "grad_norm": 0.6712408370389595,
      "learning_rate": 9.072972349203401e-06,
      "loss": 0.3938,
      "step": 1380
    },
    {
      "epoch": 0.8389920024143654,
      "grad_norm": 0.7390425813359819,
      "learning_rate": 9.052502946581718e-06,
      "loss": 0.3902,
      "step": 1390
    },
    {
      "epoch": 0.8450279161007997,
      "grad_norm": 0.9031901060003036,
      "learning_rate": 9.031833639030789e-06,
      "loss": 0.39,
      "step": 1400
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.8073830235615219,
      "learning_rate": 9.010965446141842e-06,
      "loss": 0.3907,
      "step": 1410
    },
    {
      "epoch": 0.8570997434736684,
      "grad_norm": 0.7197468777451328,
      "learning_rate": 8.989899397316875e-06,
      "loss": 0.3933,
      "step": 1420
    },
    {
      "epoch": 0.8631356571601027,
      "grad_norm": 0.7874409375571629,
      "learning_rate": 8.96863653171787e-06,
      "loss": 0.3941,
      "step": 1430
    },
    {
      "epoch": 0.8691715708465368,
      "grad_norm": 0.7047790860975574,
      "learning_rate": 8.947177898215538e-06,
      "loss": 0.3918,
      "step": 1440
    },
    {
      "epoch": 0.8752074845329711,
      "grad_norm": 0.6732410856766448,
      "learning_rate": 8.925524555337575e-06,
      "loss": 0.3948,
      "step": 1450
    },
    {
      "epoch": 0.8812433982194054,
      "grad_norm": 0.6379130166882847,
      "learning_rate": 8.90367757121645e-06,
      "loss": 0.392,
      "step": 1460
    },
    {
      "epoch": 0.8872793119058398,
      "grad_norm": 0.6453169279070088,
      "learning_rate": 8.881638023536715e-06,
      "loss": 0.3902,
      "step": 1470
    },
    {
      "epoch": 0.893315225592274,
      "grad_norm": 0.8925532684482897,
      "learning_rate": 8.859406999481839e-06,
      "loss": 0.3897,
      "step": 1480
    },
    {
      "epoch": 0.8993511392787084,
      "grad_norm": 0.7321151042406583,
      "learning_rate": 8.836985595680585e-06,
      "loss": 0.3903,
      "step": 1490
    },
    {
      "epoch": 0.9053870529651425,
      "grad_norm": 0.717542202485072,
      "learning_rate": 8.81437491815291e-06,
      "loss": 0.3907,
      "step": 1500
    },
    {
      "epoch": 0.9114229666515768,
      "grad_norm": 0.6899069830042462,
      "learning_rate": 8.791576082255414e-06,
      "loss": 0.3914,
      "step": 1510
    },
    {
      "epoch": 0.9174588803380112,
      "grad_norm": 0.7416902913208727,
      "learning_rate": 8.768590212626305e-06,
      "loss": 0.3914,
      "step": 1520
    },
    {
      "epoch": 0.9234947940244455,
      "grad_norm": 0.648187852127454,
      "learning_rate": 8.745418443129944e-06,
      "loss": 0.3878,
      "step": 1530
    },
    {
      "epoch": 0.9295307077108798,
      "grad_norm": 0.6971446829374528,
      "learning_rate": 8.722061916800892e-06,
      "loss": 0.3889,
      "step": 1540
    },
    {
      "epoch": 0.9355666213973141,
      "grad_norm": 0.6897656341763103,
      "learning_rate": 8.698521785787543e-06,
      "loss": 0.3916,
      "step": 1550
    },
    {
      "epoch": 0.9416025350837482,
      "grad_norm": 0.6707821534631215,
      "learning_rate": 8.674799211295272e-06,
      "loss": 0.3872,
      "step": 1560
    },
    {
      "epoch": 0.9476384487701826,
      "grad_norm": 0.7047440310341709,
      "learning_rate": 8.650895363529172e-06,
      "loss": 0.3893,
      "step": 1570
    },
    {
      "epoch": 0.9536743624566169,
      "grad_norm": 0.7111300925227007,
      "learning_rate": 8.626811421636318e-06,
      "loss": 0.3899,
      "step": 1580
    },
    {
      "epoch": 0.9597102761430512,
      "grad_norm": 0.742242466940292,
      "learning_rate": 8.602548573647603e-06,
      "loss": 0.3933,
      "step": 1590
    },
    {
      "epoch": 0.9657461898294855,
      "grad_norm": 0.6405514647772552,
      "learning_rate": 8.578108016419138e-06,
      "loss": 0.3886,
      "step": 1600
    },
    {
      "epoch": 0.9717821035159198,
      "grad_norm": 0.6969067995610034,
      "learning_rate": 8.553490955573207e-06,
      "loss": 0.3875,
      "step": 1610
    },
    {
      "epoch": 0.9778180172023541,
      "grad_norm": 0.6404080311189763,
      "learning_rate": 8.528698605438801e-06,
      "loss": 0.3915,
      "step": 1620
    },
    {
      "epoch": 0.9838539308887883,
      "grad_norm": 0.689314089106684,
      "learning_rate": 8.50373218899171e-06,
      "loss": 0.3897,
      "step": 1630
    },
    {
      "epoch": 0.9898898445752226,
      "grad_norm": 0.6238451440610306,
      "learning_rate": 8.478592937794202e-06,
      "loss": 0.3865,
      "step": 1640
    },
    {
      "epoch": 0.9959257582616569,
      "grad_norm": 0.6246538104726604,
      "learning_rate": 8.453282091934262e-06,
      "loss": 0.3891,
      "step": 1650
    },
    {
      "epoch": 1.0018107741059303,
      "grad_norm": 0.6650133535244673,
      "learning_rate": 8.427800899964438e-06,
      "loss": 0.3775,
      "step": 1660
    },
    {
      "epoch": 1.0078466877923646,
      "grad_norm": 0.7340465665361768,
      "learning_rate": 8.402150618840229e-06,
      "loss": 0.3658,
      "step": 1670
    },
    {
      "epoch": 1.013882601478799,
      "grad_norm": 0.8803678131362109,
      "learning_rate": 8.376332513858091e-06,
      "loss": 0.3643,
      "step": 1680
    },
    {
      "epoch": 1.0199185151652332,
      "grad_norm": 0.6784266807756097,
      "learning_rate": 8.350347858593035e-06,
      "loss": 0.3632,
      "step": 1690
    },
    {
      "epoch": 1.0259544288516673,
      "grad_norm": 0.6757297253946429,
      "learning_rate": 8.324197934835775e-06,
      "loss": 0.3611,
      "step": 1700
    },
    {
      "epoch": 1.0319903425381016,
      "grad_norm": 0.6937615226816463,
      "learning_rate": 8.297884032529525e-06,
      "loss": 0.3641,
      "step": 1710
    },
    {
      "epoch": 1.038026256224536,
      "grad_norm": 0.6656265896882699,
      "learning_rate": 8.271407449706347e-06,
      "loss": 0.3634,
      "step": 1720
    },
    {
      "epoch": 1.0440621699109702,
      "grad_norm": 0.6758693000716391,
      "learning_rate": 8.244769492423144e-06,
      "loss": 0.3651,
      "step": 1730
    },
    {
      "epoch": 1.0500980835974045,
      "grad_norm": 0.7271602756269683,
      "learning_rate": 8.217971474697205e-06,
      "loss": 0.3655,
      "step": 1740
    },
    {
      "epoch": 1.0561339972838388,
      "grad_norm": 0.7262048623607191,
      "learning_rate": 8.191014718441413e-06,
      "loss": 0.3646,
      "step": 1750
    },
    {
      "epoch": 1.0621699109702731,
      "grad_norm": 0.7594858496478063,
      "learning_rate": 8.163900553399022e-06,
      "loss": 0.3683,
      "step": 1760
    },
    {
      "epoch": 1.0682058246567074,
      "grad_norm": 0.6834326812737692,
      "learning_rate": 8.13663031707806e-06,
      "loss": 0.3657,
      "step": 1770
    },
    {
      "epoch": 1.0742417383431417,
      "grad_norm": 0.829231127715137,
      "learning_rate": 8.109205354685367e-06,
      "loss": 0.3657,
      "step": 1780
    },
    {
      "epoch": 1.080277652029576,
      "grad_norm": 0.7172584884654448,
      "learning_rate": 8.081627019060223e-06,
      "loss": 0.3612,
      "step": 1790
    },
    {
      "epoch": 1.0863135657160103,
      "grad_norm": 0.700123283944604,
      "learning_rate": 8.053896670607616e-06,
      "loss": 0.3669,
      "step": 1800
    },
    {
      "epoch": 1.0923494794024446,
      "grad_norm": 0.6802763184360072,
      "learning_rate": 8.026015677231137e-06,
      "loss": 0.36,
      "step": 1810
    },
    {
      "epoch": 1.0983853930888787,
      "grad_norm": 0.6976972839342949,
      "learning_rate": 7.997985414265513e-06,
      "loss": 0.3645,
      "step": 1820
    },
    {
      "epoch": 1.104421306775313,
      "grad_norm": 0.6892045690564895,
      "learning_rate": 7.969807264408745e-06,
      "loss": 0.3664,
      "step": 1830
    },
    {
      "epoch": 1.1104572204617473,
      "grad_norm": 0.6606374628961976,
      "learning_rate": 7.94148261765391e-06,
      "loss": 0.3611,
      "step": 1840
    },
    {
      "epoch": 1.1164931341481816,
      "grad_norm": 0.7063672325182395,
      "learning_rate": 7.913012871220605e-06,
      "loss": 0.3652,
      "step": 1850
    },
    {
      "epoch": 1.122529047834616,
      "grad_norm": 0.6353061774622171,
      "learning_rate": 7.884399429486e-06,
      "loss": 0.3619,
      "step": 1860
    },
    {
      "epoch": 1.1285649615210502,
      "grad_norm": 0.6646621743965846,
      "learning_rate": 7.855643703915585e-06,
      "loss": 0.3638,
      "step": 1870
    },
    {
      "epoch": 1.1346008752074845,
      "grad_norm": 0.6379034557335701,
      "learning_rate": 7.826747112993532e-06,
      "loss": 0.3595,
      "step": 1880
    },
    {
      "epoch": 1.1406367888939188,
      "grad_norm": 0.6995974469144366,
      "learning_rate": 7.797711082152726e-06,
      "loss": 0.3628,
      "step": 1890
    },
    {
      "epoch": 1.1466727025803531,
      "grad_norm": 0.6564170955860726,
      "learning_rate": 7.768537043704447e-06,
      "loss": 0.3637,
      "step": 1900
    },
    {
      "epoch": 1.1527086162667874,
      "grad_norm": 0.7572552114374352,
      "learning_rate": 7.739226436767721e-06,
      "loss": 0.362,
      "step": 1910
    },
    {
      "epoch": 1.1587445299532217,
      "grad_norm": 0.7571612085211564,
      "learning_rate": 7.709780707198328e-06,
      "loss": 0.3638,
      "step": 1920
    },
    {
      "epoch": 1.164780443639656,
      "grad_norm": 0.6792493024466744,
      "learning_rate": 7.680201307517479e-06,
      "loss": 0.3625,
      "step": 1930
    },
    {
      "epoch": 1.1708163573260904,
      "grad_norm": 0.664259682779261,
      "learning_rate": 7.650489696840164e-06,
      "loss": 0.3646,
      "step": 1940
    },
    {
      "epoch": 1.1768522710125244,
      "grad_norm": 0.6270149603322056,
      "learning_rate": 7.6206473408031775e-06,
      "loss": 0.3624,
      "step": 1950
    },
    {
      "epoch": 1.1828881846989587,
      "grad_norm": 0.6383894085325998,
      "learning_rate": 7.590675711492823e-06,
      "loss": 0.3643,
      "step": 1960
    },
    {
      "epoch": 1.188924098385393,
      "grad_norm": 0.6816453891866903,
      "learning_rate": 7.56057628737229e-06,
      "loss": 0.3637,
      "step": 1970
    },
    {
      "epoch": 1.1949600120718273,
      "grad_norm": 0.7133078108250313,
      "learning_rate": 7.530350553208726e-06,
      "loss": 0.3585,
      "step": 1980
    },
    {
      "epoch": 1.2009959257582616,
      "grad_norm": 0.6322767475179056,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.361,
      "step": 1990
    },
    {
      "epoch": 1.207031839444696,
      "grad_norm": 0.6987380190815154,
      "learning_rate": 7.469526124901149e-06,
      "loss": 0.3623,
      "step": 2000
    },
    {
      "epoch": 1.2130677531311302,
      "grad_norm": 0.6219916214226197,
      "learning_rate": 7.4389304311505195e-06,
      "loss": 0.3637,
      "step": 2010
    },
    {
      "epoch": 1.2191036668175645,
      "grad_norm": 0.6591583924033313,
      "learning_rate": 7.408214427995628e-06,
      "loss": 0.3644,
      "step": 2020
    },
    {
      "epoch": 1.2251395805039988,
      "grad_norm": 0.7005471225701302,
      "learning_rate": 7.3773796306187e-06,
      "loss": 0.3595,
      "step": 2030
    },
    {
      "epoch": 1.2311754941904332,
      "grad_norm": 0.6332845796820719,
      "learning_rate": 7.346427560061931e-06,
      "loss": 0.3652,
      "step": 2040
    },
    {
      "epoch": 1.2372114078768675,
      "grad_norm": 0.6778224076333697,
      "learning_rate": 7.315359743152464e-06,
      "loss": 0.3606,
      "step": 2050
    },
    {
      "epoch": 1.2432473215633015,
      "grad_norm": 0.6582665893949518,
      "learning_rate": 7.284177712427056e-06,
      "loss": 0.3599,
      "step": 2060
    },
    {
      "epoch": 1.2492832352497358,
      "grad_norm": 0.6584059931101761,
      "learning_rate": 7.252883006056495e-06,
      "loss": 0.3622,
      "step": 2070
    },
    {
      "epoch": 1.2553191489361701,
      "grad_norm": 0.6857700496450303,
      "learning_rate": 7.221477167769716e-06,
      "loss": 0.3633,
      "step": 2080
    },
    {
      "epoch": 1.2613550626226044,
      "grad_norm": 0.6856644672766703,
      "learning_rate": 7.189961746777657e-06,
      "loss": 0.363,
      "step": 2090
    },
    {
      "epoch": 1.2673909763090387,
      "grad_norm": 0.6857005736783666,
      "learning_rate": 7.1583382976968295e-06,
      "loss": 0.3618,
      "step": 2100
    },
    {
      "epoch": 1.273426889995473,
      "grad_norm": 0.6166440607694041,
      "learning_rate": 7.126608380472642e-06,
      "loss": 0.3593,
      "step": 2110
    },
    {
      "epoch": 1.2794628036819073,
      "grad_norm": 0.6673854300030073,
      "learning_rate": 7.094773560302438e-06,
      "loss": 0.3616,
      "step": 2120
    },
    {
      "epoch": 1.2854987173683416,
      "grad_norm": 0.6261609808400934,
      "learning_rate": 7.062835407558295e-06,
      "loss": 0.3623,
      "step": 2130
    },
    {
      "epoch": 1.291534631054776,
      "grad_norm": 0.6573770008704372,
      "learning_rate": 7.030795497709559e-06,
      "loss": 0.3616,
      "step": 2140
    },
    {
      "epoch": 1.2975705447412103,
      "grad_norm": 0.63175357402283,
      "learning_rate": 6.99865541124513e-06,
      "loss": 0.363,
      "step": 2150
    },
    {
      "epoch": 1.3036064584276446,
      "grad_norm": 0.7095581591416922,
      "learning_rate": 6.9664167335954866e-06,
      "loss": 0.3604,
      "step": 2160
    },
    {
      "epoch": 1.3096423721140789,
      "grad_norm": 0.6211244267814455,
      "learning_rate": 6.9340810550545004e-06,
      "loss": 0.3584,
      "step": 2170
    },
    {
      "epoch": 1.3156782858005132,
      "grad_norm": 0.6411383893721285,
      "learning_rate": 6.901649970700966e-06,
      "loss": 0.3616,
      "step": 2180
    },
    {
      "epoch": 1.3217141994869475,
      "grad_norm": 0.6508872294411808,
      "learning_rate": 6.869125080319934e-06,
      "loss": 0.3626,
      "step": 2190
    },
    {
      "epoch": 1.3277501131733815,
      "grad_norm": 0.6456129899609592,
      "learning_rate": 6.836507988323785e-06,
      "loss": 0.3612,
      "step": 2200
    },
    {
      "epoch": 1.3337860268598158,
      "grad_norm": 0.6885055595324049,
      "learning_rate": 6.803800303673096e-06,
      "loss": 0.3588,
      "step": 2210
    },
    {
      "epoch": 1.3398219405462501,
      "grad_norm": 0.6841559054058574,
      "learning_rate": 6.77100363979726e-06,
      "loss": 0.3608,
      "step": 2220
    },
    {
      "epoch": 1.3458578542326844,
      "grad_norm": 0.7229876827512576,
      "learning_rate": 6.738119614514913e-06,
      "loss": 0.3655,
      "step": 2230
    },
    {
      "epoch": 1.3518937679191187,
      "grad_norm": 0.6235312062043321,
      "learning_rate": 6.705149849954116e-06,
      "loss": 0.3607,
      "step": 2240
    },
    {
      "epoch": 1.357929681605553,
      "grad_norm": 0.6372979896414575,
      "learning_rate": 6.672095972472339e-06,
      "loss": 0.3613,
      "step": 2250
    },
    {
      "epoch": 1.3639655952919874,
      "grad_norm": 0.5943237749223176,
      "learning_rate": 6.638959612576243e-06,
      "loss": 0.3578,
      "step": 2260
    },
    {
      "epoch": 1.3700015089784217,
      "grad_norm": 0.6331473442190148,
      "learning_rate": 6.605742404841241e-06,
      "loss": 0.3606,
      "step": 2270
    },
    {
      "epoch": 1.376037422664856,
      "grad_norm": 0.6352200712052698,
      "learning_rate": 6.572445987830869e-06,
      "loss": 0.3602,
      "step": 2280
    },
    {
      "epoch": 1.38207333635129,
      "grad_norm": 0.6315011206585134,
      "learning_rate": 6.539072004015962e-06,
      "loss": 0.3585,
      "step": 2290
    },
    {
      "epoch": 1.3881092500377243,
      "grad_norm": 0.672467399271792,
      "learning_rate": 6.505622099693624e-06,
      "loss": 0.359,
      "step": 2300
    },
    {
      "epoch": 1.3941451637241586,
      "grad_norm": 0.6540330679200106,
      "learning_rate": 6.4720979249060245e-06,
      "loss": 0.357,
      "step": 2310
    },
    {
      "epoch": 1.400181077410593,
      "grad_norm": 0.6296334356002367,
      "learning_rate": 6.438501133359006e-06,
      "loss": 0.363,
      "step": 2320
    },
    {
      "epoch": 1.4062169910970272,
      "grad_norm": 0.5755292937597596,
      "learning_rate": 6.404833382340498e-06,
      "loss": 0.3579,
      "step": 2330
    },
    {
      "epoch": 1.4122529047834615,
      "grad_norm": 0.6273216809842853,
      "learning_rate": 6.3710963326387845e-06,
      "loss": 0.361,
      "step": 2340
    },
    {
      "epoch": 1.4182888184698959,
      "grad_norm": 0.659504858020357,
      "learning_rate": 6.337291648460554e-06,
      "loss": 0.3648,
      "step": 2350
    },
    {
      "epoch": 1.4243247321563302,
      "grad_norm": 0.646430703430766,
      "learning_rate": 6.303420997348828e-06,
      "loss": 0.3609,
      "step": 2360
    },
    {
      "epoch": 1.4303606458427645,
      "grad_norm": 0.70677217944382,
      "learning_rate": 6.269486050100692e-06,
      "loss": 0.3583,
      "step": 2370
    },
    {
      "epoch": 1.4363965595291988,
      "grad_norm": 0.6982928562021034,
      "learning_rate": 6.2354884806848825e-06,
      "loss": 0.3587,
      "step": 2380
    },
    {
      "epoch": 1.442432473215633,
      "grad_norm": 0.635748838083391,
      "learning_rate": 6.201429966159203e-06,
      "loss": 0.3603,
      "step": 2390
    },
    {
      "epoch": 1.4484683869020674,
      "grad_norm": 0.6591941857655591,
      "learning_rate": 6.167312186587813e-06,
      "loss": 0.3587,
      "step": 2400
    },
    {
      "epoch": 1.4545043005885017,
      "grad_norm": 0.6513018732706167,
      "learning_rate": 6.133136824958334e-06,
      "loss": 0.3583,
      "step": 2410
    },
    {
      "epoch": 1.460540214274936,
      "grad_norm": 0.6895727383237782,
      "learning_rate": 6.098905567098846e-06,
      "loss": 0.3638,
      "step": 2420
    },
    {
      "epoch": 1.4665761279613703,
      "grad_norm": 0.6281650394691185,
      "learning_rate": 6.064620101594715e-06,
      "loss": 0.3629,
      "step": 2430
    },
    {
      "epoch": 1.4726120416478046,
      "grad_norm": 0.7324490252015554,
      "learning_rate": 6.030282119705306e-06,
      "loss": 0.3621,
      "step": 2440
    },
    {
      "epoch": 1.4786479553342387,
      "grad_norm": 0.6803933740478001,
      "learning_rate": 5.99589331528055e-06,
      "loss": 0.3613,
      "step": 2450
    },
    {
      "epoch": 1.484683869020673,
      "grad_norm": 0.6535344969186776,
      "learning_rate": 5.961455384677393e-06,
      "loss": 0.3588,
      "step": 2460
    },
    {
      "epoch": 1.4907197827071073,
      "grad_norm": 0.6220530519094237,
      "learning_rate": 5.92697002667611e-06,
      "loss": 0.3614,
      "step": 2470
    },
    {
      "epoch": 1.4967556963935416,
      "grad_norm": 0.5997735782443615,
      "learning_rate": 5.892438942396515e-06,
      "loss": 0.3562,
      "step": 2480
    },
    {
      "epoch": 1.5027916100799759,
      "grad_norm": 0.5881600037112182,
      "learning_rate": 5.857863835214041e-06,
      "loss": 0.36,
      "step": 2490
    },
    {
      "epoch": 1.5088275237664102,
      "grad_norm": 0.6301732957095514,
      "learning_rate": 5.823246410675714e-06,
      "loss": 0.3602,
      "step": 2500
    },
    {
      "epoch": 1.5148634374528445,
      "grad_norm": 0.6369138058336548,
      "learning_rate": 5.788588376416026e-06,
      "loss": 0.3575,
      "step": 2510
    },
    {
      "epoch": 1.5208993511392785,
      "grad_norm": 1.8916358390305654,
      "learning_rate": 5.753891442072693e-06,
      "loss": 0.3584,
      "step": 2520
    },
    {
      "epoch": 1.5269352648257128,
      "grad_norm": 0.6400402583906231,
      "learning_rate": 5.719157319202325e-06,
      "loss": 0.3539,
      "step": 2530
    },
    {
      "epoch": 1.5329711785121471,
      "grad_norm": 0.6223661041265537,
      "learning_rate": 5.684387721195997e-06,
      "loss": 0.3595,
      "step": 2540
    },
    {
      "epoch": 1.5390070921985815,
      "grad_norm": 0.6649761362975228,
      "learning_rate": 5.649584363194725e-06,
      "loss": 0.36,
      "step": 2550
    },
    {
      "epoch": 1.5450430058850158,
      "grad_norm": 0.5989851062495032,
      "learning_rate": 5.6147489620048655e-06,
      "loss": 0.3582,
      "step": 2560
    },
    {
      "epoch": 1.55107891957145,
      "grad_norm": 0.6435791376898407,
      "learning_rate": 5.579883236013429e-06,
      "loss": 0.3559,
      "step": 2570
    },
    {
      "epoch": 1.5571148332578844,
      "grad_norm": 0.5973586913854247,
      "learning_rate": 5.544988905103304e-06,
      "loss": 0.3581,
      "step": 2580
    },
    {
      "epoch": 1.5631507469443187,
      "grad_norm": 0.6331916860819433,
      "learning_rate": 5.510067690568429e-06,
      "loss": 0.3573,
      "step": 2590
    },
    {
      "epoch": 1.569186660630753,
      "grad_norm": 0.6000249694556851,
      "learning_rate": 5.475121315028876e-06,
      "loss": 0.3574,
      "step": 2600
    },
    {
      "epoch": 1.5752225743171873,
      "grad_norm": 0.5919987411148389,
      "learning_rate": 5.4401515023458805e-06,
      "loss": 0.3622,
      "step": 2610
    },
    {
      "epoch": 1.5812584880036216,
      "grad_norm": 0.6130160505042299,
      "learning_rate": 5.4051599775368e-06,
      "loss": 0.3585,
      "step": 2620
    },
    {
      "epoch": 1.5872944016900559,
      "grad_norm": 0.6196465067482942,
      "learning_rate": 5.370148466690026e-06,
      "loss": 0.3524,
      "step": 2630
    },
    {
      "epoch": 1.5933303153764902,
      "grad_norm": 0.6396523422153624,
      "learning_rate": 5.335118696879836e-06,
      "loss": 0.3584,
      "step": 2640
    },
    {
      "epoch": 1.5993662290629245,
      "grad_norm": 0.6247037129381725,
      "learning_rate": 5.3000723960812e-06,
      "loss": 0.358,
      "step": 2650
    },
    {
      "epoch": 1.6054021427493588,
      "grad_norm": 0.6296280096461855,
      "learning_rate": 5.265011293084539e-06,
      "loss": 0.3557,
      "step": 2660
    },
    {
      "epoch": 1.611438056435793,
      "grad_norm": 0.6270649643037325,
      "learning_rate": 5.2299371174104505e-06,
      "loss": 0.3586,
      "step": 2670
    },
    {
      "epoch": 1.6174739701222274,
      "grad_norm": 0.6724245016825049,
      "learning_rate": 5.194851599224392e-06,
      "loss": 0.3563,
      "step": 2680
    },
    {
      "epoch": 1.6235098838086617,
      "grad_norm": 0.6246722692854128,
      "learning_rate": 5.159756469251327e-06,
      "loss": 0.3587,
      "step": 2690
    },
    {
      "epoch": 1.629545797495096,
      "grad_norm": 0.5856892553580461,
      "learning_rate": 5.1246534586903655e-06,
      "loss": 0.3538,
      "step": 2700
    },
    {
      "epoch": 1.63558171118153,
      "grad_norm": 0.6199649535926036,
      "learning_rate": 5.089544299129349e-06,
      "loss": 0.3552,
      "step": 2710
    },
    {
      "epoch": 1.6416176248679644,
      "grad_norm": 0.6395106688159933,
      "learning_rate": 5.054430722459442e-06,
      "loss": 0.3575,
      "step": 2720
    },
    {
      "epoch": 1.6476535385543987,
      "grad_norm": 0.6217763272730691,
      "learning_rate": 5.019314460789708e-06,
      "loss": 0.3568,
      "step": 2730
    },
    {
      "epoch": 1.653689452240833,
      "grad_norm": 0.6159996290026578,
      "learning_rate": 4.984197246361649e-06,
      "loss": 0.3565,
      "step": 2740
    },
    {
      "epoch": 1.6597253659272673,
      "grad_norm": 0.6021051813495957,
      "learning_rate": 4.949080811463767e-06,
      "loss": 0.3577,
      "step": 2750
    },
    {
      "epoch": 1.6657612796137016,
      "grad_norm": 0.6102206368388114,
      "learning_rate": 4.913966888346118e-06,
      "loss": 0.3556,
      "step": 2760
    },
    {
      "epoch": 1.6717971933001357,
      "grad_norm": 0.5968837038838994,
      "learning_rate": 4.8788572091348435e-06,
      "loss": 0.3581,
      "step": 2770
    },
    {
      "epoch": 1.67783310698657,
      "grad_norm": 0.5981355700097328,
      "learning_rate": 4.843753505746748e-06,
      "loss": 0.358,
      "step": 2780
    },
    {
      "epoch": 1.6838690206730043,
      "grad_norm": 0.6567740858768865,
      "learning_rate": 4.8086575098038505e-06,
      "loss": 0.3573,
      "step": 2790
    },
    {
      "epoch": 1.6899049343594386,
      "grad_norm": 0.6773288375423023,
      "learning_rate": 4.773570952547975e-06,
      "loss": 0.3552,
      "step": 2800
    },
    {
      "epoch": 1.6959408480458729,
      "grad_norm": 0.6202686068367487,
      "learning_rate": 4.738495564755345e-06,
      "loss": 0.3547,
      "step": 2810
    },
    {
      "epoch": 1.7019767617323072,
      "grad_norm": 0.5595337919079114,
      "learning_rate": 4.703433076651205e-06,
      "loss": 0.353,
      "step": 2820
    },
    {
      "epoch": 1.7080126754187415,
      "grad_norm": 0.6583890978208258,
      "learning_rate": 4.668385217824482e-06,
      "loss": 0.3583,
      "step": 2830
    },
    {
      "epoch": 1.7140485891051758,
      "grad_norm": 0.5898922057879373,
      "learning_rate": 4.633353717142448e-06,
      "loss": 0.3524,
      "step": 2840
    },
    {
      "epoch": 1.72008450279161,
      "grad_norm": 0.5938698503556435,
      "learning_rate": 4.5983403026654625e-06,
      "loss": 0.3554,
      "step": 2850
    },
    {
      "epoch": 1.7261204164780444,
      "grad_norm": 0.632653867195755,
      "learning_rate": 4.563346701561699e-06,
      "loss": 0.3535,
      "step": 2860
    },
    {
      "epoch": 1.7321563301644787,
      "grad_norm": 0.634481958151908,
      "learning_rate": 4.528374640021975e-06,
      "loss": 0.3548,
      "step": 2870
    },
    {
      "epoch": 1.738192243850913,
      "grad_norm": 0.6554591212571549,
      "learning_rate": 4.493425843174581e-06,
      "loss": 0.3523,
      "step": 2880
    },
    {
      "epoch": 1.7442281575373473,
      "grad_norm": 0.639030241328894,
      "learning_rate": 4.4585020350001885e-06,
      "loss": 0.3571,
      "step": 2890
    },
    {
      "epoch": 1.7502640712237816,
      "grad_norm": 0.579081823243162,
      "learning_rate": 4.423604938246815e-06,
      "loss": 0.358,
      "step": 2900
    },
    {
      "epoch": 1.7562999849102159,
      "grad_norm": 0.5786332593667859,
      "learning_rate": 4.38873627434483e-06,
      "loss": 0.3546,
      "step": 2910
    },
    {
      "epoch": 1.7623358985966502,
      "grad_norm": 0.5844630643462843,
      "learning_rate": 4.353897763322053e-06,
      "loss": 0.3557,
      "step": 2920
    },
    {
      "epoch": 1.7683718122830845,
      "grad_norm": 0.6362540824300466,
      "learning_rate": 4.319091123718891e-06,
      "loss": 0.3577,
      "step": 2930
    },
    {
      "epoch": 1.7744077259695188,
      "grad_norm": 0.6152238906869951,
      "learning_rate": 4.284318072503581e-06,
      "loss": 0.3558,
      "step": 2940
    },
    {
      "epoch": 1.7804436396559529,
      "grad_norm": 0.5871415463947245,
      "learning_rate": 4.249580324987482e-06,
      "loss": 0.3565,
      "step": 2950
    },
    {
      "epoch": 1.7864795533423872,
      "grad_norm": 0.5894304003956816,
      "learning_rate": 4.2148795947404664e-06,
      "loss": 0.3548,
      "step": 2960
    },
    {
      "epoch": 1.7925154670288215,
      "grad_norm": 0.5546376741165042,
      "learning_rate": 4.180217593506394e-06,
      "loss": 0.3545,
      "step": 2970
    },
    {
      "epoch": 1.7985513807152558,
      "grad_norm": 0.5882950021870835,
      "learning_rate": 4.1455960311186645e-06,
      "loss": 0.3578,
      "step": 2980
    },
    {
      "epoch": 1.80458729440169,
      "grad_norm": 0.6581353476419389,
      "learning_rate": 4.111016615415887e-06,
      "loss": 0.3545,
      "step": 2990
    },
    {
      "epoch": 1.8106232080881244,
      "grad_norm": 0.728199708802779,
      "learning_rate": 4.076481052157621e-06,
      "loss": 0.3567,
      "step": 3000
    },
    {
      "epoch": 1.8166591217745585,
      "grad_norm": 0.5836951966903218,
      "learning_rate": 4.0419910449402385e-06,
      "loss": 0.3541,
      "step": 3010
    },
    {
      "epoch": 1.8226950354609928,
      "grad_norm": 0.583825208842142,
      "learning_rate": 4.0075482951128965e-06,
      "loss": 0.3557,
      "step": 3020
    },
    {
      "epoch": 1.828730949147427,
      "grad_norm": 0.627394077298899,
      "learning_rate": 3.973154501693597e-06,
      "loss": 0.352,
      "step": 3030
    },
    {
      "epoch": 1.8347668628338614,
      "grad_norm": 0.6500394437203815,
      "learning_rate": 3.938811361285386e-06,
      "loss": 0.3543,
      "step": 3040
    },
    {
      "epoch": 1.8408027765202957,
      "grad_norm": 0.5787408936785984,
      "learning_rate": 3.904520567992655e-06,
      "loss": 0.3539,
      "step": 3050
    },
    {
      "epoch": 1.84683869020673,
      "grad_norm": 0.6006488260082842,
      "learning_rate": 3.870283813337587e-06,
      "loss": 0.3534,
      "step": 3060
    },
    {
      "epoch": 1.8528746038931643,
      "grad_norm": 0.6017706438925717,
      "learning_rate": 3.836102786176697e-06,
      "loss": 0.3533,
      "step": 3070
    },
    {
      "epoch": 1.8589105175795986,
      "grad_norm": 0.6160731963284618,
      "learning_rate": 3.8019791726175353e-06,
      "loss": 0.3537,
      "step": 3080
    },
    {
      "epoch": 1.8649464312660329,
      "grad_norm": 0.7394723530516694,
      "learning_rate": 3.767914655935513e-06,
      "loss": 0.3512,
      "step": 3090
    },
    {
      "epoch": 1.8709823449524672,
      "grad_norm": 0.5969802619046902,
      "learning_rate": 3.73391091649086e-06,
      "loss": 0.3514,
      "step": 3100
    },
    {
      "epoch": 1.8770182586389015,
      "grad_norm": 0.6434909203687009,
      "learning_rate": 3.6999696316457468e-06,
      "loss": 0.3525,
      "step": 3110
    },
    {
      "epoch": 1.8830541723253358,
      "grad_norm": 0.6185839002292769,
      "learning_rate": 3.6660924756815314e-06,
      "loss": 0.3516,
      "step": 3120
    },
    {
      "epoch": 1.88909008601177,
      "grad_norm": 0.5764246370880874,
      "learning_rate": 3.63228111971618e-06,
      "loss": 0.3543,
      "step": 3130
    },
    {
      "epoch": 1.8951259996982044,
      "grad_norm": 0.5724269342695871,
      "learning_rate": 3.5985372316218187e-06,
      "loss": 0.3524,
      "step": 3140
    },
    {
      "epoch": 1.9011619133846387,
      "grad_norm": 0.5893980753783277,
      "learning_rate": 3.5648624759424723e-06,
      "loss": 0.3487,
      "step": 3150
    },
    {
      "epoch": 1.907197827071073,
      "grad_norm": 0.6385286384600478,
      "learning_rate": 3.5312585138119503e-06,
      "loss": 0.353,
      "step": 3160
    },
    {
      "epoch": 1.9132337407575073,
      "grad_norm": 0.643587632906283,
      "learning_rate": 3.4977270028719013e-06,
      "loss": 0.3498,
      "step": 3170
    },
    {
      "epoch": 1.9192696544439416,
      "grad_norm": 0.6189874783125575,
      "learning_rate": 3.4642695971900506e-06,
      "loss": 0.3542,
      "step": 3180
    },
    {
      "epoch": 1.925305568130376,
      "grad_norm": 0.6320316722606764,
      "learning_rate": 3.4308879471785986e-06,
      "loss": 0.3523,
      "step": 3190
    },
    {
      "epoch": 1.93134148181681,
      "grad_norm": 0.6715762862677156,
      "learning_rate": 3.3975836995128176e-06,
      "loss": 0.3505,
      "step": 3200
    },
    {
      "epoch": 1.9373773955032443,
      "grad_norm": 0.5947951437286136,
      "learning_rate": 3.3643584970498166e-06,
      "loss": 0.356,
      "step": 3210
    },
    {
      "epoch": 1.9434133091896786,
      "grad_norm": 0.5953138896683005,
      "learning_rate": 3.3312139787474986e-06,
      "loss": 0.3552,
      "step": 3220
    },
    {
      "epoch": 1.9494492228761129,
      "grad_norm": 0.5696476474991146,
      "learning_rate": 3.298151779583725e-06,
      "loss": 0.3496,
      "step": 3230
    },
    {
      "epoch": 1.9554851365625472,
      "grad_norm": 0.6131972032987533,
      "learning_rate": 3.2651735304756505e-06,
      "loss": 0.3536,
      "step": 3240
    },
    {
      "epoch": 1.9615210502489815,
      "grad_norm": 0.6336317988993604,
      "learning_rate": 3.2322808581992825e-06,
      "loss": 0.3563,
      "step": 3250
    },
    {
      "epoch": 1.9675569639354156,
      "grad_norm": 0.6341579320490388,
      "learning_rate": 3.1994753853092284e-06,
      "loss": 0.3482,
      "step": 3260
    },
    {
      "epoch": 1.9735928776218499,
      "grad_norm": 0.5954681993221721,
      "learning_rate": 3.166758730058653e-06,
      "loss": 0.3518,
      "step": 3270
    },
    {
      "epoch": 1.9796287913082842,
      "grad_norm": 0.5893599270303087,
      "learning_rate": 3.134132506319467e-06,
      "loss": 0.3536,
      "step": 3280
    },
    {
      "epoch": 1.9856647049947185,
      "grad_norm": 0.5689301232875419,
      "learning_rate": 3.101598323502698e-06,
      "loss": 0.3537,
      "step": 3290
    },
    {
      "epoch": 1.9917006186811528,
      "grad_norm": 0.6116819898452338,
      "learning_rate": 3.0691577864791176e-06,
      "loss": 0.3515,
      "step": 3300
    },
    {
      "epoch": 1.997736532367587,
      "grad_norm": 0.5926997741101551,
      "learning_rate": 3.036812495500058e-06,
      "loss": 0.3504,
      "step": 3310
    },
    {
      "epoch": 2.0036215482118607,
      "grad_norm": 0.5928785278377309,
      "learning_rate": 3.0045640461184917e-06,
      "loss": 0.339,
      "step": 3320
    },
    {
      "epoch": 2.009657461898295,
      "grad_norm": 0.6039984062866832,
      "learning_rate": 2.97241402911031e-06,
      "loss": 0.3325,
      "step": 3330
    },
    {
      "epoch": 2.0156933755847293,
      "grad_norm": 0.6671960610879556,
      "learning_rate": 2.940364030395856e-06,
      "loss": 0.3284,
      "step": 3340
    },
    {
      "epoch": 2.0217292892711636,
      "grad_norm": 0.5808483500966948,
      "learning_rate": 2.908415630961702e-06,
      "loss": 0.3265,
      "step": 3350
    },
    {
      "epoch": 2.027765202957598,
      "grad_norm": 0.6017580883286716,
      "learning_rate": 2.876570406782645e-06,
      "loss": 0.3296,
      "step": 3360
    },
    {
      "epoch": 2.033801116644032,
      "grad_norm": 0.6067555273933171,
      "learning_rate": 2.844829928743987e-06,
      "loss": 0.3315,
      "step": 3370
    },
    {
      "epoch": 2.0398370303304665,
      "grad_norm": 0.5774545226545359,
      "learning_rate": 2.813195762564018e-06,
      "loss": 0.3268,
      "step": 3380
    },
    {
      "epoch": 2.0458729440169003,
      "grad_norm": 0.5888748284507602,
      "learning_rate": 2.781669468716811e-06,
      "loss": 0.3292,
      "step": 3390
    },
    {
      "epoch": 2.0519088577033346,
      "grad_norm": 0.6137376399757654,
      "learning_rate": 2.7502526023552227e-06,
      "loss": 0.3258,
      "step": 3400
    },
    {
      "epoch": 2.057944771389769,
      "grad_norm": 0.59390579398881,
      "learning_rate": 2.718946713234185e-06,
      "loss": 0.3295,
      "step": 3410
    },
    {
      "epoch": 2.0639806850762032,
      "grad_norm": 0.6555105104152712,
      "learning_rate": 2.6877533456342714e-06,
      "loss": 0.3301,
      "step": 3420
    },
    {
      "epoch": 2.0700165987626375,
      "grad_norm": 0.6048063575727766,
      "learning_rate": 2.6566740382855005e-06,
      "loss": 0.3289,
      "step": 3430
    },
    {
      "epoch": 2.076052512449072,
      "grad_norm": 0.6014841818951663,
      "learning_rate": 2.625710324291442e-06,
      "loss": 0.3325,
      "step": 3440
    },
    {
      "epoch": 2.082088426135506,
      "grad_norm": 0.6035697169885135,
      "learning_rate": 2.5948637310535886e-06,
      "loss": 0.3296,
      "step": 3450
    },
    {
      "epoch": 2.0881243398219405,
      "grad_norm": 0.6112233467387164,
      "learning_rate": 2.5641357801960186e-06,
      "loss": 0.3278,
      "step": 3460
    },
    {
      "epoch": 2.0941602535083748,
      "grad_norm": 0.5870217829586826,
      "learning_rate": 2.5335279874903185e-06,
      "loss": 0.3313,
      "step": 3470
    },
    {
      "epoch": 2.100196167194809,
      "grad_norm": 0.5897131296840935,
      "learning_rate": 2.503041862780827e-06,
      "loss": 0.3296,
      "step": 3480
    },
    {
      "epoch": 2.1062320808812434,
      "grad_norm": 0.5718259687035243,
      "learning_rate": 2.47267890991016e-06,
      "loss": 0.3281,
      "step": 3490
    },
    {
      "epoch": 2.1122679945676777,
      "grad_norm": 0.5777856500315681,
      "learning_rate": 2.4424406266450045e-06,
      "loss": 0.3296,
      "step": 3500
    },
    {
      "epoch": 2.118303908254112,
      "grad_norm": 0.6262457739159312,
      "learning_rate": 2.412328504602264e-06,
      "loss": 0.3336,
      "step": 3510
    },
    {
      "epoch": 2.1243398219405463,
      "grad_norm": 0.589194023665236,
      "learning_rate": 2.382344029175462e-06,
      "loss": 0.3349,
      "step": 3520
    },
    {
      "epoch": 2.1303757356269806,
      "grad_norm": 0.6140628916832596,
      "learning_rate": 2.3524886794614653e-06,
      "loss": 0.331,
      "step": 3530
    },
    {
      "epoch": 2.136411649313415,
      "grad_norm": 0.6028871935735021,
      "learning_rate": 2.322763928187543e-06,
      "loss": 0.3307,
      "step": 3540
    },
    {
      "epoch": 2.142447562999849,
      "grad_norm": 0.5798390235554982,
      "learning_rate": 2.293171241638698e-06,
      "loss": 0.3298,
      "step": 3550
    },
    {
      "epoch": 2.1484834766862835,
      "grad_norm": 0.5950496656474389,
      "learning_rate": 2.263712079585345e-06,
      "loss": 0.3305,
      "step": 3560
    },
    {
      "epoch": 2.154519390372718,
      "grad_norm": 0.5926734664470145,
      "learning_rate": 2.2343878952113012e-06,
      "loss": 0.3276,
      "step": 3570
    },
    {
      "epoch": 2.160555304059152,
      "grad_norm": 0.5877698580097848,
      "learning_rate": 2.2052001350421096e-06,
      "loss": 0.3268,
      "step": 3580
    },
    {
      "epoch": 2.1665912177455864,
      "grad_norm": 0.5888247000199527,
      "learning_rate": 2.1761502388736655e-06,
      "loss": 0.3327,
      "step": 3590
    },
    {
      "epoch": 2.1726271314320207,
      "grad_norm": 0.5807991121980183,
      "learning_rate": 2.14723963970121e-06,
      "loss": 0.3315,
      "step": 3600
    },
    {
      "epoch": 2.178663045118455,
      "grad_norm": 0.5763459777490838,
      "learning_rate": 2.118469763648643e-06,
      "loss": 0.3278,
      "step": 3610
    },
    {
      "epoch": 2.1846989588048893,
      "grad_norm": 0.5588744726618396,
      "learning_rate": 2.0898420298981537e-06,
      "loss": 0.3296,
      "step": 3620
    },
    {
      "epoch": 2.1907348724913236,
      "grad_norm": 0.6040859182215225,
      "learning_rate": 2.061357850620243e-06,
      "loss": 0.3279,
      "step": 3630
    },
    {
      "epoch": 2.1967707861777575,
      "grad_norm": 0.6083091005217864,
      "learning_rate": 2.0330186309040394e-06,
      "loss": 0.3298,
      "step": 3640
    },
    {
      "epoch": 2.2028066998641918,
      "grad_norm": 0.568667447432841,
      "learning_rate": 2.0048257686879997e-06,
      "loss": 0.3286,
      "step": 3650
    },
    {
      "epoch": 2.208842613550626,
      "grad_norm": 0.586169393672314,
      "learning_rate": 1.9767806546909457e-06,
      "loss": 0.3316,
      "step": 3660
    },
    {
      "epoch": 2.2148785272370604,
      "grad_norm": 0.5855668928973393,
      "learning_rate": 1.9488846723434646e-06,
      "loss": 0.3262,
      "step": 3670
    },
    {
      "epoch": 2.2209144409234947,
      "grad_norm": 0.5920501956876788,
      "learning_rate": 1.921139197719664e-06,
      "loss": 0.3298,
      "step": 3680
|
|
},
|
|
{
|
|
"epoch": 2.226950354609929,
|
|
"grad_norm": 0.6343784219115092,
|
|
"learning_rate": 1.893545599469292e-06,
|
|
"loss": 0.3316,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 2.2329862682963633,
|
|
"grad_norm": 0.56167618088226,
|
|
"learning_rate": 1.86610523875023e-06,
|
|
"loss": 0.3288,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 2.2390221819827976,
|
|
"grad_norm": 0.5937195941687996,
|
|
"learning_rate": 1.8388194691613308e-06,
|
|
"loss": 0.3285,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 2.245058095669232,
|
|
"grad_norm": 0.6068056100462802,
|
|
"learning_rate": 1.811689636675672e-06,
|
|
"loss": 0.3295,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 2.251094009355666,
|
|
"grad_norm": 0.5998463693882512,
|
|
"learning_rate": 1.7847170795741414e-06,
|
|
"loss": 0.33,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 2.2571299230421005,
|
|
"grad_norm": 0.5707846476820784,
|
|
"learning_rate": 1.7579031283794234e-06,
|
|
"loss": 0.3324,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 2.2631658367285348,
|
|
"grad_norm": 0.6070101386107148,
|
|
"learning_rate": 1.7312491057903808e-06,
|
|
"loss": 0.3288,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 2.269201750414969,
|
|
"grad_norm": 0.5684370763425239,
|
|
"learning_rate": 1.7047563266167888e-06,
|
|
"loss": 0.3291,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 2.2752376641014034,
|
|
"grad_norm": 0.5367883177519198,
|
|
"learning_rate": 1.678426097714489e-06,
|
|
"loss": 0.3265,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 2.2812735777878377,
|
|
"grad_norm": 0.5853244396608877,
|
|
"learning_rate": 1.6522597179209187e-06,
|
|
"loss": 0.3259,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 2.287309491474272,
|
|
"grad_norm": 0.5641343283784108,
|
|
"learning_rate": 1.6262584779910472e-06,
|
|
"loss": 0.3286,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 2.2933454051607063,
|
|
"grad_norm": 0.5563209895809159,
|
|
"learning_rate": 1.600423660533692e-06,
|
|
"loss": 0.3281,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 2.2993813188471406,
|
|
"grad_norm": 0.5805361043294971,
|
|
"learning_rate": 1.5747565399482605e-06,
|
|
"loss": 0.3299,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 2.305417232533575,
|
|
"grad_norm": 0.5811803574606669,
|
|
"learning_rate": 1.5492583823618878e-06,
|
|
"loss": 0.3289,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 2.311453146220009,
|
|
"grad_norm": 0.6040233888147246,
|
|
"learning_rate": 1.523930445566963e-06,
|
|
"loss": 0.3308,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 2.3174890599064435,
|
|
"grad_norm": 0.6059976475921155,
|
|
"learning_rate": 1.4987739789591056e-06,
|
|
"loss": 0.3294,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 2.323524973592878,
|
|
"grad_norm": 0.5905386095910952,
|
|
"learning_rate": 1.4737902234755203e-06,
|
|
"loss": 0.3301,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 2.329560887279312,
|
|
"grad_norm": 0.5747067002149818,
|
|
"learning_rate": 1.448980411533782e-06,
|
|
"loss": 0.3278,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 2.335596800965746,
|
|
"grad_norm": 0.5732211405787891,
|
|
"learning_rate": 1.4243457669710564e-06,
|
|
"loss": 0.3245,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 2.3416327146521807,
|
|
"grad_norm": 0.6079651710560006,
|
|
"learning_rate": 1.3998875049837141e-06,
|
|
"loss": 0.3268,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 2.3476686283386146,
|
|
"grad_norm": 0.5783578941572416,
|
|
"learning_rate": 1.3756068320673938e-06,
|
|
"loss": 0.3283,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 2.353704542025049,
|
|
"grad_norm": 0.5532376575030373,
|
|
"learning_rate": 1.3515049459574847e-06,
|
|
"loss": 0.3254,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 2.359740455711483,
|
|
"grad_norm": 0.5467274114487632,
|
|
"learning_rate": 1.3275830355700519e-06,
|
|
"loss": 0.3257,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 2.3657763693979175,
|
|
"grad_norm": 0.5922264462167515,
|
|
"learning_rate": 1.3038422809431733e-06,
|
|
"loss": 0.3291,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 2.3718122830843518,
|
|
"grad_norm": 0.5807751637804499,
|
|
"learning_rate": 1.280283853178742e-06,
|
|
"loss": 0.3281,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 2.377848196770786,
|
|
"grad_norm": 0.5751202261036737,
|
|
"learning_rate": 1.256908914384698e-06,
|
|
"loss": 0.3321,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 2.3838841104572204,
|
|
"grad_norm": 0.5829573981972134,
|
|
"learning_rate": 1.233718617617689e-06,
|
|
"loss": 0.3303,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 2.3899200241436547,
|
|
"grad_norm": 0.5614143554083199,
|
|
"learning_rate": 1.2107141068262119e-06,
|
|
"loss": 0.3276,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 2.395955937830089,
|
|
"grad_norm": 0.5657826082869326,
|
|
"learning_rate": 1.1878965167941658e-06,
|
|
"loss": 0.3279,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 2.4019918515165233,
|
|
"grad_norm": 0.5583977788315128,
|
|
"learning_rate": 1.1652669730848837e-06,
|
|
"loss": 0.3259,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 2.4080277652029576,
|
|
"grad_norm": 0.5670227130617606,
|
|
"learning_rate": 1.1428265919856057e-06,
|
|
"loss": 0.3319,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 2.414063678889392,
|
|
"grad_norm": 0.5345020446470288,
|
|
"learning_rate": 1.1205764804524172e-06,
|
|
"loss": 0.3258,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 2.420099592575826,
|
|
"grad_norm": 0.5742530447532448,
|
|
"learning_rate": 1.0985177360556421e-06,
|
|
"loss": 0.3281,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 2.4261355062622605,
|
|
"grad_norm": 0.5681633515485598,
|
|
"learning_rate": 1.0766514469257006e-06,
|
|
"loss": 0.33,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 2.432171419948695,
|
|
"grad_norm": 0.5469547021834809,
|
|
"learning_rate": 1.0549786916994387e-06,
|
|
"loss": 0.3271,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 2.438207333635129,
|
|
"grad_norm": 0.5467836338693935,
|
|
"learning_rate": 1.0335005394669062e-06,
|
|
"loss": 0.3282,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 2.4442432473215634,
|
|
"grad_norm": 0.5496370736783344,
|
|
"learning_rate": 1.012218049718639e-06,
|
|
"loss": 0.3267,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 2.4502791610079977,
|
|
"grad_norm": 0.5532695447765059,
|
|
"learning_rate": 9.911322722933825e-07,
|
|
"loss": 0.3267,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 2.456315074694432,
|
|
"grad_norm": 0.5593061519759683,
|
|
"learning_rate": 9.702442473263035e-07,
|
|
"loss": 0.3261,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 2.4623509883808663,
|
|
"grad_norm": 0.5675718980431652,
|
|
"learning_rate": 9.495550051976937e-07,
|
|
"loss": 0.33,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 2.4683869020673006,
|
|
"grad_norm": 0.55036807877547,
|
|
"learning_rate": 9.290655664821296e-07,
|
|
"loss": 0.326,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 2.474422815753735,
|
|
"grad_norm": 0.6047393707132771,
|
|
"learning_rate": 9.087769418981352e-07,
|
|
"loss": 0.3294,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 2.480458729440169,
|
|
"grad_norm": 0.5377856224781872,
|
|
"learning_rate": 8.88690132258323e-07,
|
|
"loss": 0.3301,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 2.486494643126603,
|
|
"grad_norm": 0.5404023215833121,
|
|
"learning_rate": 8.688061284200266e-07,
|
|
"loss": 0.3308,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 2.492530556813038,
|
|
"grad_norm": 0.5429038964087051,
|
|
"learning_rate": 8.491259112364192e-07,
|
|
"loss": 0.3277,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 2.4985664704994717,
|
|
"grad_norm": 0.5556392061166345,
|
|
"learning_rate": 8.296504515081333e-07,
|
|
"loss": 0.328,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 2.5046023841859064,
|
|
"grad_norm": 0.5550852188468128,
|
|
"learning_rate": 8.103807099353733e-07,
|
|
"loss": 0.3303,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 2.5106382978723403,
|
|
"grad_norm": 0.5683960534884703,
|
|
"learning_rate": 7.913176370705166e-07,
|
|
"loss": 0.3303,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 2.5166742115587746,
|
|
"grad_norm": 0.5647058376594801,
|
|
"learning_rate": 7.724621732712373e-07,
|
|
"loss": 0.3281,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 2.522710125245209,
|
|
"grad_norm": 0.5396463872633352,
|
|
"learning_rate": 7.538152486541078e-07,
|
|
"loss": 0.3224,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 2.528746038931643,
|
|
"grad_norm": 0.5769965957501234,
|
|
"learning_rate": 7.353777830487247e-07,
|
|
"loss": 0.3298,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 2.5347819526180775,
|
|
"grad_norm": 0.5617546845646423,
|
|
"learning_rate": 7.171506859523298e-07,
|
|
"loss": 0.3284,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 2.540817866304512,
|
|
"grad_norm": 0.5370456459767287,
|
|
"learning_rate": 6.991348564849504e-07,
|
|
"loss": 0.3272,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 2.546853779990946,
|
|
"grad_norm": 0.5449920129863155,
|
|
"learning_rate": 6.813311833450426e-07,
|
|
"loss": 0.3244,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 2.5528896936773804,
|
|
"grad_norm": 0.5814796250543772,
|
|
"learning_rate": 6.637405447656542e-07,
|
|
"loss": 0.3286,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 2.5589256073638147,
|
|
"grad_norm": 0.5802300234417045,
|
|
"learning_rate": 6.463638084711088e-07,
|
|
"loss": 0.3303,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 2.564961521050249,
|
|
"grad_norm": 0.5682016106324166,
|
|
"learning_rate": 6.29201831634188e-07,
|
|
"loss": 0.3275,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 2.5709974347366833,
|
|
"grad_norm": 0.628799960343276,
|
|
"learning_rate": 6.122554608338605e-07,
|
|
"loss": 0.3278,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 2.5770333484231176,
|
|
"grad_norm": 0.5261749879449605,
|
|
"learning_rate": 5.955255320135195e-07,
|
|
"loss": 0.3287,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 2.583069262109552,
|
|
"grad_norm": 0.5365103226953842,
|
|
"learning_rate": 5.790128704397424e-07,
|
|
"loss": 0.3242,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 2.589105175795986,
|
|
"grad_norm": 0.5482210552849281,
|
|
"learning_rate": 5.627182906615825e-07,
|
|
"loss": 0.3254,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 2.5951410894824205,
|
|
"grad_norm": 0.5270093070193902,
|
|
"learning_rate": 5.466425964703914e-07,
|
|
"loss": 0.3268,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 2.601177003168855,
|
|
"grad_norm": 0.5351843851712077,
|
|
"learning_rate": 5.307865808601664e-07,
|
|
"loss": 0.3267,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 2.607212916855289,
|
|
"grad_norm": 0.5551045883829538,
|
|
"learning_rate": 5.151510259884329e-07,
|
|
"loss": 0.3261,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 2.6132488305417234,
|
|
"grad_norm": 0.5716515174477422,
|
|
"learning_rate": 4.997367031376627e-07,
|
|
"loss": 0.3283,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 2.6192847442281577,
|
|
"grad_norm": 0.5484469831279773,
|
|
"learning_rate": 4.84544372677228e-07,
|
|
"loss": 0.3279,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 2.6253206579145916,
|
|
"grad_norm": 0.5627722024643765,
|
|
"learning_rate": 4.6957478402589076e-07,
|
|
"loss": 0.3285,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 2.6313565716010263,
|
|
"grad_norm": 0.54030007506572,
|
|
"learning_rate": 4.548286756148401e-07,
|
|
"loss": 0.328,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 2.63739248528746,
|
|
"grad_norm": 0.5688872966757411,
|
|
"learning_rate": 4.4030677485125906e-07,
|
|
"loss": 0.3291,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 2.643428398973895,
|
|
"grad_norm": 0.5611453338620043,
|
|
"learning_rate": 4.2600979808244627e-07,
|
|
"loss": 0.3267,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 2.649464312660329,
|
|
"grad_norm": 0.5591585705521456,
|
|
"learning_rate": 4.119384505604834e-07,
|
|
"loss": 0.3285,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 2.655500226346763,
|
|
"grad_norm": 0.5403567309346599,
|
|
"learning_rate": 3.980934264074393e-07,
|
|
"loss": 0.3234,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 2.6615361400331974,
|
|
"grad_norm": 0.5366841662024877,
|
|
"learning_rate": 3.8447540858113197e-07,
|
|
"loss": 0.3289,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 2.6675720537196317,
|
|
"grad_norm": 0.5505493242335168,
|
|
"learning_rate": 3.710850688414419e-07,
|
|
"loss": 0.329,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 2.673607967406066,
|
|
"grad_norm": 0.5572305600353893,
|
|
"learning_rate": 3.579230677171702e-07,
|
|
"loss": 0.326,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 2.6796438810925003,
|
|
"grad_norm": 0.5320801899819191,
|
|
"learning_rate": 3.4499005447346024e-07,
|
|
"loss": 0.3272,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 2.6856797947789346,
|
|
"grad_norm": 0.5621605698475473,
|
|
"learning_rate": 3.32286667079767e-07,
|
|
"loss": 0.3232,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 2.691715708465369,
|
|
"grad_norm": 0.5535800034831663,
|
|
"learning_rate": 3.1981353217838853e-07,
|
|
"loss": 0.3267,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 2.697751622151803,
|
|
"grad_norm": 0.5541989505631728,
|
|
"learning_rate": 3.0757126505355284e-07,
|
|
"loss": 0.3271,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 2.7037875358382375,
|
|
"grad_norm": 0.554309743511386,
|
|
"learning_rate": 2.9556046960106997e-07,
|
|
"loss": 0.3275,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 2.709823449524672,
|
|
"grad_norm": 0.5441084268121339,
|
|
"learning_rate": 2.837817382985375e-07,
|
|
"loss": 0.3265,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 2.715859363211106,
|
|
"grad_norm": 0.5661752729331364,
|
|
"learning_rate": 2.722356521761188e-07,
|
|
"loss": 0.3251,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 2.7218952768975404,
|
|
"grad_norm": 0.5284497315283775,
|
|
"learning_rate": 2.6092278078788004e-07,
|
|
"loss": 0.3249,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 2.7279311905839747,
|
|
"grad_norm": 0.52978683625873,
|
|
"learning_rate": 2.4984368218369305e-07,
|
|
"loss": 0.3282,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 2.733967104270409,
|
|
"grad_norm": 0.5435219044017648,
|
|
"learning_rate": 2.389989028817108e-07,
|
|
"loss": 0.3283,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 2.7400030179568433,
|
|
"grad_norm": 0.5516780362582209,
|
|
"learning_rate": 2.2838897784140612e-07,
|
|
"loss": 0.3274,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 2.7460389316432776,
|
|
"grad_norm": 0.546719555306795,
|
|
"learning_rate": 2.1801443043718285e-07,
|
|
"loss": 0.3298,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 2.752074845329712,
|
|
"grad_norm": 0.5563980632574993,
|
|
"learning_rate": 2.0787577243255807e-07,
|
|
"loss": 0.3267,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 2.758110759016146,
|
|
"grad_norm": 0.535638021015215,
|
|
"learning_rate": 1.9797350395492077e-07,
|
|
"loss": 0.3253,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 2.76414667270258,
|
|
"grad_norm": 0.5347609121819951,
|
|
"learning_rate": 1.8830811347085697e-07,
|
|
"loss": 0.3252,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 2.770182586389015,
|
|
"grad_norm": 0.5415863482391344,
|
|
"learning_rate": 1.788800777620542e-07,
|
|
"loss": 0.3276,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 2.7762185000754487,
|
|
"grad_norm": 0.5466212280219622,
|
|
"learning_rate": 1.6968986190178728e-07,
|
|
"loss": 0.326,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 2.7822544137618834,
|
|
"grad_norm": 0.5391843573715891,
|
|
"learning_rate": 1.60737919231973e-07,
|
|
"loss": 0.3265,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 2.7882903274483173,
|
|
"grad_norm": 0.5465887305789703,
|
|
"learning_rate": 1.5202469134080633e-07,
|
|
"loss": 0.3291,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 2.794326241134752,
|
|
"grad_norm": 0.5447449635613493,
|
|
"learning_rate": 1.4355060804098043e-07,
|
|
"loss": 0.3254,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 2.800362154821186,
|
|
"grad_norm": 0.5376834372862567,
|
|
"learning_rate": 1.3531608734848433e-07,
|
|
"loss": 0.3252,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 2.80639806850762,
|
|
"grad_norm": 0.5419447242645747,
|
|
"learning_rate": 1.273215354619789e-07,
|
|
"loss": 0.3277,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 2.8124339821940545,
|
|
"grad_norm": 0.521436211709283,
|
|
"learning_rate": 1.1956734674276492e-07,
|
|
"loss": 0.3267,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 2.818469895880489,
|
|
"grad_norm": 0.5443036316275357,
|
|
"learning_rate": 1.1205390369532553e-07,
|
|
"loss": 0.328,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 2.824505809566923,
|
|
"grad_norm": 0.5736771187575125,
|
|
"learning_rate": 1.0478157694846002e-07,
|
|
"loss": 0.3269,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 2.8305417232533574,
|
|
"grad_norm": 0.5533030963421177,
|
|
"learning_rate": 9.775072523700135e-08,
|
|
"loss": 0.3274,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 2.8365776369397917,
|
|
"grad_norm": 0.5297867847542854,
|
|
"learning_rate": 9.096169538411747e-08,
|
|
"loss": 0.3251,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 2.842613550626226,
|
|
"grad_norm": 0.5603590658940372,
|
|
"learning_rate": 8.441482228420505e-08,
|
|
"loss": 0.3261,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 2.8486494643126603,
|
|
"grad_norm": 0.5648155137748375,
|
|
"learning_rate": 7.81104288863721e-08,
|
|
"loss": 0.3238,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 2.8546853779990946,
|
|
"grad_norm": 0.5155233113764542,
|
|
"learning_rate": 7.204882617850129e-08,
|
|
"loss": 0.3284,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 2.860721291685529,
|
|
"grad_norm": 0.5283055469638852,
|
|
"learning_rate": 6.623031317191386e-08,
|
|
"loss": 0.3243,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 2.866757205371963,
|
|
"grad_norm": 0.5324757215458941,
|
|
"learning_rate": 6.065517688661926e-08,
|
|
"loss": 0.3266,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 2.8727931190583975,
|
|
"grad_norm": 0.5270694862009192,
|
|
"learning_rate": 5.532369233715418e-08,
|
|
"loss": 0.3263,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 2.878829032744832,
|
|
"grad_norm": 0.5184850936640313,
|
|
"learning_rate": 5.02361225190201e-08,
|
|
"loss": 0.325,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 2.884864946431266,
|
|
"grad_norm": 0.5522807685327075,
|
|
"learning_rate": 4.539271839570702e-08,
|
|
"loss": 0.3303,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 2.8909008601177004,
|
|
"grad_norm": 0.5833975533295399,
|
|
"learning_rate": 4.079371888631667e-08,
|
|
"loss": 0.3287,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 2.8969367738041347,
|
|
"grad_norm": 0.5435014494666157,
|
|
"learning_rate": 3.643935085377193e-08,
|
|
"loss": 0.3291,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 2.902972687490569,
|
|
"grad_norm": 0.5317951774661862,
|
|
"learning_rate": 3.232982909363247e-08,
|
|
"loss": 0.3302,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 2.9090086011770033,
|
|
"grad_norm": 0.5470417295465569,
|
|
"learning_rate": 2.8465356323494897e-08,
|
|
"loss": 0.3293,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 2.915044514863437,
|
|
"grad_norm": 0.5361189628769133,
|
|
"learning_rate": 2.4846123172992953e-08,
|
|
"loss": 0.3281,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 2.921080428549872,
|
|
"grad_norm": 0.54177713240335,
|
|
"learning_rate": 2.147230817439616e-08,
|
|
"loss": 0.326,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 2.927116342236306,
|
|
"grad_norm": 0.5323052126594137,
|
|
"learning_rate": 1.834407775380187e-08,
|
|
"loss": 0.3281,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 2.9331522559227405,
|
|
"grad_norm": 0.5359903004283559,
|
|
"learning_rate": 1.5461586222924596e-08,
|
|
"loss": 0.3261,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 2.9391881696091744,
|
|
"grad_norm": 0.5543427271655068,
|
|
"learning_rate": 1.2824975771486558e-08,
|
|
"loss": 0.3264,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 2.945224083295609,
|
|
"grad_norm": 0.5384618149718552,
|
|
"learning_rate": 1.0434376460201067e-08,
|
|
"loss": 0.3271,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 2.951259996982043,
|
|
"grad_norm": 0.5121547980752482,
|
|
"learning_rate": 8.289906214358767e-09,
|
|
"loss": 0.3252,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 2.9572959106684773,
|
|
"grad_norm": 0.5126843579972032,
|
|
"learning_rate": 6.391670818008955e-09,
|
|
"loss": 0.3255,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 2.9633318243549116,
|
|
"grad_norm": 0.5323524113852374,
|
|
"learning_rate": 4.7397639087432e-09,
|
|
"loss": 0.3267,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 2.969367738041346,
|
|
"grad_norm": 0.5324354968490875,
|
|
"learning_rate": 3.3342669730729303e-09,
|
|
"loss": 0.3255,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 2.97540365172778,
|
|
"grad_norm": 0.550825930999869,
|
|
"learning_rate": 2.1752493424148647e-09,
|
|
"loss": 0.328,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 2.9814395654142145,
|
|
"grad_norm": 0.5289245768111625,
|
|
"learning_rate": 1.2627681896670852e-09,
|
|
"loss": 0.3265,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 2.987475479100649,
|
|
"grad_norm": 0.5548931035545003,
|
|
"learning_rate": 5.968685263885165e-10,
|
|
"loss": 0.329,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 2.993511392787083,
|
|
"grad_norm": 0.5268141741402684,
|
|
"learning_rate": 1.7758320058236522e-10,
|
|
"loss": 0.3264,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 2.9995473064735174,
|
|
"grad_norm": 0.5394909098085136,
|
|
"learning_rate": 4.932895071863009e-12,
|
|
"loss": 0.3267,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 4971,
|
|
"total_flos": 3906508525600768.0,
|
|
"train_loss": 0.3714317911300974,
|
|
"train_runtime": 271631.3463,
|
|
"train_samples_per_second": 4.684,
|
|
"train_steps_per_second": 0.018
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 4971,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3906508525600768.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|