4585 lines
115 KiB
JSON
4585 lines
115 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9745218752049776,
|
|
"eval_steps": 500,
|
|
"global_step": 6500,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0014992644233922732,
|
|
"grad_norm": 1.3515625,
|
|
"learning_rate": 3.6e-08,
|
|
"loss": 1.6646936416625977,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0029985288467845464,
|
|
"grad_norm": 1.0859375,
|
|
"learning_rate": 7.599999999999999e-08,
|
|
"loss": 1.642629623413086,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.004497793270176819,
|
|
"grad_norm": 0.953125,
|
|
"learning_rate": 1.16e-07,
|
|
"loss": 1.6638397216796874,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.005997057693569093,
|
|
"grad_norm": 0.99609375,
|
|
"learning_rate": 1.56e-07,
|
|
"loss": 1.648602294921875,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.007496322116961366,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 1.96e-07,
|
|
"loss": 1.6447210311889648,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.008995586540353638,
|
|
"grad_norm": 1.125,
|
|
"learning_rate": 2.3599999999999997e-07,
|
|
"loss": 1.6022998809814453,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.010494850963745913,
|
|
"grad_norm": 1.1171875,
|
|
"learning_rate": 2.7600000000000004e-07,
|
|
"loss": 1.5853511810302734,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.011994115387138186,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 3.1599999999999997e-07,
|
|
"loss": 1.512193775177002,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.013493379810530459,
|
|
"grad_norm": 1.0703125,
|
|
"learning_rate": 3.5599999999999996e-07,
|
|
"loss": 1.4463014602661133,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.014992644233922731,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 3.96e-07,
|
|
"loss": 1.3526597023010254,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.016491908657315004,
|
|
"grad_norm": 1.71875,
|
|
"learning_rate": 4.36e-07,
|
|
"loss": 1.1787323951721191,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.017991173080707277,
|
|
"grad_norm": 0.9765625,
|
|
"learning_rate": 4.76e-07,
|
|
"loss": 0.9612628936767578,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.01949043750409955,
|
|
"grad_norm": 1.2578125,
|
|
"learning_rate": 5.16e-07,
|
|
"loss": 0.6817054748535156,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.020989701927491826,
|
|
"grad_norm": 0.703125,
|
|
"learning_rate": 5.560000000000001e-07,
|
|
"loss": 0.4010798454284668,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0224889663508841,
|
|
"grad_norm": 0.44921875,
|
|
"learning_rate": 5.96e-07,
|
|
"loss": 0.33456034660339357,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.02398823077427637,
|
|
"grad_norm": 0.447265625,
|
|
"learning_rate": 6.36e-07,
|
|
"loss": 0.351082444190979,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.025487495197668644,
|
|
"grad_norm": 0.41015625,
|
|
"learning_rate": 6.76e-07,
|
|
"loss": 0.3510997772216797,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.026986759621060917,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 7.159999999999999e-07,
|
|
"loss": 0.27989749908447265,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.02848602404445319,
|
|
"grad_norm": 0.484375,
|
|
"learning_rate": 7.559999999999999e-07,
|
|
"loss": 0.25770542621612547,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.029985288467845463,
|
|
"grad_norm": 0.361328125,
|
|
"learning_rate": 7.96e-07,
|
|
"loss": 0.24579532146453859,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.031484552891237735,
|
|
"grad_norm": 0.357421875,
|
|
"learning_rate": 8.359999999999999e-07,
|
|
"loss": 0.19708189964294434,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.03298381731463001,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 8.76e-07,
|
|
"loss": 0.19723033905029297,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.03448308173802228,
|
|
"grad_norm": 0.447265625,
|
|
"learning_rate": 9.16e-07,
|
|
"loss": 0.2601869821548462,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.035982346161414554,
|
|
"grad_norm": 0.2431640625,
|
|
"learning_rate": 9.559999999999998e-07,
|
|
"loss": 0.22059969902038573,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.037481610584806827,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 9.959999999999999e-07,
|
|
"loss": 0.23547093868255614,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.0389808750081991,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 1.036e-06,
|
|
"loss": 0.23184664249420167,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.04048013943159137,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 1.076e-06,
|
|
"loss": 0.1775584936141968,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.04197940385498365,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.116e-06,
|
|
"loss": 0.1842654228210449,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.043478668278375925,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 1.1559999999999998e-06,
|
|
"loss": 0.1996519684791565,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.0449779327017682,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 1.1959999999999999e-06,
|
|
"loss": 0.2009890556335449,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.04647719712516047,
|
|
"grad_norm": 0.359375,
|
|
"learning_rate": 1.236e-06,
|
|
"loss": 0.20845539569854737,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.04797646154855274,
|
|
"grad_norm": 0.474609375,
|
|
"learning_rate": 1.276e-06,
|
|
"loss": 0.18711086511611938,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.049475725971945016,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 1.316e-06,
|
|
"loss": 0.17694923877716065,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.05097499039533729,
|
|
"grad_norm": 0.4921875,
|
|
"learning_rate": 1.356e-06,
|
|
"loss": 0.2112422227859497,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.05247425481872956,
|
|
"grad_norm": 0.49609375,
|
|
"learning_rate": 1.3959999999999998e-06,
|
|
"loss": 0.16929364204406738,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.053973519242121834,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.4359999999999999e-06,
|
|
"loss": 0.14373246431350709,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.05547278366551411,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.476e-06,
|
|
"loss": 0.19208487272262573,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.05697204808890638,
|
|
"grad_norm": 0.490234375,
|
|
"learning_rate": 1.516e-06,
|
|
"loss": 0.21074600219726564,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.05847131251229865,
|
|
"grad_norm": 0.404296875,
|
|
"learning_rate": 1.556e-06,
|
|
"loss": 0.23820483684539795,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.059970576935690925,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 1.596e-06,
|
|
"loss": 0.1559612274169922,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.0614698413590832,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 1.6359999999999999e-06,
|
|
"loss": 0.17463357448577882,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.06296910578247547,
|
|
"grad_norm": 0.2294921875,
|
|
"learning_rate": 1.676e-06,
|
|
"loss": 0.17741453647613525,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.06446837020586775,
|
|
"grad_norm": 0.37890625,
|
|
"learning_rate": 1.716e-06,
|
|
"loss": 0.16474447250366211,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.06596763462926002,
|
|
"grad_norm": 0.1826171875,
|
|
"learning_rate": 1.756e-06,
|
|
"loss": 0.15805156230926515,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.0674668990526523,
|
|
"grad_norm": 0.357421875,
|
|
"learning_rate": 1.796e-06,
|
|
"loss": 0.1870645046234131,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.06896616347604456,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 1.836e-06,
|
|
"loss": 0.17467626333236694,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.07046542789943684,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 1.8759999999999997e-06,
|
|
"loss": 0.1839710831642151,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.07196469232282911,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 1.916e-06,
|
|
"loss": 0.15121291875839232,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.07346395674622139,
|
|
"grad_norm": 0.1962890625,
|
|
"learning_rate": 1.956e-06,
|
|
"loss": 0.16356420516967773,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.07496322116961365,
|
|
"grad_norm": 0.2294921875,
|
|
"learning_rate": 1.996e-06,
|
|
"loss": 0.18490909337997435,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.07646248559300593,
|
|
"grad_norm": 0.306640625,
|
|
"learning_rate": 1.9999895001358395e-06,
|
|
"loss": 0.17003339529037476,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.0779617500163982,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 1.9999532045921925e-06,
|
|
"loss": 0.14626400470733641,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.07946101443979048,
|
|
"grad_norm": 0.1728515625,
|
|
"learning_rate": 1.9998909846818658e-06,
|
|
"loss": 0.1461304545402527,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.08096027886318274,
|
|
"grad_norm": 0.1572265625,
|
|
"learning_rate": 1.9998028420179468e-06,
|
|
"loss": 0.1631840229034424,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.08245954328657502,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.9996887788855846e-06,
|
|
"loss": 0.14891616106033326,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.0839588077099673,
|
|
"grad_norm": 0.341796875,
|
|
"learning_rate": 1.999548798241933e-06,
|
|
"loss": 0.1451740264892578,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.08545807213335957,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 1.9993829037160704e-06,
|
|
"loss": 0.13687235116958618,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.08695733655675185,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.9991910996089085e-06,
|
|
"loss": 0.15143134593963622,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.08845660098014411,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 1.998973390893081e-06,
|
|
"loss": 0.15538901090621948,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.0899558654035364,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 1.998729783212812e-06,
|
|
"loss": 0.17548735141754152,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.09145512982692866,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.998460282883772e-06,
|
|
"loss": 0.1454736351966858,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.09295439425032094,
|
|
"grad_norm": 0.392578125,
|
|
"learning_rate": 1.998164896892913e-06,
|
|
"loss": 0.13865782022476197,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.0944536586737132,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 1.9978436328982882e-06,
|
|
"loss": 0.16720572710037232,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.09595292309710549,
|
|
"grad_norm": 0.2197265625,
|
|
"learning_rate": 1.997496499228853e-06,
|
|
"loss": 0.14800021648406983,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.09745218752049775,
|
|
"grad_norm": 0.1708984375,
|
|
"learning_rate": 1.9971235048842495e-06,
|
|
"loss": 0.14826395511627197,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.09895145194389003,
|
|
"grad_norm": 0.1396484375,
|
|
"learning_rate": 1.996724659534572e-06,
|
|
"loss": 0.12433024644851684,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.1004507163672823,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 1.9962999735201173e-06,
|
|
"loss": 0.1702478051185608,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.10194998079067458,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.9958494578511167e-06,
|
|
"loss": 0.1259335994720459,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.10344924521406684,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.99537312420745e-06,
|
|
"loss": 0.20034666061401368,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.10494850963745912,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 1.994870984938344e-06,
|
|
"loss": 0.12428268194198608,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.10644777406085139,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.9943430530620497e-06,
|
|
"loss": 0.11142982244491577,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.10794703848424367,
|
|
"grad_norm": 0.361328125,
|
|
"learning_rate": 1.993789342265507e-06,
|
|
"loss": 0.1445391893386841,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.10944630290763595,
|
|
"grad_norm": 0.353515625,
|
|
"learning_rate": 1.99320986690399e-06,
|
|
"loss": 0.1293397307395935,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.11094556733102821,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 1.9926046420007326e-06,
|
|
"loss": 0.11696268320083618,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.1124448317544205,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 1.9919736832465417e-06,
|
|
"loss": 0.12922875881195067,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.11394409617781276,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.9913170069993896e-06,
|
|
"loss": 0.13306174278259278,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.11544336060120504,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 1.9906346302839882e-06,
|
|
"loss": 0.13486032485961913,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.1169426250245973,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 1.9899265707913492e-06,
|
|
"loss": 0.13135333061218263,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.11844188944798958,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 1.989192846878326e-06,
|
|
"loss": 0.12307331562042237,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.11994115387138185,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 1.988433477567137e-06,
|
|
"loss": 0.11497733592987061,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.12144041829477413,
|
|
"grad_norm": 0.1865234375,
|
|
"learning_rate": 1.9876484825448706e-06,
|
|
"loss": 0.13883528709411622,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.1229396827181664,
|
|
"grad_norm": 0.37109375,
|
|
"learning_rate": 1.9868378821629795e-06,
|
|
"loss": 0.13286290168762208,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.12443894714155868,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.9860016974367474e-06,
|
|
"loss": 0.1608394503593445,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.12593821156495094,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 1.985139950044749e-06,
|
|
"loss": 0.1350063681602478,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.1274374759883432,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 1.9842526623282844e-06,
|
|
"loss": 0.14678356647491456,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.1289367404117355,
|
|
"grad_norm": 0.1455078125,
|
|
"learning_rate": 1.9833398572908027e-06,
|
|
"loss": 0.13124724626541137,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.13043600483512777,
|
|
"grad_norm": 0.22265625,
|
|
"learning_rate": 1.9824015585973037e-06,
|
|
"loss": 0.1295769214630127,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.13193526925852003,
|
|
"grad_norm": 0.1455078125,
|
|
"learning_rate": 1.9814377905737253e-06,
|
|
"loss": 0.14678038358688356,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.1334345336819123,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 1.980448578206312e-06,
|
|
"loss": 0.12379497289657593,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.1349337981053046,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 1.9794339471409684e-06,
|
|
"loss": 0.1308390736579895,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.13643306252869686,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 1.978393923682593e-06,
|
|
"loss": 0.1078214168548584,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.13793232695208912,
|
|
"grad_norm": 0.203125,
|
|
"learning_rate": 1.9773285347943975e-06,
|
|
"loss": 0.12421451807022095,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.13943159137548142,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 1.976237808097206e-06,
|
|
"loss": 0.11592028141021729,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.14093085579887368,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 1.975121771868741e-06,
|
|
"loss": 0.11567631959915162,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.14243012022226595,
|
|
"grad_norm": 0.1904296875,
|
|
"learning_rate": 1.9739804550428887e-06,
|
|
"loss": 0.13639799356460572,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.14392938464565821,
|
|
"grad_norm": 0.349609375,
|
|
"learning_rate": 1.9728138872089495e-06,
|
|
"loss": 0.12592445611953734,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.1454286490690505,
|
|
"grad_norm": 0.1826171875,
|
|
"learning_rate": 1.9716220986108715e-06,
|
|
"loss": 0.12377442121505737,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.14692791349244277,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.9704051201464644e-06,
|
|
"loss": 0.14418370723724366,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.14842717791583504,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 1.9691629833666016e-06,
|
|
"loss": 0.1573760986328125,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.1499264423392273,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 1.9678957204743986e-06,
|
|
"loss": 0.1386464238166809,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.1514257067626196,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 1.966603364324381e-06,
|
|
"loss": 0.13971794843673707,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.15292497118601187,
|
|
"grad_norm": 0.1669921875,
|
|
"learning_rate": 1.965285948421631e-06,
|
|
"loss": 0.13169209957122802,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.15442423560940413,
|
|
"grad_norm": 0.31640625,
|
|
"learning_rate": 1.963943506920921e-06,
|
|
"loss": 0.1507979989051819,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.1559235000327964,
|
|
"grad_norm": 0.1748046875,
|
|
"learning_rate": 1.962576074625824e-06,
|
|
"loss": 0.11561447381973267,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.1574227644561887,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 1.961183686987816e-06,
|
|
"loss": 0.14605475664138795,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.15892202887958096,
|
|
"grad_norm": 0.421875,
|
|
"learning_rate": 1.9597663801053534e-06,
|
|
"loss": 0.13819440603256225,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.16042129330297322,
|
|
"grad_norm": 0.16015625,
|
|
"learning_rate": 1.9583241907229395e-06,
|
|
"loss": 0.14112586975097657,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.1619205577263655,
|
|
"grad_norm": 0.158203125,
|
|
"learning_rate": 1.95685715623017e-06,
|
|
"loss": 0.1168364405632019,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.16341982214975778,
|
|
"grad_norm": 0.390625,
|
|
"learning_rate": 1.955365314660765e-06,
|
|
"loss": 0.11267675161361694,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.16491908657315005,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 1.9538487046915824e-06,
|
|
"loss": 0.12178796529769897,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.1664183509965423,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.952307365641615e-06,
|
|
"loss": 0.10850706100463867,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.1679176154199346,
|
|
"grad_norm": 0.248046875,
|
|
"learning_rate": 1.950741337470971e-06,
|
|
"loss": 0.12071930170059204,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.16941687984332687,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 1.949150660779839e-06,
|
|
"loss": 0.12768586874008178,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.17091614426671914,
|
|
"grad_norm": 0.2041015625,
|
|
"learning_rate": 1.9475353768074354e-06,
|
|
"loss": 0.12366677522659301,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.1724154086901114,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.9458955274309334e-06,
|
|
"loss": 0.12472466230392457,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.1739146731135037,
|
|
"grad_norm": 0.61328125,
|
|
"learning_rate": 1.944231155164378e-06,
|
|
"loss": 0.10178214311599731,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.17541393753689596,
|
|
"grad_norm": 0.1884765625,
|
|
"learning_rate": 1.942542303157587e-06,
|
|
"loss": 0.10434643030166627,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.17691320196028823,
|
|
"grad_norm": 0.1728515625,
|
|
"learning_rate": 1.940829015195027e-06,
|
|
"loss": 0.12654454708099366,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.1784124663836805,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.939091335694682e-06,
|
|
"loss": 0.14714936017990113,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.1799117308070728,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.9373293097069006e-06,
|
|
"loss": 0.12481101751327514,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.18141099523046506,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.935542982913229e-06,
|
|
"loss": 0.126925528049469,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.18291025965385732,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.9337324016252246e-06,
|
|
"loss": 0.12335828542709351,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.1844095240772496,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.931897612783257e-06,
|
|
"loss": 0.1198701024055481,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.18590878850064188,
|
|
"grad_norm": 0.1884765625,
|
|
"learning_rate": 1.9300386639552917e-06,
|
|
"loss": 0.10855865478515625,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.18740805292403415,
|
|
"grad_norm": 0.169921875,
|
|
"learning_rate": 1.928155603335654e-06,
|
|
"loss": 0.11242524385452271,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.1889073173474264,
|
|
"grad_norm": 0.2021484375,
|
|
"learning_rate": 1.9262484797437835e-06,
|
|
"loss": 0.10338661670684815,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.1904065817708187,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 1.924317342622964e-06,
|
|
"loss": 0.13085209131240844,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.19190584619421097,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 1.922362242039046e-06,
|
|
"loss": 0.13100965023040773,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.19340511061760324,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 1.920383228679146e-06,
|
|
"loss": 0.11286605596542358,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.1949043750409955,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.9183803538503325e-06,
|
|
"loss": 0.10787241458892823,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.1964036394643878,
|
|
"grad_norm": 0.2041015625,
|
|
"learning_rate": 1.916353669478297e-06,
|
|
"loss": 0.12694379091262817,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.19790290388778006,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 1.914303228106007e-06,
|
|
"loss": 0.12459377050399781,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.19940216831117233,
|
|
"grad_norm": 0.1953125,
|
|
"learning_rate": 1.912229082892344e-06,
|
|
"loss": 0.11015371084213257,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.2009014327345646,
|
|
"grad_norm": 0.166015625,
|
|
"learning_rate": 1.910131287610726e-06,
|
|
"loss": 0.10224473476409912,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.2024006971579569,
|
|
"grad_norm": 0.453125,
|
|
"learning_rate": 1.9080098966477114e-06,
|
|
"loss": 0.1472551107406616,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.20389996158134915,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 1.9058649650015913e-06,
|
|
"loss": 0.12049105167388915,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.20539922600474142,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 1.9036965482809624e-06,
|
|
"loss": 0.10829113721847534,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.20689849042813369,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 1.9015047027032858e-06,
|
|
"loss": 0.09630746841430664,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.20839775485152598,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 1.8992894850934288e-06,
|
|
"loss": 0.10639712810516358,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.20989701927491825,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.8970509528821933e-06,
|
|
"loss": 0.1108583927154541,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.2113962836983105,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 1.8947891641048236e-06,
|
|
"loss": 0.1440010905265808,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.21289554812170278,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 1.8925041773995066e-06,
|
|
"loss": 0.11479418277740479,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.21439481254509507,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 1.8901960520058466e-06,
|
|
"loss": 0.1372006893157959,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.21589407696848734,
|
|
"grad_norm": 0.130859375,
|
|
"learning_rate": 1.8878648477633338e-06,
|
|
"loss": 0.1048818826675415,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.2173933413918796,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.8855106251097893e-06,
|
|
"loss": 0.11379430294036866,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.2188926058152719,
|
|
"grad_norm": 0.158203125,
|
|
"learning_rate": 1.8831334450798008e-06,
|
|
"loss": 0.11848256587982178,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.22039187023866416,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 1.8807333693031394e-06,
|
|
"loss": 0.11757129430770874,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.22189113466205643,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.8783104600031608e-06,
|
|
"loss": 0.1077274203300476,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.2233903990854487,
|
|
"grad_norm": 0.1875,
|
|
"learning_rate": 1.8758647799951936e-06,
|
|
"loss": 0.13631620407104492,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.224889663508841,
|
|
"grad_norm": 0.1787109375,
|
|
"learning_rate": 1.8733963926849108e-06,
|
|
"loss": 0.11129487752914428,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.22638892793223325,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.870905362066684e-06,
|
|
"loss": 0.10358604192733764,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.22788819235562552,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 1.8683917527219274e-06,
|
|
"loss": 0.10696442127227783,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.22938745677901778,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 1.86585562981742e-06,
|
|
"loss": 0.1079567551612854,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.23088672120241008,
|
|
"grad_norm": 0.19140625,
|
|
"learning_rate": 1.863297059103619e-06,
|
|
"loss": 0.08297246098518371,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.23238598562580234,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 1.860716106912954e-06,
|
|
"loss": 0.11826142072677612,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.2338852500491946,
|
|
"grad_norm": 0.1689453125,
|
|
"learning_rate": 1.858112840158107e-06,
|
|
"loss": 0.11677643060684204,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.23538451447258688,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.8554873263302783e-06,
|
|
"loss": 0.10421488285064698,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.23688377889597917,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 1.8528396334974364e-06,
|
|
"loss": 0.10596433877944947,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.23838304331937143,
|
|
"grad_norm": 0.150390625,
|
|
"learning_rate": 1.850169830302553e-06,
|
|
"loss": 0.09852623343467712,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.2398823077427637,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 1.8474779859618245e-06,
|
|
"loss": 0.13672434091567992,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.24138157216615597,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.8447641702628762e-06,
|
|
"loss": 0.11511225700378418,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.24288083658954826,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 1.8420284535629539e-06,
|
|
"loss": 0.11240946054458618,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.24438010101294053,
|
|
"grad_norm": 0.1484375,
|
|
"learning_rate": 1.839270906787099e-06,
|
|
"loss": 0.07973622083663941,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.2458793654363328,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.8364916014263115e-06,
|
|
"loss": 0.10506463050842285,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.24737862985972509,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 1.8336906095356937e-06,
|
|
"loss": 0.1416532278060913,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.24887789428311735,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 1.830868003732585e-06,
|
|
"loss": 0.10021046400070191,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.2503771587065096,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 1.8280238571946773e-06,
|
|
"loss": 0.09624313712120056,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.2518764231299019,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.8251582436581193e-06,
|
|
"loss": 0.09360762238502503,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.25337568755329415,
|
|
"grad_norm": 0.248046875,
|
|
"learning_rate": 1.8222712374156038e-06,
|
|
"loss": 0.10825358629226685,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.2548749519766864,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.8193629133144412e-06,
|
|
"loss": 0.09739000201225281,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.25637421640007874,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 1.8164333467546205e-06,
|
|
"loss": 0.13052973747253419,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.257873480823471,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 1.8134826136868533e-06,
|
|
"loss": 0.1281905174255371,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.25937274524686327,
|
|
"grad_norm": 0.1611328125,
|
|
"learning_rate": 1.810510790610606e-06,
|
|
"loss": 0.1224624514579773,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.26087200967025553,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 1.8075179545721148e-06,
|
|
"loss": 0.11144398450851441,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.2623712740936478,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 1.8045041831623892e-06,
|
|
"loss": 0.07502882480621338,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.26387053851704007,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.8014695545152014e-06,
|
|
"loss": 0.11576559543609619,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.26536980294043233,
|
|
"grad_norm": 0.16015625,
|
|
"learning_rate": 1.7984141473050583e-06,
|
|
"loss": 0.10232355594635009,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.2668690673638246,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 1.7953380407451632e-06,
|
|
"loss": 0.10430169105529785,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.2683683317872169,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 1.7922413145853632e-06,
|
|
"loss": 0.10129927396774292,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.2698675962106092,
|
|
"grad_norm": 0.1787109375,
|
|
"learning_rate": 1.7891240491100794e-06,
|
|
"loss": 0.1479990601539612,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.27136686063400145,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 1.7859863251362268e-06,
|
|
"loss": 0.09153670072555542,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.2728661250573937,
|
|
"grad_norm": 0.185546875,
|
|
"learning_rate": 1.7828282240111188e-06,
|
|
"loss": 0.10302189588546753,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.274365389480786,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 1.779649827610359e-06,
|
|
"loss": 0.10783896446228028,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.27586465390417825,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.7764512183357161e-06,
|
|
"loss": 0.10202981233596801,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.2773639183275705,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 1.7732324791129914e-06,
|
|
"loss": 0.09905132055282592,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.27886318275096283,
|
|
"grad_norm": 0.1611328125,
|
|
"learning_rate": 1.769993693389865e-06,
|
|
"loss": 0.10531445741653442,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.2803624471743551,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.7667349451337353e-06,
|
|
"loss": 0.08846319317817689,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.28186171159774737,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 1.7634563188295403e-06,
|
|
"loss": 0.0975230872631073,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.28336097602113963,
|
|
"grad_norm": 0.31640625,
|
|
"learning_rate": 1.7601578994775684e-06,
|
|
"loss": 0.09964791536331177,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.2848602404445319,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 1.756839772591254e-06,
|
|
"loss": 0.1272280693054199,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.28635950486792416,
|
|
"grad_norm": 0.353515625,
|
|
"learning_rate": 1.7535020241949598e-06,
|
|
"loss": 0.11281530857086182,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.28785876929131643,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.7501447408217497e-06,
|
|
"loss": 0.12100661993026733,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.2893580337147087,
|
|
"grad_norm": 0.16015625,
|
|
"learning_rate": 1.7467680095111414e-06,
|
|
"loss": 0.10090996026992798,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.290857298138101,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 1.7433719178068524e-06,
|
|
"loss": 0.13152073621749877,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.2923565625614933,
|
|
"grad_norm": 0.193359375,
|
|
"learning_rate": 1.739956553754529e-06,
|
|
"loss": 0.09162830114364624,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.29385582698488555,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 1.7365220058994655e-06,
|
|
"loss": 0.1236315131187439,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.2953550914082778,
|
|
"grad_norm": 0.2119140625,
|
|
"learning_rate": 1.7330683632843059e-06,
|
|
"loss": 0.09788467288017273,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.2968543558316701,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 1.7295957154467382e-06,
|
|
"loss": 0.09465370178222657,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.29835362025506235,
|
|
"grad_norm": 0.1279296875,
|
|
"learning_rate": 1.726104152417171e-06,
|
|
"loss": 0.1005245327949524,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.2998528846784546,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 1.722593764716401e-06,
|
|
"loss": 0.11565471887588501,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.30135214910184693,
|
|
"grad_norm": 0.1865234375,
|
|
"learning_rate": 1.7190646433532644e-06,
|
|
"loss": 0.10114152431488037,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.3028514135252392,
|
|
"grad_norm": 0.349609375,
|
|
"learning_rate": 1.7155168798222789e-06,
|
|
"loss": 0.11758486032485962,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.30435067794863147,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 1.7119505661012718e-06,
|
|
"loss": 0.12670440673828126,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.30584994237202373,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.7083657946489941e-06,
|
|
"loss": 0.09111065268516541,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.307349206795416,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 1.7047626584027248e-06,
|
|
"loss": 0.10659761428833008,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.30884847121880826,
|
|
"grad_norm": 0.203125,
|
|
"learning_rate": 1.7011412507758598e-06,
|
|
"loss": 0.09141663908958435,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.31034773564220053,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 1.6975016656554924e-06,
|
|
"loss": 0.1156761646270752,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.3118470000655928,
|
|
"grad_norm": 0.1796875,
|
|
"learning_rate": 1.693843997399977e-06,
|
|
"loss": 0.1171414852142334,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.3133462644889851,
|
|
"grad_norm": 0.158203125,
|
|
"learning_rate": 1.690168340836484e-06,
|
|
"loss": 0.10372446775436402,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.3148455289123774,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 1.6864747912585416e-06,
|
|
"loss": 0.11128904819488525,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.31634479333576965,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.6827634444235643e-06,
|
|
"loss": 0.11956160068511963,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.3178440577591619,
|
|
"grad_norm": 0.1640625,
|
|
"learning_rate": 1.6790343965503709e-06,
|
|
"loss": 0.08641130924224853,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.3193433221825542,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 1.67528774431669e-06,
|
|
"loss": 0.11216531991958618,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.32084258660594644,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 1.6715235848566533e-06,
|
|
"loss": 0.09440256357192993,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.3223418510293387,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 1.6677420157582774e-06,
|
|
"loss": 0.08534490466117858,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.323841115452731,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 1.663943135060934e-06,
|
|
"loss": 0.0956838846206665,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.3253403798761233,
|
|
"grad_norm": 0.1767578125,
|
|
"learning_rate": 1.6601270412528084e-06,
|
|
"loss": 0.1049761414527893,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.32683964429951556,
|
|
"grad_norm": 0.189453125,
|
|
"learning_rate": 1.6562938332683454e-06,
|
|
"loss": 0.10431164503097534,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.32833890872290783,
|
|
"grad_norm": 0.169921875,
|
|
"learning_rate": 1.6524436104856845e-06,
|
|
"loss": 0.09506284594535827,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.3298381731463001,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 1.648576472724084e-06,
|
|
"loss": 0.1192029595375061,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.33133743756969236,
|
|
"grad_norm": 0.201171875,
|
|
"learning_rate": 1.6446925202413331e-06,
|
|
"loss": 0.09638182520866394,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.3328367019930846,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.640791853731152e-06,
|
|
"loss": 0.090701824426651,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.3343359664164769,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.6368745743205821e-06,
|
|
"loss": 0.09149349331855774,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.3358352308398692,
|
|
"grad_norm": 0.310546875,
|
|
"learning_rate": 1.6329407835673635e-06,
|
|
"loss": 0.13018569946289063,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.3373344952632615,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 1.628990583457302e-06,
|
|
"loss": 0.1057326078414917,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.33883375968665375,
|
|
"grad_norm": 0.212890625,
|
|
"learning_rate": 1.6250240764016272e-06,
|
|
"loss": 0.1026038646697998,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.340333024110046,
|
|
"grad_norm": 0.32421875,
|
|
"learning_rate": 1.6210413652343338e-06,
|
|
"loss": 0.08930633664131164,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.3418322885334383,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 1.6170425532095187e-06,
|
|
"loss": 0.10358338356018067,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.34333155295683054,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 1.6130277439987022e-06,
|
|
"loss": 0.09695777893066407,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.3448308173802228,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 1.6089970416881414e-06,
|
|
"loss": 0.10922973155975342,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.3463300818036151,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.6049505507761309e-06,
|
|
"loss": 0.10175033807754516,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.3478293462270074,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 1.600888376170294e-06,
|
|
"loss": 0.10103652477264405,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.34932861065039966,
|
|
"grad_norm": 0.1904296875,
|
|
"learning_rate": 1.5968106231848632e-06,
|
|
"loss": 0.07333493828773499,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.35082787507379193,
|
|
"grad_norm": 0.1875,
|
|
"learning_rate": 1.5927173975379488e-06,
|
|
"loss": 0.08524224758148194,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.3523271394971842,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.5886088053488e-06,
|
|
"loss": 0.09646062850952149,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.35382640392057646,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 1.584484953135051e-06,
|
|
"loss": 0.0860047996044159,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.3553256683439687,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.580345947809962e-06,
|
|
"loss": 0.09231213331222535,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.356824932767361,
|
|
"grad_norm": 0.1845703125,
|
|
"learning_rate": 1.5761918966796462e-06,
|
|
"loss": 0.08510161638259887,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.3583241971907533,
|
|
"grad_norm": 0.171875,
|
|
"learning_rate": 1.5720229074402883e-06,
|
|
"loss": 0.10984573364257813,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.3598234616141456,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 1.5678390881753512e-06,
|
|
"loss": 0.11594033241271973,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.36132272603753784,
|
|
"grad_norm": 0.376953125,
|
|
"learning_rate": 1.5636405473527763e-06,
|
|
"loss": 0.09002584218978882,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.3628219904609301,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.5594273938221683e-06,
|
|
"loss": 0.09397087097167969,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.3643212548843224,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 1.5551997368119758e-06,
|
|
"loss": 0.10535862445831298,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.36582051930771464,
|
|
"grad_norm": 0.1787109375,
|
|
"learning_rate": 1.5509576859266589e-06,
|
|
"loss": 0.09418719410896301,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.3673197837311069,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.5467013511438455e-06,
|
|
"loss": 0.10402942895889282,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.3688190481544992,
|
|
"grad_norm": 0.171875,
|
|
"learning_rate": 1.5424308428114842e-06,
|
|
"loss": 0.09072368144989014,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.3703183125778915,
|
|
"grad_norm": 0.2177734375,
|
|
"learning_rate": 1.5381462716449793e-06,
|
|
"loss": 0.12782552242279052,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.37181757700128376,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 1.5338477487243229e-06,
|
|
"loss": 0.12468627691268921,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.373316841424676,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.5295353854912142e-06,
|
|
"loss": 0.08745025396347046,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.3748161058480683,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.5252092937461708e-06,
|
|
"loss": 0.11175857782363892,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.37631537027146056,
|
|
"grad_norm": 0.2412109375,
|
|
"learning_rate": 1.52086958564563e-06,
|
|
"loss": 0.09319526553153992,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.3778146346948528,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 1.5165163736990402e-06,
|
|
"loss": 0.09921846985816955,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.3793138991182451,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 1.5121497707659459e-06,
|
|
"loss": 0.13923016786575318,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.3808131635416374,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 1.5077698900530605e-06,
|
|
"loss": 0.09786847829818726,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.3823124279650297,
|
|
"grad_norm": 0.185546875,
|
|
"learning_rate": 1.5033768451113309e-06,
|
|
"loss": 0.09633988738059998,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.38381169238842194,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.4989707498329943e-06,
|
|
"loss": 0.14051291942596436,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.3853109568118142,
|
|
"grad_norm": 0.1962890625,
|
|
"learning_rate": 1.4945517184486266e-06,
|
|
"loss": 0.09283372163772582,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.3868102212352065,
|
|
"grad_norm": 0.18359375,
|
|
"learning_rate": 1.4901198655241784e-06,
|
|
"loss": 0.09845755696296692,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.38830948565859874,
|
|
"grad_norm": 0.2216796875,
|
|
"learning_rate": 1.4856753059580065e-06,
|
|
"loss": 0.09300137758255005,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.389808750081991,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.4812181549778956e-06,
|
|
"loss": 0.0833775520324707,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.3913080145053833,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 1.4767485281380694e-06,
|
|
"loss": 0.09278824925422668,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.3928072789287756,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.4722665413161948e-06,
|
|
"loss": 0.09754594564437866,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.39430654335216786,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 1.46777231071038e-06,
|
|
"loss": 0.1008460521697998,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.3958058077755601,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 1.4632659528361591e-06,
|
|
"loss": 0.0745955765247345,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.3973050721989524,
|
|
"grad_norm": 0.2470703125,
|
|
"learning_rate": 1.4587475845234729e-06,
|
|
"loss": 0.11444522142410278,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.39880433662234466,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.454217322913641e-06,
|
|
"loss": 0.09638299942016601,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.4003036010457369,
|
|
"grad_norm": 0.16015625,
|
|
"learning_rate": 1.4496752854563217e-06,
|
|
"loss": 0.0774892508983612,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.4018028654691292,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 1.4451215899064699e-06,
|
|
"loss": 0.10078433752059937,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.40330212989252145,
|
|
"grad_norm": 0.2001953125,
|
|
"learning_rate": 1.4405563543212841e-06,
|
|
"loss": 0.0878619134426117,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.4048013943159138,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 1.4359796970571434e-06,
|
|
"loss": 0.08299956321716309,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.40630065873930604,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 1.4313917367665414e-06,
|
|
"loss": 0.11845102310180664,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.4077999231626983,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 1.4267925923950094e-06,
|
|
"loss": 0.1439320921897888,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.4092991875860906,
|
|
"grad_norm": 0.248046875,
|
|
"learning_rate": 1.422182383178032e-06,
|
|
"loss": 0.09109203219413757,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.41079845200948284,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.4175612286379562e-06,
|
|
"loss": 0.07972334623336792,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.4122977164328751,
|
|
"grad_norm": 0.1748046875,
|
|
"learning_rate": 1.412929248580894e-06,
|
|
"loss": 0.08981594443321228,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.41379698085626737,
|
|
"grad_norm": 0.201171875,
|
|
"learning_rate": 1.4082865630936134e-06,
|
|
"loss": 0.10788861513137818,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.4152962452796597,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.4036332925404283e-06,
|
|
"loss": 0.08774803280830383,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.41679550970305196,
|
|
"grad_norm": 0.1806640625,
|
|
"learning_rate": 1.3989695575600763e-06,
|
|
"loss": 0.0800628900527954,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.4182947741264442,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.3942954790625904e-06,
|
|
"loss": 0.11887997388839722,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.4197940385498365,
|
|
"grad_norm": 0.1650390625,
|
|
"learning_rate": 1.3896111782261668e-06,
|
|
"loss": 0.09116448163986206,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.42129330297322876,
|
|
"grad_norm": 0.400390625,
|
|
"learning_rate": 1.3849167764940211e-06,
|
|
"loss": 0.11099686622619628,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.422792567396621,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 1.38021239557124e-06,
|
|
"loss": 0.09188846349716187,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.4242918318200133,
|
|
"grad_norm": 0.1748046875,
|
|
"learning_rate": 1.3754981574216267e-06,
|
|
"loss": 0.09292811751365662,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.42579109624340555,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 1.3707741842645392e-06,
|
|
"loss": 0.0990601897239685,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.4272903606667979,
|
|
"grad_norm": 0.21875,
|
|
"learning_rate": 1.3660405985717212e-06,
|
|
"loss": 0.0773146092891693,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.42878962509019014,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 1.361297523064126e-06,
|
|
"loss": 0.09871623516082764,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.4302888895135824,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 1.3565450807087373e-06,
|
|
"loss": 0.09449006915092469,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.4317881539369747,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 1.3517833947153782e-06,
|
|
"loss": 0.09626795053482055,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.43328741836036694,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 1.34701258853352e-06,
|
|
"loss": 0.07917786836624145,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.4347866827837592,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.3422327858490792e-06,
|
|
"loss": 0.10537385940551758,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.43628594720715147,
|
|
"grad_norm": 0.1923828125,
|
|
"learning_rate": 1.337444110581212e-06,
|
|
"loss": 0.07042791247367859,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.4377852116305438,
|
|
"grad_norm": 0.2119140625,
|
|
"learning_rate": 1.3326466868791013e-06,
|
|
"loss": 0.0855652630329132,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.43928447605393606,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 1.3278406391187391e-06,
|
|
"loss": 0.09092465043067932,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.4407837404773283,
|
|
"grad_norm": 0.216796875,
|
|
"learning_rate": 1.3230260918997004e-06,
|
|
"loss": 0.10829230546951293,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.4422830049007206,
|
|
"grad_norm": 0.31640625,
|
|
"learning_rate": 1.3182031700419129e-06,
|
|
"loss": 0.09212432503700256,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.44378226932411285,
|
|
"grad_norm": 0.1708984375,
|
|
"learning_rate": 1.3133719985824237e-06,
|
|
"loss": 0.06796190738677979,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.4452815337475051,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 1.3085327027721536e-06,
|
|
"loss": 0.08660737872123718,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.4467807981708974,
|
|
"grad_norm": 0.1943359375,
|
|
"learning_rate": 1.3036854080726525e-06,
|
|
"loss": 0.07199004888534546,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.44828006259428965,
|
|
"grad_norm": 0.197265625,
|
|
"learning_rate": 1.298830240152847e-06,
|
|
"loss": 0.11634057760238647,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.449779327017682,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 1.2939673248857805e-06,
|
|
"loss": 0.11802215576171875,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.45127859144107424,
|
|
"grad_norm": 0.21875,
|
|
"learning_rate": 1.2890967883453509e-06,
|
|
"loss": 0.10256350040435791,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.4527778558644665,
|
|
"grad_norm": 0.2470703125,
|
|
"learning_rate": 1.2842187568030431e-06,
|
|
"loss": 0.08822081089019776,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.45427712028785877,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 1.2793333567246526e-06,
|
|
"loss": 0.08067854046821595,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.45577638471125104,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.2744407147670098e-06,
|
|
"loss": 0.09741014242172241,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.4572756491346433,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.269540957774695e-06,
|
|
"loss": 0.07846143245697021,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.45877491355803557,
|
|
"grad_norm": 0.353515625,
|
|
"learning_rate": 1.2646342127767486e-06,
|
|
"loss": 0.10557938814163208,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.46027417798142783,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.2597206069833805e-06,
|
|
"loss": 0.0840741217136383,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.46177344240482016,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 1.2548002677826704e-06,
|
|
"loss": 0.09562651515007019,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.4632727068282124,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 1.2498733227372648e-06,
|
|
"loss": 0.09925270080566406,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.4647719712516047,
|
|
"grad_norm": 0.2255859375,
|
|
"learning_rate": 1.2449398995810709e-06,
|
|
"loss": 0.10337086915969848,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.46627123567499695,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 1.2400001262159458e-06,
|
|
"loss": 0.07978419065475464,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.4677705000983892,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 1.2350541307083776e-06,
|
|
"loss": 0.07110666632652282,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.4692697645217815,
|
|
"grad_norm": 0.2197265625,
|
|
"learning_rate": 1.2301020412861675e-06,
|
|
"loss": 0.07428762912750245,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.47076902894517375,
|
|
"grad_norm": 0.36328125,
|
|
"learning_rate": 1.2251439863351068e-06,
|
|
"loss": 0.09102022051811218,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.47226829336856607,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 1.220180094395644e-06,
|
|
"loss": 0.08342552185058594,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.47376755779195834,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 1.2152104941595562e-06,
|
|
"loss": 0.12274667024612426,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.4752668222153506,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.2102353144666117e-06,
|
|
"loss": 0.09014168381690979,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.47676608663874287,
|
|
"grad_norm": 0.17578125,
|
|
"learning_rate": 1.205254684301229e-06,
|
|
"loss": 0.07782111167907715,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.47826535106213514,
|
|
"grad_norm": 0.2001953125,
|
|
"learning_rate": 1.2002687327891328e-06,
|
|
"loss": 0.07985667586326599,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.4797646154855274,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.1952775891940082e-06,
|
|
"loss": 0.09129717350006103,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.48126387990891967,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.190281382914146e-06,
|
|
"loss": 0.1002733588218689,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.48276314433231193,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.185280243479092e-06,
|
|
"loss": 0.08630979657173157,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.48426240875570425,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 1.1802743005462862e-06,
|
|
"loss": 0.08386391997337342,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.4857616731790965,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 1.1752636838977013e-06,
|
|
"loss": 0.08188863396644593,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.4872609376024888,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 1.1702485234364797e-06,
|
|
"loss": 0.10928175449371338,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.48876020202588105,
|
|
"grad_norm": 0.1923828125,
|
|
"learning_rate": 1.165228949183565e-06,
|
|
"loss": 0.09540101885795593,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.4902594664492733,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 1.16020509127433e-06,
|
|
"loss": 0.092869633436203,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.4917587308726656,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 1.1551770799552039e-06,
|
|
"loss": 0.09745745658874512,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.49325799529605785,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 1.1501450455802968e-06,
|
|
"loss": 0.09029659032821655,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.49475725971945017,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 1.145109118608017e-06,
|
|
"loss": 0.09824432134628296,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.49625652414284244,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 1.1400694295976915e-06,
|
|
"loss": 0.08436204195022583,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.4977557885662347,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 1.135026109206181e-06,
|
|
"loss": 0.10501574277877808,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.49925505298962697,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 1.1299792881844906e-06,
|
|
"loss": 0.09339694380760193,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.5007543174130192,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 1.1249290973743814e-06,
|
|
"loss": 0.07747515439987182,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.5022535818364116,
|
|
"grad_norm": 0.232421875,
|
|
"learning_rate": 1.1198756677049796e-06,
|
|
"loss": 0.09033283591270447,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.5037528462598038,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.1148191301893795e-06,
|
|
"loss": 0.06604780554771424,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.5052521106831961,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.1097596159212475e-06,
|
|
"loss": 0.08669602274894714,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.5067513751065883,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.104697256071426e-06,
|
|
"loss": 0.11573494672775268,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.5082506395299806,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.0996321818845294e-06,
|
|
"loss": 0.09091781973838806,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.5097499039533728,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 1.0945645246755424e-06,
|
|
"loss": 0.0938392698764801,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.5112491683767652,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 1.089494415826418e-06,
|
|
"loss": 0.08227325677871704,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.5127484328001575,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.084421986782667e-06,
|
|
"loss": 0.07320802211761475,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.5142476972235497,
|
|
"grad_norm": 0.1953125,
|
|
"learning_rate": 1.079347369049954e-06,
|
|
"loss": 0.08411517143249511,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.515746961646942,
|
|
"grad_norm": 0.2451171875,
|
|
"learning_rate": 1.0742706941906873e-06,
|
|
"loss": 0.1013220191001892,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.5172462260703342,
|
|
"grad_norm": 0.2255859375,
|
|
"learning_rate": 1.0691920938206052e-06,
|
|
"loss": 0.08412815928459168,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.5187454904937265,
|
|
"grad_norm": 0.21484375,
|
|
"learning_rate": 1.0641116996053678e-06,
|
|
"loss": 0.08085081577301026,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.5202447549171187,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 1.0590296432571414e-06,
|
|
"loss": 0.08313990831375122,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.5217440193405111,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 1.0539460565311836e-06,
|
|
"loss": 0.0919266939163208,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.5232432837639034,
|
|
"grad_norm": 0.2470703125,
|
|
"learning_rate": 1.048861071222428e-06,
|
|
"loss": 0.09890375733375549,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.5247425481872956,
|
|
"grad_norm": 0.2451171875,
|
|
"learning_rate": 1.0437748191620678e-06,
|
|
"loss": 0.08521285653114319,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.5262418126106879,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 1.0386874322141365e-06,
|
|
"loss": 0.08201659321784974,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.5277410770340801,
|
|
"grad_norm": 0.419921875,
|
|
"learning_rate": 1.0335990422720908e-06,
|
|
"loss": 0.08876433968544006,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.5292403414574725,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.0285097812553916e-06,
|
|
"loss": 0.08933233618736267,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.5307396058808647,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 1.0234197811060808e-06,
|
|
"loss": 0.07142494320869446,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.532238870304257,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 1.0183291737853636e-06,
|
|
"loss": 0.07216275334358216,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.5337381347276492,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.0132380912701884e-06,
|
|
"loss": 0.09240591526031494,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.5352373991510415,
|
|
"grad_norm": 0.1962890625,
|
|
"learning_rate": 1.0081466655498198e-06,
|
|
"loss": 0.08051929473876954,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.5367366635744338,
|
|
"grad_norm": 0.2451171875,
|
|
"learning_rate": 1.0030550286224228e-06,
|
|
"loss": 0.06649044156074524,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.538235927997826,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 9.979633124916373e-07,
|
|
"loss": 0.09150764346122742,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.5397351924212184,
|
|
"grad_norm": 0.212890625,
|
|
"learning_rate": 9.928716491631568e-07,
|
|
"loss": 0.09035595655441284,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.5412344568446106,
|
|
"grad_norm": 0.1806640625,
|
|
"learning_rate": 9.877801706413051e-07,
|
|
"loss": 0.09294023513793945,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.5427337212680029,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 9.826890089256157e-07,
|
|
"loss": 0.1178174376487732,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.5442329856913951,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 9.775982960074077e-07,
|
|
"loss": 0.10003062486648559,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.5457322501147874,
|
|
"grad_norm": 0.333984375,
|
|
"learning_rate": 9.725081638663661e-07,
|
|
"loss": 0.10663024187088013,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.5472315145381798,
|
|
"grad_norm": 0.2421875,
|
|
"learning_rate": 9.674187444671184e-07,
|
|
"loss": 0.09378329515457154,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.548730778961572,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 9.623301697558134e-07,
|
|
"loss": 0.0637846291065216,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.5502300433849643,
|
|
"grad_norm": 0.185546875,
|
|
"learning_rate": 9.572425716567015e-07,
|
|
"loss": 0.0605103075504303,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.5517293078083565,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 9.521560820687135e-07,
|
|
"loss": 0.09556649327278137,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.5532285722317488,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 9.470708328620413e-07,
|
|
"loss": 0.09757782220840454,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.554727836655141,
|
|
"grad_norm": 0.197265625,
|
|
"learning_rate": 9.419869558747198e-07,
|
|
"loss": 0.09097603559494019,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.5562271010785333,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 9.369045829092076e-07,
|
|
"loss": 0.089606112241745,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.5577263655019257,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 9.318238457289711e-07,
|
|
"loss": 0.09462766051292419,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.5592256299253179,
|
|
"grad_norm": 0.1513671875,
|
|
"learning_rate": 9.267448760550683e-07,
|
|
"loss": 0.06713712811470032,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.5607248943487102,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 9.216678055627325e-07,
|
|
"loss": 0.08841444849967957,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.5622241587721024,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 9.165927658779603e-07,
|
|
"loss": 0.07210164666175842,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.5637234231954947,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 9.11519888574099e-07,
|
|
"loss": 0.09946097731590271,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.5652226876188869,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 9.064493051684341e-07,
|
|
"loss": 0.07101974487304688,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.5667219520422793,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 9.013811471187807e-07,
|
|
"loss": 0.10910413265228272,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.5682212164656715,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 8.963155458200753e-07,
|
|
"loss": 0.07558327913284302,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.5697204808890638,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 8.912526326009686e-07,
|
|
"loss": 0.08378031253814697,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.5712197453124561,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 8.861925387204217e-07,
|
|
"loss": 0.0926354169845581,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.5727190097358483,
|
|
"grad_norm": 0.2421875,
|
|
"learning_rate": 8.811353953643031e-07,
|
|
"loss": 0.0765921413898468,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.5742182741592406,
|
|
"grad_norm": 0.197265625,
|
|
"learning_rate": 8.760813336419868e-07,
|
|
"loss": 0.09550715684890747,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.5757175385826329,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 8.710304845829533e-07,
|
|
"loss": 0.07235878109931945,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.5772168030060252,
|
|
"grad_norm": 0.1943359375,
|
|
"learning_rate": 8.65982979133394e-07,
|
|
"loss": 0.08240407705307007,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.5787160674294174,
|
|
"grad_norm": 0.212890625,
|
|
"learning_rate": 8.609389481528138e-07,
|
|
"loss": 0.0828467309474945,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.5802153318528097,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 8.558985224106409e-07,
|
|
"loss": 0.06905397176742553,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.581714596276202,
|
|
"grad_norm": 0.1953125,
|
|
"learning_rate": 8.508618325828361e-07,
|
|
"loss": 0.08870742321014405,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.5832138606995942,
|
|
"grad_norm": 0.32421875,
|
|
"learning_rate": 8.458290092485034e-07,
|
|
"loss": 0.08924266099929809,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.5847131251229866,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 8.408001828865064e-07,
|
|
"loss": 0.08538001179695129,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.5862123895463788,
|
|
"grad_norm": 0.21875,
|
|
"learning_rate": 8.357754838720846e-07,
|
|
"loss": 0.05365139842033386,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.5877116539697711,
|
|
"grad_norm": 0.197265625,
|
|
"learning_rate": 8.307550424734735e-07,
|
|
"loss": 0.07388515472412109,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.5892109183931633,
|
|
"grad_norm": 0.1875,
|
|
"learning_rate": 8.257389888485274e-07,
|
|
"loss": 0.09646939039230347,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.5907101828165556,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 8.207274530413457e-07,
|
|
"loss": 0.09254279732704163,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.592209447239948,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 8.157205649789001e-07,
|
|
"loss": 0.06844722628593444,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.5937087116633402,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 8.107184544676671e-07,
|
|
"loss": 0.07432733774185181,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.5952079760867325,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 8.057212511902623e-07,
|
|
"loss": 0.08080208897590638,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.5967072405101247,
|
|
"grad_norm": 0.189453125,
|
|
"learning_rate": 8.007290847020783e-07,
|
|
"loss": 0.10689427852630615,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.598206504933517,
|
|
"grad_norm": 0.203125,
|
|
"learning_rate": 7.957420844279256e-07,
|
|
"loss": 0.0826223611831665,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.5997057693569092,
|
|
"grad_norm": 0.330078125,
|
|
"learning_rate": 7.907603796586793e-07,
|
|
"loss": 0.08745207786560058,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.6012050337803015,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 7.857840995479237e-07,
|
|
"loss": 0.06742951273918152,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.6027042982036939,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 7.808133731086063e-07,
|
|
"loss": 0.10504342317581176,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.6042035626270861,
|
|
"grad_norm": 0.34765625,
|
|
"learning_rate": 7.758483292096928e-07,
|
|
"loss": 0.10398197174072266,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.6057028270504784,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 7.708890965728249e-07,
|
|
"loss": 0.11235659122467041,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.6072020914738706,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 7.659358037689845e-07,
|
|
"loss": 0.10213931798934936,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.6087013558972629,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 7.609885792151602e-07,
|
|
"loss": 0.09277363419532776,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.6102006203206551,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 7.560475511710174e-07,
|
|
"loss": 0.08845908641815185,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.6116998847440475,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 7.511128477355728e-07,
|
|
"loss": 0.06152995824813843,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.6131991491674397,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 7.461845968438753e-07,
|
|
"loss": 0.0993484079837799,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.614698413590832,
|
|
"grad_norm": 0.232421875,
|
|
"learning_rate": 7.412629262636861e-07,
|
|
"loss": 0.08685197830200195,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.6161976780142243,
|
|
"grad_norm": 0.203125,
|
|
"learning_rate": 7.363479635921693e-07,
|
|
"loss": 0.10489131212234497,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.6176969424376165,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 7.314398362525827e-07,
|
|
"loss": 0.0976183295249939,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.6191962068610088,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 7.265386714909732e-07,
|
|
"loss": 0.10362049341201782,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.6206954712844011,
|
|
"grad_norm": 0.21875,
|
|
"learning_rate": 7.216445963728795e-07,
|
|
"loss": 0.09439095258712768,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.6221947357077934,
|
|
"grad_norm": 0.20703125,
|
|
"learning_rate": 7.167577377800372e-07,
|
|
"loss": 0.07266764044761657,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.6236940001311856,
|
|
"grad_norm": 0.2021484375,
|
|
"learning_rate": 7.118782224070886e-07,
|
|
"loss": 0.08935718536376953,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.6251932645545779,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 7.070061767582993e-07,
|
|
"loss": 0.09530102014541626,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.6266925289779702,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 7.021417271442786e-07,
|
|
"loss": 0.08460386395454407,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.6281917934013624,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 6.972849996787029e-07,
|
|
"loss": 0.09141365885734558,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.6296910578247548,
|
|
"grad_norm": 0.18359375,
|
|
"learning_rate": 6.924361202750484e-07,
|
|
"loss": 0.09532070755958558,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.631190322248147,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 6.875952146433252e-07,
|
|
"loss": 0.09375123977661133,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.6326895866715393,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 6.827624082868191e-07,
|
|
"loss": 0.07426313161849976,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.6341888510949315,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 6.779378264988369e-07,
|
|
"loss": 0.09327669143676758,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.6356881155183238,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 6.731215943594597e-07,
|
|
"loss": 0.08692552447319031,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.6371873799417161,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 6.683138367322982e-07,
|
|
"loss": 0.0770199477672577,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.6386866443651084,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 6.635146782612568e-07,
|
|
"loss": 0.07209202647209167,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.6401859087885007,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 6.587242433673023e-07,
|
|
"loss": 0.07247981429100037,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.6416851732118929,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 6.539426562452364e-07,
|
|
"loss": 0.07441559433937073,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.6431844376352852,
|
|
"grad_norm": 0.2021484375,
|
|
"learning_rate": 6.491700408604781e-07,
|
|
"loss": 0.0830713927745819,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.6446837020586774,
|
|
"grad_norm": 0.1845703125,
|
|
"learning_rate": 6.444065209458494e-07,
|
|
"loss": 0.0942071557044983,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.6461829664820697,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 6.396522199983659e-07,
|
|
"loss": 0.08134819865226746,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.647682230905462,
|
|
"grad_norm": 0.236328125,
|
|
"learning_rate": 6.349072612760366e-07,
|
|
"loss": 0.10018385648727417,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.6491814953288543,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 6.301717677946678e-07,
|
|
"loss": 0.09734719395637512,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.6506807597522466,
|
|
"grad_norm": 0.2431640625,
|
|
"learning_rate": 6.254458623246745e-07,
|
|
"loss": 0.0996459424495697,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.6521800241756388,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 6.207296673878957e-07,
|
|
"loss": 0.070529043674469,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.6536792885990311,
|
|
"grad_norm": 0.20703125,
|
|
"learning_rate": 6.160233052544206e-07,
|
|
"loss": 0.07517372369766236,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.6551785530224233,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 6.113268979394162e-07,
|
|
"loss": 0.08323991298675537,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.6566778174458157,
|
|
"grad_norm": 0.2294921875,
|
|
"learning_rate": 6.066405671999657e-07,
|
|
"loss": 0.09829720854759216,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.6581770818692079,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 6.019644345319108e-07,
|
|
"loss": 0.06705747246742248,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.6596763462926002,
|
|
"grad_norm": 0.326171875,
|
|
"learning_rate": 5.972986211667032e-07,
|
|
"loss": 0.08918554186820984,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.6611756107159925,
|
|
"grad_norm": 0.193359375,
|
|
"learning_rate": 5.92643248068259e-07,
|
|
"loss": 0.0527131199836731,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.6626748751393847,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 5.87998435929826e-07,
|
|
"loss": 0.061626529693603514,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.664174139562777,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 5.83364305170852e-07,
|
|
"loss": 0.10371142625808716,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.6656734039861693,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 5.787409759338644e-07,
|
|
"loss": 0.08246560096740722,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.6671726684095616,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 5.741285680813544e-07,
|
|
"loss": 0.07695434689521789,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.6686719328329538,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 5.695272011926701e-07,
|
|
"loss": 0.06416907906532288,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.6701711972563461,
|
|
"grad_norm": 0.310546875,
|
|
"learning_rate": 5.649369945609169e-07,
|
|
"loss": 0.05495827198028565,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.6716704616797384,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 5.603580671898629e-07,
|
|
"loss": 0.07965745329856873,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.6731697261031306,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 5.557905377908558e-07,
|
|
"loss": 0.10348300933837891,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.674668990526523,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 5.512345247797437e-07,
|
|
"loss": 0.11305124759674072,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.6761682549499152,
|
|
"grad_norm": 0.2119140625,
|
|
"learning_rate": 5.466901462738057e-07,
|
|
"loss": 0.06318964958190917,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.6776675193733075,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 5.421575200886899e-07,
|
|
"loss": 0.10519200563430786,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.6791667837966997,
|
|
"grad_norm": 0.1787109375,
|
|
"learning_rate": 5.376367637353586e-07,
|
|
"loss": 0.08189275860786438,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.680666048220092,
|
|
"grad_norm": 0.498046875,
|
|
"learning_rate": 5.331279944170417e-07,
|
|
"loss": 0.09210953116416931,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.6821653126434843,
|
|
"grad_norm": 0.189453125,
|
|
"learning_rate": 5.286313290261982e-07,
|
|
"loss": 0.07461657524108886,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.6836645770668766,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 5.24146884141486e-07,
|
|
"loss": 0.09393454194068909,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.6851638414902689,
|
|
"grad_norm": 0.22265625,
|
|
"learning_rate": 5.19674776024739e-07,
|
|
"loss": 0.08053632378578186,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.6866631059136611,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 5.152151206179538e-07,
|
|
"loss": 0.07931421399116516,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.6881623703370534,
|
|
"grad_norm": 0.236328125,
|
|
"learning_rate": 5.107680335402824e-07,
|
|
"loss": 0.09329952597618103,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.6896616347604456,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 5.063336300850362e-07,
|
|
"loss": 0.07256720066070557,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.6911608991838379,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 5.019120252166966e-07,
|
|
"loss": 0.07386515140533448,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.6926601636072302,
|
|
"grad_norm": 0.1904296875,
|
|
"learning_rate": 4.975033335679332e-07,
|
|
"loss": 0.0855524480342865,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.6941594280306225,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 4.931076694366337e-07,
|
|
"loss": 0.08902753591537475,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.6956586924540148,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 4.887251467829398e-07,
|
|
"loss": 0.09814743995666504,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.697157956877407,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 4.843558792262924e-07,
|
|
"loss": 0.09769907593727112,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.6986572213007993,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 4.799999800424867e-07,
|
|
"loss": 0.12376710176467895,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.7001564857241915,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 4.7565756216073505e-07,
|
|
"loss": 0.07605620622634887,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.7016557501475839,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 4.713287381607389e-07,
|
|
"loss": 0.09146468043327331,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.7031550145709761,
|
|
"grad_norm": 0.2001953125,
|
|
"learning_rate": 4.670136202697706e-07,
|
|
"loss": 0.11566205024719238,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.7046542789943684,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 4.6271232035976395e-07,
|
|
"loss": 0.07541021108627319,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.7061535434177607,
|
|
"grad_norm": 0.2255859375,
|
|
"learning_rate": 4.5842494994441315e-07,
|
|
"loss": 0.10867191553115844,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.7076528078411529,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 4.541516201762824e-07,
|
|
"loss": 0.08358562588691712,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.7091520722645452,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 4.4989244184392405e-07,
|
|
"loss": 0.10019409656524658,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.7106513366879375,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 4.456475253690061e-07,
|
|
"loss": 0.08848651647567748,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.7121506011113298,
|
|
"grad_norm": 0.201171875,
|
|
"learning_rate": 4.414169808034496e-07,
|
|
"loss": 0.07086822390556335,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.713649865534722,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 4.3720091782657574e-07,
|
|
"loss": 0.1078036069869995,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.7151491299581143,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 4.32999445742262e-07,
|
|
"loss": 0.09499780535697937,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.7166483943815066,
|
|
"grad_norm": 0.2431640625,
|
|
"learning_rate": 4.2881267347610837e-07,
|
|
"loss": 0.08308950662612916,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.7181476588048988,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 4.2464070957261375e-07,
|
|
"loss": 0.08044061660766602,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.7196469232282912,
|
|
"grad_norm": 0.2001953125,
|
|
"learning_rate": 4.204836621923618e-07,
|
|
"loss": 0.06061916947364807,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.7211461876516834,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 4.1634163910921606e-07,
|
|
"loss": 0.10452162027359009,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.7226454520750757,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 4.1221474770752696e-07,
|
|
"loss": 0.0969232976436615,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.7241447164984679,
|
|
"grad_norm": 0.1728515625,
|
|
"learning_rate": 4.081030949793471e-07,
|
|
"loss": 0.07360079884529114,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.7256439809218602,
|
|
"grad_norm": 0.1943359375,
|
|
"learning_rate": 4.0400678752165807e-07,
|
|
"loss": 0.08355346322059631,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.7271432453452524,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 3.9992593153360563e-07,
|
|
"loss": 0.07457499504089356,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.7286425097686448,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 3.9586063281374796e-07,
|
|
"loss": 0.0845346987247467,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.7301417741920371,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 3.9181099675731154e-07,
|
|
"loss": 0.07429866194725036,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.7316410386154293,
|
|
"grad_norm": 0.2255859375,
|
|
"learning_rate": 3.8777712835345966e-07,
|
|
"loss": 0.05976992845535278,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.7331403030388216,
|
|
"grad_norm": 0.1884765625,
|
|
"learning_rate": 3.837591321825696e-07,
|
|
"loss": 0.07514649033546447,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.7346395674622138,
|
|
"grad_norm": 0.22265625,
|
|
"learning_rate": 3.7975711241352224e-07,
|
|
"loss": 0.0838453233242035,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.7361388318856061,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 3.757711728010007e-07,
|
|
"loss": 0.08041094541549683,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.7376380963089983,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 3.7180141668280065e-07,
|
|
"loss": 0.0707211971282959,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.7391373607323907,
|
|
"grad_norm": 0.2109375,
|
|
"learning_rate": 3.678479469771516e-07,
|
|
"loss": 0.09502058625221252,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.740636625155783,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 3.639108661800482e-07,
|
|
"loss": 0.09508728384971618,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.7421358895791752,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 3.59990276362593e-07,
|
|
"loss": 0.07535126805305481,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.7436351540025675,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 3.5608627916835077e-07,
|
|
"loss": 0.07866016626358033,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.7451344184259597,
|
|
"grad_norm": 0.177734375,
|
|
"learning_rate": 3.521989758107122e-07,
|
|
"loss": 0.10100013017654419,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.746633682849352,
|
|
"grad_norm": 0.365234375,
|
|
"learning_rate": 3.4832846707027144e-07,
|
|
"loss": 0.08256787061691284,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.7481329472727443,
|
|
"grad_norm": 0.185546875,
|
|
"learning_rate": 3.444748532922116e-07,
|
|
"loss": 0.08142110109329223,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.7496322116961366,
|
|
"grad_norm": 0.1806640625,
|
|
"learning_rate": 3.4063823438370477e-07,
|
|
"loss": 0.09730502367019653,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.7511314761195289,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 3.3681870981132076e-07,
|
|
"loss": 0.051060861349105834,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.7526307405429211,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 3.330163785984491e-07,
|
|
"loss": 0.07702358365058899,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.7541300049663134,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 3.292313393227313e-07,
|
|
"loss": 0.07249666452407837,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.7556292693897056,
|
|
"grad_norm": 0.2119140625,
|
|
"learning_rate": 3.254636901135055e-07,
|
|
"loss": 0.08777963519096374,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.757128533813098,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 3.2171352864926216e-07,
|
|
"loss": 0.09629991054534912,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.7586277982364902,
|
|
"grad_norm": 0.400390625,
|
|
"learning_rate": 3.179809521551119e-07,
|
|
"loss": 0.07828204035758972,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.7601270626598825,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 3.142660574002648e-07,
|
|
"loss": 0.06039868593215943,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.7616263270832748,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 3.1056894069552154e-07,
|
|
"loss": 0.06850762367248535,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.763125591506667,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 3.0688969789077656e-07,
|
|
"loss": 0.07535871863365173,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.7646248559300594,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 3.0322842437253303e-07,
|
|
"loss": 0.0845901370048523,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.7661241203534516,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 2.9958521506143006e-07,
|
|
"loss": 0.09275015592575073,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.7676233847768439,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 2.9596016440978175e-07,
|
|
"loss": 0.10449213981628418,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.7691226492002361,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 2.923533663991282e-07,
|
|
"loss": 0.08837388157844543,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.7706219136236284,
|
|
"grad_norm": 0.330078125,
|
|
"learning_rate": 2.8876491453779936e-07,
|
|
"loss": 0.09125276803970336,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.7721211780470206,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 2.851949018584906e-07,
|
|
"loss": 0.0870974063873291,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.773620442470413,
|
|
"grad_norm": 1.109375,
|
|
"learning_rate": 2.816434209158508e-07,
|
|
"loss": 0.11278444528579712,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.7751197068938053,
|
|
"grad_norm": 0.2431640625,
|
|
"learning_rate": 2.781105637840829e-07,
|
|
"loss": 0.11417597532272339,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.7766189713171975,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 2.7459642205455657e-07,
|
|
"loss": 0.0695708453655243,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.7781182357405898,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 2.71101086833434e-07,
|
|
"loss": 0.07352896332740784,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.779617500163982,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 2.6762464873930754e-07,
|
|
"loss": 0.09707750678062439,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.7811167645873743,
|
|
"grad_norm": 0.1865234375,
|
|
"learning_rate": 2.6416719790085084e-07,
|
|
"loss": 0.09525392651557922,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.7826160290107665,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 2.607288239544817e-07,
|
|
"loss": 0.10324461460113525,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.7841152934341589,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 2.573096160420386e-07,
|
|
"loss": 0.056819206476211546,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.7856145578575512,
|
|
"grad_norm": 0.1875,
|
|
"learning_rate": 2.5390966280846925e-07,
|
|
"loss": 0.07321354150772094,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.7871138222809434,
|
|
"grad_norm": 0.2177734375,
|
|
"learning_rate": 2.505290523995329e-07,
|
|
"loss": 0.05529284477233887,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.7886130867043357,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 2.4716787245951465e-07,
|
|
"loss": 0.08749927282333374,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.7901123511277279,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 2.4382621012895367e-07,
|
|
"loss": 0.10226259231567383,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.7916116155511203,
|
|
"grad_norm": 0.369140625,
|
|
"learning_rate": 2.405041520423835e-07,
|
|
"loss": 0.08864956498146057,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.7931108799745125,
|
|
"grad_norm": 0.2197265625,
|
|
"learning_rate": 2.372017843260864e-07,
|
|
"loss": 0.10684455633163452,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.7946101443979048,
|
|
"grad_norm": 0.1884765625,
|
|
"learning_rate": 2.3391919259586057e-07,
|
|
"loss": 0.09059134125709534,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.7961094088212971,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 2.3065646195479992e-07,
|
|
"loss": 0.07700026631355286,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.7976086732446893,
|
|
"grad_norm": 0.37890625,
|
|
"learning_rate": 2.2741367699108839e-07,
|
|
"loss": 0.08473354578018188,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.7991079376680816,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 2.2419092177580666e-07,
|
|
"loss": 0.07873227596282958,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.8006072020914738,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 2.209882798607523e-07,
|
|
"loss": 0.09732807874679565,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.8021064665148662,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 2.178058342762743e-07,
|
|
"loss": 0.10025830268859863,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.8036057309382584,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 2.1464366752911979e-07,
|
|
"loss": 0.09230310916900634,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.8051049953616507,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 2.1150186160029525e-07,
|
|
"loss": 0.06340540051460267,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.8066042597850429,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 2.0838049794294132e-07,
|
|
"loss": 0.10046428442001343,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.8081035242084352,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 2.052796574802209e-07,
|
|
"loss": 0.06854251027107239,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.8096027886318276,
|
|
"grad_norm": 0.2216796875,
|
|
"learning_rate": 2.0219942060322114e-07,
|
|
"loss": 0.08301514387130737,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.8111020530552198,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 1.99139867168869e-07,
|
|
"loss": 0.06499930620193481,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.8126013174786121,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 1.9610107649786167e-07,
|
|
"loss": 0.08899691700935364,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.8141005819020043,
|
|
"grad_norm": 0.1923828125,
|
|
"learning_rate": 1.9308312737260934e-07,
|
|
"loss": 0.06367949843406677,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.8155998463253966,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 1.9008609803519304e-07,
|
|
"loss": 0.09109672904014587,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.8170991107487888,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 1.871100661853363e-07,
|
|
"loss": 0.0652251660823822,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.8185983751721811,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 1.841551089783907e-07,
|
|
"loss": 0.10543818473815918,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.8200976395955735,
|
|
"grad_norm": 0.2333984375,
|
|
"learning_rate": 1.8122130302333517e-07,
|
|
"loss": 0.07551140189170838,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.8215969040189657,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.7830872438079048e-07,
|
|
"loss": 0.07271650433540344,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.823096168442358,
|
|
"grad_norm": 0.21484375,
|
|
"learning_rate": 1.7541744856104667e-07,
|
|
"loss": 0.07429500818252563,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.8245954328657502,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 1.7254755052210624e-07,
|
|
"loss": 0.06771766543388366,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.8260946972891425,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.6969910466773973e-07,
|
|
"loss": 0.11255881786346436,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.8275939617125347,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 1.66872184845558e-07,
|
|
"loss": 0.07378043532371521,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.8290932261359271,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 1.6406686434509644e-07,
|
|
"loss": 0.06890552639961242,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.8305924905593194,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 1.6128321589591587e-07,
|
|
"loss": 0.08552584648132325,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.8320917549827116,
|
|
"grad_norm": 0.326171875,
|
|
"learning_rate": 1.5852131166571648e-07,
|
|
"loss": 0.08140406608581544,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.8335910194061039,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 1.55781223258467e-07,
|
|
"loss": 0.09987716674804688,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.8350902838294961,
|
|
"grad_norm": 0.1982421875,
|
|
"learning_rate": 1.5306302171254836e-07,
|
|
"loss": 0.0620901346206665,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.8365895482528884,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 1.503667774989119e-07,
|
|
"loss": 0.07742155194282532,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.8380888126762807,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 1.4769256051925228e-07,
|
|
"loss": 0.09683317542076111,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.839588077099673,
|
|
"grad_norm": 0.2177734375,
|
|
"learning_rate": 1.4504044010419513e-07,
|
|
"loss": 0.10250561237335205,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.8410873415230652,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 1.4241048501150088e-07,
|
|
"loss": 0.0593035876750946,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.8425866059464575,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 1.3980276342427966e-07,
|
|
"loss": 0.07098089456558228,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.8440858703698498,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 1.3721734294922594e-07,
|
|
"loss": 0.08620147705078125,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.845585134793242,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 1.346542906148649e-07,
|
|
"loss": 0.08298314213752747,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.8470843992166344,
|
|
"grad_norm": 0.34765625,
|
|
"learning_rate": 1.3211367286981458e-07,
|
|
"loss": 0.1136427640914917,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.8485836636400266,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 1.2959555558106282e-07,
|
|
"loss": 0.0708082675933838,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.8500829280634189,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 1.271000040322614e-07,
|
|
"loss": 0.09266042709350586,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.8515821924868111,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 1.2462708292203062e-07,
|
|
"loss": 0.09188313484191894,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.8530814569102034,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 1.2217685636228447e-07,
|
|
"loss": 0.11194919347763062,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.8545807213335957,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 1.1974938787656742e-07,
|
|
"loss": 0.0845366358757019,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.856079985756988,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 1.1734474039840737e-07,
|
|
"loss": 0.07923954129219055,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.8575792501803803,
|
|
"grad_norm": 0.306640625,
|
|
"learning_rate": 1.1496297626968465e-07,
|
|
"loss": 0.09228439927101136,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.8590785146037725,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 1.1260415723901584e-07,
|
|
"loss": 0.08742096424102783,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.8605777790271648,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 1.1026834446015177e-07,
|
|
"loss": 0.07722960710525513,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.862077043450557,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 1.0795559849039315e-07,
|
|
"loss": 0.08857112526893615,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.8635763078739493,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 1.0566597928902043e-07,
|
|
"loss": 0.06474360227584838,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.8650755722973417,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 1.033995462157392e-07,
|
|
"loss": 0.09699549674987792,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.8665748367207339,
|
|
"grad_norm": 0.2451171875,
|
|
"learning_rate": 1.0115635802914101e-07,
|
|
"loss": 0.07245502471923829,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.8680741011441262,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 9.89364728851807e-08,
|
|
"loss": 0.07710716128349304,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.8695733655675184,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 9.673994833566746e-08,
|
|
"loss": 0.07985681295394897,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.8710726299909107,
|
|
"grad_norm": 0.212890625,
|
|
"learning_rate": 9.456684132677418e-08,
|
|
"loss": 0.07051183581352234,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.8725718944143029,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 9.241720819756016e-08,
|
|
"loss": 0.09385765790939331,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.8740711588376953,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 9.029110467851076e-08,
|
|
"loss": 0.07226101160049439,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.8755704232610876,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 8.818858589009248e-08,
|
|
"loss": 0.07575808763504029,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.8770696876844798,
|
|
"grad_norm": 0.1962890625,
|
|
"learning_rate": 8.610970634132465e-08,
|
|
"loss": 0.07295922040939332,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.8785689521078721,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 8.405451992836442e-08,
|
|
"loss": 0.08540709614753723,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.8800682165312643,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 8.202307993311153e-08,
|
|
"loss": 0.08457719087600708,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.8815674809546566,
|
|
"grad_norm": 0.224609375,
|
|
"learning_rate": 8.001543902182594e-08,
|
|
"loss": 0.06852260828018189,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.8830667453780489,
|
|
"grad_norm": 0.208984375,
|
|
"learning_rate": 7.803164924376248e-08,
|
|
"loss": 0.0945811927318573,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.8845660098014412,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 7.607176202982112e-08,
|
|
"loss": 0.07205227017402649,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.8860652742248334,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 7.413582819121511e-08,
|
|
"loss": 0.08640796542167664,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.8875645386482257,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 7.22238979181512e-08,
|
|
"loss": 0.0951160728931427,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.889063803071618,
|
|
"grad_norm": 0.21484375,
|
|
"learning_rate": 7.033602077853052e-08,
|
|
"loss": 0.07211223244667053,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.8905630674950102,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 6.847224571666277e-08,
|
|
"loss": 0.07400254607200622,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.8920623319184026,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 6.663262105199718e-08,
|
|
"loss": 0.09436286687850952,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.8935615963417948,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 6.481719447786971e-08,
|
|
"loss": 0.07624666690826416,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.8950608607651871,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 6.302601306026755e-08,
|
|
"loss": 0.08409606218338013,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.8965601251885793,
|
|
"grad_norm": 0.2265625,
|
|
"learning_rate": 6.125912323660709e-08,
|
|
"loss": 0.07607480883598328,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.8980593896119716,
|
|
"grad_norm": 0.2412109375,
|
|
"learning_rate": 5.951657081453176e-08,
|
|
"loss": 0.08595433235168456,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.899558654035364,
|
|
"grad_norm": 0.181640625,
|
|
"learning_rate": 5.7798400970723634e-08,
|
|
"loss": 0.0745903193950653,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.9010579184587562,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 5.610465824973232e-08,
|
|
"loss": 0.07999681830406188,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.9025571828821485,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 5.443538656281954e-08,
|
|
"loss": 0.08919501900672913,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.9040564473055407,
|
|
"grad_norm": 0.20703125,
|
|
"learning_rate": 5.279062918682253e-08,
|
|
"loss": 0.07325602769851684,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.905555711728933,
|
|
"grad_norm": 0.2236328125,
|
|
"learning_rate": 5.117042876302946e-08,
|
|
"loss": 0.07375933527946472,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.9070549761523252,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 4.9574827296075986e-08,
|
|
"loss": 0.09143089056015015,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.9085542405757175,
|
|
"grad_norm": 0.205078125,
|
|
"learning_rate": 4.800386615285534e-08,
|
|
"loss": 0.06721729636192322,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.9100535049991099,
|
|
"grad_norm": 0.2314453125,
|
|
"learning_rate": 4.645758606144623e-08,
|
|
"loss": 0.0724267840385437,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.9115527694225021,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 4.49360271100564e-08,
|
|
"loss": 0.09417140483856201,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.9130520338458944,
|
|
"grad_norm": 0.2138671875,
|
|
"learning_rate": 4.3439228745984493e-08,
|
|
"loss": 0.10223345756530762,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.9145512982692866,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 4.196722977459566e-08,
|
|
"loss": 0.08283578753471374,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.9160505626926789,
|
|
"grad_norm": 0.3359375,
|
|
"learning_rate": 4.0520068358317e-08,
|
|
"loss": 0.11019489765167237,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.9175498271160711,
|
|
"grad_norm": 0.21484375,
|
|
"learning_rate": 3.9097782015647286e-08,
|
|
"loss": 0.07297813296318054,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.9190490915394635,
|
|
"grad_norm": 0.2060546875,
|
|
"learning_rate": 3.7700407620184674e-08,
|
|
"loss": 0.07638216018676758,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.9205483559628557,
|
|
"grad_norm": 0.1962890625,
|
|
"learning_rate": 3.632798139967064e-08,
|
|
"loss": 0.09769478440284729,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.922047620386248,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 3.498053893505126e-08,
|
|
"loss": 0.07059162259101867,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.9235468848096403,
|
|
"grad_norm": 0.2080078125,
|
|
"learning_rate": 3.365811515955319e-08,
|
|
"loss": 0.10193029642105103,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.9250461492330325,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 3.236074435777991e-08,
|
|
"loss": 0.08877017498016357,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.9265454136564248,
|
|
"grad_norm": 0.19921875,
|
|
"learning_rate": 3.1088460164821694e-08,
|
|
"loss": 0.07558783888816833,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.928044678079817,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 2.984129556538417e-08,
|
|
"loss": 0.10496606826782226,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.9295439425032094,
|
|
"grad_norm": 0.1826171875,
|
|
"learning_rate": 2.8619282892932472e-08,
|
|
"loss": 0.08706371784210205,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.9310432069266016,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 2.742245382885422e-08,
|
|
"loss": 0.07445533275604248,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.9325424713499939,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 2.6250839401636636e-08,
|
|
"loss": 0.08374568819999695,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.9340417357733862,
|
|
"grad_norm": 0.220703125,
|
|
"learning_rate": 2.510446998606297e-08,
|
|
"loss": 0.08437891006469726,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.9355410001967784,
|
|
"grad_norm": 0.232421875,
|
|
"learning_rate": 2.3983375302425445e-08,
|
|
"loss": 0.06599584221839905,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.9370402646201708,
|
|
"grad_norm": 0.390625,
|
|
"learning_rate": 2.2887584415753558e-08,
|
|
"loss": 0.08677806854248046,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.938539529043563,
|
|
"grad_norm": 0.173828125,
|
|
"learning_rate": 2.1817125735061448e-08,
|
|
"loss": 0.057446730136871335,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.9400387934669553,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 2.0772027012611382e-08,
|
|
"loss": 0.07344555258750915,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.9415380578903475,
|
|
"grad_norm": 0.2333984375,
|
|
"learning_rate": 1.975231534319366e-08,
|
|
"loss": 0.061513519287109374,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.9430373223137398,
|
|
"grad_norm": 0.234375,
|
|
"learning_rate": 1.875801716342462e-08,
|
|
"loss": 0.08662024140357971,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.9445365867371321,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 1.7789158251061087e-08,
|
|
"loss": 0.08880329728126526,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.9460358511605244,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.684576372433222e-08,
|
|
"loss": 0.08403295874595643,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.9475351155839167,
|
|
"grad_norm": 0.2099609375,
|
|
"learning_rate": 1.5927858041288154e-08,
|
|
"loss": 0.07371333837509156,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.9490343800073089,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 1.503546499916608e-08,
|
|
"loss": 0.0930757999420166,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.9505336444307012,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 1.4168607733773042e-08,
|
|
"loss": 0.09260554909706116,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.9520329088540934,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.3327308718886322e-08,
|
|
"loss": 0.06500183939933776,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.9535321732774857,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 1.2511589765670682e-08,
|
|
"loss": 0.12267719507217408,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.9550314377008781,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 1.1721472022113044e-08,
|
|
"loss": 0.15489401817321777,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.9565307021242703,
|
|
"grad_norm": 0.41015625,
|
|
"learning_rate": 1.0956975972474136e-08,
|
|
"loss": 0.08266881704330445,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.9580299665476626,
|
|
"grad_norm": 0.21875,
|
|
"learning_rate": 1.0218121436757266e-08,
|
|
"loss": 0.062265390157699586,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.9595292309710548,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 9.504927570194831e-09,
|
|
"loss": 0.11146190166473388,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.9610284953944471,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 8.817412862751172e-09,
|
|
"loss": 0.11401185989379883,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.9625277598178393,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 8.155595138644055e-09,
|
|
"loss": 0.06964959502220154,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.9640270242412317,
|
|
"grad_norm": 0.248046875,
|
|
"learning_rate": 7.519491555881497e-09,
|
|
"loss": 0.08737698793411255,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.9655262886646239,
|
|
"grad_norm": 0.39453125,
|
|
"learning_rate": 6.909118605817776e-09,
|
|
"loss": 0.09992367029190063,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.9670255530880162,
|
|
"grad_norm": 0.458984375,
|
|
"learning_rate": 6.324492112725676e-09,
|
|
"loss": 0.10620630979537964,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.9685248175114085,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 5.765627233386028e-09,
|
|
"loss": 0.09715937972068786,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.9700240819348007,
|
|
"grad_norm": 0.18359375,
|
|
"learning_rate": 5.2325384566949126e-09,
|
|
"loss": 0.07616119980812072,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.971523346358193,
|
|
"grad_norm": 0.2041015625,
|
|
"learning_rate": 4.725239603287856e-09,
|
|
"loss": 0.08586298823356628,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.9730226107815853,
|
|
"grad_norm": 0.2158203125,
|
|
"learning_rate": 4.243743825181889e-09,
|
|
"loss": 0.10227413177490234,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.9745218752049776,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 3.788063605434267e-09,
|
|
"loss": 0.1260104298591614,
|
|
"step": 6500
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 6670,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.128917086797111e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|