16051 lines
394 KiB
JSON
16051 lines
394 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.25599901577555206,
|
|
"eval_steps": 500,
|
|
"global_step": 22889,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00011184368726268168,
|
|
"grad_norm": 0.6328383684158325,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 1.734,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.00022368737452536336,
|
|
"grad_norm": 0.566952109336853,
|
|
"learning_rate": 9.5e-06,
|
|
"loss": 1.6903,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.00033553106178804503,
|
|
"grad_norm": 0.5359939932823181,
|
|
"learning_rate": 1.4500000000000002e-05,
|
|
"loss": 1.6266,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0004473747490507267,
|
|
"grad_norm": 0.4729914367198944,
|
|
"learning_rate": 1.95e-05,
|
|
"loss": 1.5731,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0005592184363134084,
|
|
"grad_norm": 0.42020025849342346,
|
|
"learning_rate": 2.4500000000000003e-05,
|
|
"loss": 1.5335,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.0006710621235760901,
|
|
"grad_norm": 0.4461672604084015,
|
|
"learning_rate": 2.95e-05,
|
|
"loss": 1.4851,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0007829058108387717,
|
|
"grad_norm": 0.4443751275539398,
|
|
"learning_rate": 3.4500000000000005e-05,
|
|
"loss": 1.4431,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.0008947494981014534,
|
|
"grad_norm": 0.4204632639884949,
|
|
"learning_rate": 3.95e-05,
|
|
"loss": 1.4036,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.0010065931853641351,
|
|
"grad_norm": 0.3985028862953186,
|
|
"learning_rate": 4.45e-05,
|
|
"loss": 1.3725,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.0011184368726268167,
|
|
"grad_norm": 0.4111650586128235,
|
|
"learning_rate": 4.9500000000000004e-05,
|
|
"loss": 1.3527,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.0012302805598894985,
|
|
"grad_norm": 0.4175569713115692,
|
|
"learning_rate": 5.45e-05,
|
|
"loss": 1.3431,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.0013421242471521801,
|
|
"grad_norm": 0.3871678411960602,
|
|
"learning_rate": 5.9499999999999996e-05,
|
|
"loss": 1.3322,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.0014539679344148617,
|
|
"grad_norm": 0.39584827423095703,
|
|
"learning_rate": 6.450000000000001e-05,
|
|
"loss": 1.3075,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.0015658116216775435,
|
|
"grad_norm": 0.4165605902671814,
|
|
"learning_rate": 6.950000000000001e-05,
|
|
"loss": 1.286,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.001677655308940225,
|
|
"grad_norm": 0.3985513150691986,
|
|
"learning_rate": 7.45e-05,
|
|
"loss": 1.2567,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.0017894989962029069,
|
|
"grad_norm": 0.39112743735313416,
|
|
"learning_rate": 7.950000000000001e-05,
|
|
"loss": 1.2448,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.0019013426834655885,
|
|
"grad_norm": 0.3867124915122986,
|
|
"learning_rate": 8.450000000000001e-05,
|
|
"loss": 1.2405,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0020131863707282703,
|
|
"grad_norm": 0.3955863416194916,
|
|
"learning_rate": 8.95e-05,
|
|
"loss": 1.2123,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.002125030057990952,
|
|
"grad_norm": 0.40293410420417786,
|
|
"learning_rate": 9.45e-05,
|
|
"loss": 1.2081,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.0022368737452536334,
|
|
"grad_norm": 0.3828902542591095,
|
|
"learning_rate": 9.95e-05,
|
|
"loss": 1.2049,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.002348717432516315,
|
|
"grad_norm": 0.3969178795814514,
|
|
"learning_rate": 0.00010449999999999999,
|
|
"loss": 1.1892,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.002460561119778997,
|
|
"grad_norm": 0.4122287929058075,
|
|
"learning_rate": 0.0001095,
|
|
"loss": 1.184,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.0025724048070416786,
|
|
"grad_norm": 0.3793940246105194,
|
|
"learning_rate": 0.0001145,
|
|
"loss": 1.1809,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.0026842484943043602,
|
|
"grad_norm": 0.4132145643234253,
|
|
"learning_rate": 0.00011949999999999999,
|
|
"loss": 1.1883,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.002796092181567042,
|
|
"grad_norm": 0.3900831639766693,
|
|
"learning_rate": 0.0001245,
|
|
"loss": 1.1818,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.0029079358688297234,
|
|
"grad_norm": 0.3898029625415802,
|
|
"learning_rate": 0.0001295,
|
|
"loss": 1.1693,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.0030197795560924054,
|
|
"grad_norm": 0.40828797221183777,
|
|
"learning_rate": 0.00013450000000000002,
|
|
"loss": 1.1869,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.003131623243355087,
|
|
"grad_norm": 0.3976770341396332,
|
|
"learning_rate": 0.0001395,
|
|
"loss": 1.1841,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.0032434669306177686,
|
|
"grad_norm": 0.3902062773704529,
|
|
"learning_rate": 0.0001445,
|
|
"loss": 1.1843,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.00335531061788045,
|
|
"grad_norm": 0.38051125407218933,
|
|
"learning_rate": 0.0001495,
|
|
"loss": 1.1662,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.0034671543051431318,
|
|
"grad_norm": 0.3628483712673187,
|
|
"learning_rate": 0.00015450000000000001,
|
|
"loss": 1.1638,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.0035789979924058138,
|
|
"grad_norm": 0.3693360388278961,
|
|
"learning_rate": 0.0001595,
|
|
"loss": 1.1606,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.0036908416796684954,
|
|
"grad_norm": 0.38896557688713074,
|
|
"learning_rate": 0.00016450000000000001,
|
|
"loss": 1.1448,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.003802685366931177,
|
|
"grad_norm": 0.40257108211517334,
|
|
"learning_rate": 0.00016950000000000003,
|
|
"loss": 1.143,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.0039145290541938585,
|
|
"grad_norm": 0.38656994700431824,
|
|
"learning_rate": 0.00017449999999999999,
|
|
"loss": 1.141,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.0040263727414565405,
|
|
"grad_norm": 0.3700025677680969,
|
|
"learning_rate": 0.0001795,
|
|
"loss": 1.136,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.004138216428719222,
|
|
"grad_norm": 0.37222161889076233,
|
|
"learning_rate": 0.0001845,
|
|
"loss": 1.1292,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.004250060115981904,
|
|
"grad_norm": 0.39386317133903503,
|
|
"learning_rate": 0.0001895,
|
|
"loss": 1.1139,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.004361903803244586,
|
|
"grad_norm": 0.3776305913925171,
|
|
"learning_rate": 0.0001945,
|
|
"loss": 1.1125,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.004473747490507267,
|
|
"grad_norm": 0.40314197540283203,
|
|
"learning_rate": 0.00019950000000000002,
|
|
"loss": 1.0962,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.004585591177769949,
|
|
"grad_norm": 0.37841472029685974,
|
|
"learning_rate": 0.00020449999999999998,
|
|
"loss": 1.0987,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.00469743486503263,
|
|
"grad_norm": 0.3678649365901947,
|
|
"learning_rate": 0.0002095,
|
|
"loss": 1.0826,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.004809278552295312,
|
|
"grad_norm": 0.37902751564979553,
|
|
"learning_rate": 0.0002145,
|
|
"loss": 1.0973,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.004921122239557994,
|
|
"grad_norm": 0.3776302635669708,
|
|
"learning_rate": 0.0002195,
|
|
"loss": 1.112,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.005032965926820675,
|
|
"grad_norm": 0.43771493434906006,
|
|
"learning_rate": 0.0002245,
|
|
"loss": 1.1005,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.005144809614083357,
|
|
"grad_norm": 0.3662595748901367,
|
|
"learning_rate": 0.00022950000000000002,
|
|
"loss": 1.0899,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.005256653301346038,
|
|
"grad_norm": 0.37473002076148987,
|
|
"learning_rate": 0.00023449999999999998,
|
|
"loss": 1.0982,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.0053684969886087204,
|
|
"grad_norm": 0.35591790080070496,
|
|
"learning_rate": 0.0002395,
|
|
"loss": 1.1005,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.0054803406758714025,
|
|
"grad_norm": 0.3825643062591553,
|
|
"learning_rate": 0.0002445,
|
|
"loss": 1.0896,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.005592184363134084,
|
|
"grad_norm": 0.3784261643886566,
|
|
"learning_rate": 0.0002495,
|
|
"loss": 1.1039,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.005704028050396766,
|
|
"grad_norm": 0.35387158393859863,
|
|
"learning_rate": 0.0002545,
|
|
"loss": 1.1038,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.005815871737659447,
|
|
"grad_norm": 0.3992142975330353,
|
|
"learning_rate": 0.0002595,
|
|
"loss": 1.088,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.005927715424922129,
|
|
"grad_norm": 0.36795270442962646,
|
|
"learning_rate": 0.00026450000000000003,
|
|
"loss": 1.0888,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.006039559112184811,
|
|
"grad_norm": 0.4007701575756073,
|
|
"learning_rate": 0.00026950000000000005,
|
|
"loss": 1.0838,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.006151402799447492,
|
|
"grad_norm": 0.34527722001075745,
|
|
"learning_rate": 0.0002745,
|
|
"loss": 1.0892,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.006263246486710174,
|
|
"grad_norm": 0.37232115864753723,
|
|
"learning_rate": 0.0002795,
|
|
"loss": 1.0939,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.006375090173972855,
|
|
"grad_norm": 0.4048405885696411,
|
|
"learning_rate": 0.0002845,
|
|
"loss": 1.0863,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.006486933861235537,
|
|
"grad_norm": 0.37317511439323425,
|
|
"learning_rate": 0.0002895,
|
|
"loss": 1.0711,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.006598777548498219,
|
|
"grad_norm": 0.38564008474349976,
|
|
"learning_rate": 0.0002945,
|
|
"loss": 1.091,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.0067106212357609,
|
|
"grad_norm": 0.3639361262321472,
|
|
"learning_rate": 0.0002995,
|
|
"loss": 1.0682,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.006822464923023582,
|
|
"grad_norm": 0.35907182097435,
|
|
"learning_rate": 0.0003045,
|
|
"loss": 1.0755,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.0069343086102862635,
|
|
"grad_norm": 0.35199785232543945,
|
|
"learning_rate": 0.0003095,
|
|
"loss": 1.0581,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.0070461522975489455,
|
|
"grad_norm": 0.35156381130218506,
|
|
"learning_rate": 0.0003145,
|
|
"loss": 1.0651,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.0071579959848116275,
|
|
"grad_norm": 0.3742520213127136,
|
|
"learning_rate": 0.0003195,
|
|
"loss": 1.0555,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.007269839672074309,
|
|
"grad_norm": 0.3587191700935364,
|
|
"learning_rate": 0.00032450000000000003,
|
|
"loss": 1.0548,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.007381683359336991,
|
|
"grad_norm": 0.37587791681289673,
|
|
"learning_rate": 0.00032950000000000004,
|
|
"loss": 1.0437,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.007493527046599672,
|
|
"grad_norm": 0.3410298526287079,
|
|
"learning_rate": 0.00033450000000000005,
|
|
"loss": 1.0426,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.007605370733862354,
|
|
"grad_norm": 0.3450978696346283,
|
|
"learning_rate": 0.0003395,
|
|
"loss": 1.0487,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.007717214421125036,
|
|
"grad_norm": 0.3445068299770355,
|
|
"learning_rate": 0.00034449999999999997,
|
|
"loss": 1.0411,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.007829058108387717,
|
|
"grad_norm": 0.34611567854881287,
|
|
"learning_rate": 0.0003495,
|
|
"loss": 1.0404,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.007940901795650398,
|
|
"grad_norm": 0.3339330852031708,
|
|
"learning_rate": 0.0003545,
|
|
"loss": 1.0361,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.008052745482913081,
|
|
"grad_norm": 0.33232080936431885,
|
|
"learning_rate": 0.0003595,
|
|
"loss": 1.0271,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.008164589170175762,
|
|
"grad_norm": 0.33050498366355896,
|
|
"learning_rate": 0.0003645,
|
|
"loss": 1.0316,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.008276432857438443,
|
|
"grad_norm": 0.3449972867965698,
|
|
"learning_rate": 0.0003695,
|
|
"loss": 1.0426,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.008388276544701126,
|
|
"grad_norm": 0.3543892502784729,
|
|
"learning_rate": 0.0003745,
|
|
"loss": 1.0475,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.008500120231963807,
|
|
"grad_norm": 0.3447831869125366,
|
|
"learning_rate": 0.0003795,
|
|
"loss": 1.0482,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.008611963919226489,
|
|
"grad_norm": 0.33845630288124084,
|
|
"learning_rate": 0.0003845,
|
|
"loss": 1.0533,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.008723807606489171,
|
|
"grad_norm": 0.3394622802734375,
|
|
"learning_rate": 0.00038950000000000003,
|
|
"loss": 1.0803,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.008835651293751853,
|
|
"grad_norm": 0.33649975061416626,
|
|
"learning_rate": 0.00039450000000000005,
|
|
"loss": 1.0461,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.008947494981014534,
|
|
"grad_norm": 0.3265191912651062,
|
|
"learning_rate": 0.0003995,
|
|
"loss": 1.0714,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.009059338668277215,
|
|
"grad_norm": 0.34960776567459106,
|
|
"learning_rate": 0.0004045,
|
|
"loss": 1.0542,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.009171182355539898,
|
|
"grad_norm": 0.3353814482688904,
|
|
"learning_rate": 0.0004095,
|
|
"loss": 1.0625,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.009283026042802579,
|
|
"grad_norm": 0.3499109148979187,
|
|
"learning_rate": 0.0004145,
|
|
"loss": 1.0679,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.00939486973006526,
|
|
"grad_norm": 0.33906084299087524,
|
|
"learning_rate": 0.0004195,
|
|
"loss": 1.0659,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.009506713417327943,
|
|
"grad_norm": 0.3245256543159485,
|
|
"learning_rate": 0.0004245,
|
|
"loss": 1.078,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.009618557104590624,
|
|
"grad_norm": 0.3364386260509491,
|
|
"learning_rate": 0.0004295,
|
|
"loss": 1.0771,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.009730400791853305,
|
|
"grad_norm": 0.348718523979187,
|
|
"learning_rate": 0.0004345,
|
|
"loss": 1.0751,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.009842244479115988,
|
|
"grad_norm": 0.31124839186668396,
|
|
"learning_rate": 0.0004395,
|
|
"loss": 1.0693,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.00995408816637867,
|
|
"grad_norm": 0.3478352129459381,
|
|
"learning_rate": 0.0004445,
|
|
"loss": 1.0682,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.01006593185364135,
|
|
"grad_norm": 0.31189802289009094,
|
|
"learning_rate": 0.00044950000000000003,
|
|
"loss": 1.0608,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.010177775540904033,
|
|
"grad_norm": 0.34715884923934937,
|
|
"learning_rate": 0.00045450000000000004,
|
|
"loss": 1.0698,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.010289619228166715,
|
|
"grad_norm": 0.3279336988925934,
|
|
"learning_rate": 0.00045950000000000006,
|
|
"loss": 1.0728,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.010401462915429396,
|
|
"grad_norm": 0.32010868191719055,
|
|
"learning_rate": 0.0004645,
|
|
"loss": 1.0765,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.010513306602692077,
|
|
"grad_norm": 0.3618028163909912,
|
|
"learning_rate": 0.0004695,
|
|
"loss": 1.0815,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.01062515028995476,
|
|
"grad_norm": 0.3403186798095703,
|
|
"learning_rate": 0.0004745,
|
|
"loss": 1.0713,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.010736993977217441,
|
|
"grad_norm": 0.347687691450119,
|
|
"learning_rate": 0.0004795,
|
|
"loss": 1.0844,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.010848837664480122,
|
|
"grad_norm": 0.3537987768650055,
|
|
"learning_rate": 0.0004845,
|
|
"loss": 1.0762,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.010960681351742805,
|
|
"grad_norm": 0.42015892267227173,
|
|
"learning_rate": 0.0004895,
|
|
"loss": 1.0832,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.011072525039005486,
|
|
"grad_norm": 0.35781368613243103,
|
|
"learning_rate": 0.0004945,
|
|
"loss": 1.0606,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.011184368726268167,
|
|
"grad_norm": 0.3361358344554901,
|
|
"learning_rate": 0.0004995,
|
|
"loss": 1.0717,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.01129621241353085,
|
|
"grad_norm": 0.36569204926490784,
|
|
"learning_rate": 0.0004997944172872219,
|
|
"loss": 1.0602,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.011408056100793531,
|
|
"grad_norm": 0.31979477405548096,
|
|
"learning_rate": 0.0004995659920508017,
|
|
"loss": 1.0531,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.011519899788056212,
|
|
"grad_norm": 0.3295707404613495,
|
|
"learning_rate": 0.0004993375668143817,
|
|
"loss": 1.0346,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.011631743475318894,
|
|
"grad_norm": 0.3207838833332062,
|
|
"learning_rate": 0.0004991091415779616,
|
|
"loss": 1.059,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.011743587162581576,
|
|
"grad_norm": 0.33032119274139404,
|
|
"learning_rate": 0.0004988807163415415,
|
|
"loss": 1.0573,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.011855430849844258,
|
|
"grad_norm": 0.3566173017024994,
|
|
"learning_rate": 0.0004986522911051213,
|
|
"loss": 1.0501,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.011967274537106939,
|
|
"grad_norm": 0.31658655405044556,
|
|
"learning_rate": 0.0004984238658687012,
|
|
"loss": 1.0706,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.012079118224369622,
|
|
"grad_norm": 0.3438680171966553,
|
|
"learning_rate": 0.0004981954406322811,
|
|
"loss": 1.0765,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.012190961911632303,
|
|
"grad_norm": 0.3130144774913788,
|
|
"learning_rate": 0.0004979670153958609,
|
|
"loss": 1.0588,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.012302805598894984,
|
|
"grad_norm": 0.31765422224998474,
|
|
"learning_rate": 0.0004977385901594408,
|
|
"loss": 1.0703,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.012414649286157667,
|
|
"grad_norm": 0.36112868785858154,
|
|
"learning_rate": 0.0004975101649230207,
|
|
"loss": 1.0642,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.012526492973420348,
|
|
"grad_norm": 0.33418065309524536,
|
|
"learning_rate": 0.0004972817396866005,
|
|
"loss": 1.0572,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.01263833666068303,
|
|
"grad_norm": 0.34439629316329956,
|
|
"learning_rate": 0.0004970533144501805,
|
|
"loss": 1.0473,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.01275018034794571,
|
|
"grad_norm": 0.32954639196395874,
|
|
"learning_rate": 0.0004968248892137603,
|
|
"loss": 1.054,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.012862024035208393,
|
|
"grad_norm": 0.3351511061191559,
|
|
"learning_rate": 0.0004965964639773402,
|
|
"loss": 1.0444,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.012973867722471074,
|
|
"grad_norm": 0.3065156638622284,
|
|
"learning_rate": 0.0004963680387409202,
|
|
"loss": 1.0546,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.013085711409733755,
|
|
"grad_norm": 0.36450672149658203,
|
|
"learning_rate": 0.0004961396135045,
|
|
"loss": 1.0501,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.013197555096996438,
|
|
"grad_norm": 0.3020591735839844,
|
|
"learning_rate": 0.0004959111882680799,
|
|
"loss": 1.052,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.01330939878425912,
|
|
"grad_norm": 0.3097701966762543,
|
|
"learning_rate": 0.0004956827630316598,
|
|
"loss": 1.0695,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.0134212424715218,
|
|
"grad_norm": 0.3410932719707489,
|
|
"learning_rate": 0.0004954543377952396,
|
|
"loss": 1.0692,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.013533086158784484,
|
|
"grad_norm": 0.38478952646255493,
|
|
"learning_rate": 0.0004952259125588195,
|
|
"loss": 1.0592,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.013644929846047165,
|
|
"grad_norm": 0.3737089931964874,
|
|
"learning_rate": 0.0004949974873223994,
|
|
"loss": 1.0808,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.013756773533309846,
|
|
"grad_norm": 0.3264448940753937,
|
|
"learning_rate": 0.0004947690620859793,
|
|
"loss": 1.0759,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.013868617220572527,
|
|
"grad_norm": 0.3922732472419739,
|
|
"learning_rate": 0.0004945406368495591,
|
|
"loss": 1.0634,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.01398046090783521,
|
|
"grad_norm": 0.36068034172058105,
|
|
"learning_rate": 0.000494312211613139,
|
|
"loss": 1.0683,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.014092304595097891,
|
|
"grad_norm": 0.3544798791408539,
|
|
"learning_rate": 0.0004940837863767189,
|
|
"loss": 1.0687,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.014204148282360572,
|
|
"grad_norm": 0.31447795033454895,
|
|
"learning_rate": 0.0004938553611402987,
|
|
"loss": 1.0549,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.014315991969623255,
|
|
"grad_norm": 0.37639158964157104,
|
|
"learning_rate": 0.0004936269359038786,
|
|
"loss": 1.0698,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.014427835656885936,
|
|
"grad_norm": 0.32416418194770813,
|
|
"learning_rate": 0.0004933985106674586,
|
|
"loss": 1.0617,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.014539679344148617,
|
|
"grad_norm": 0.3122979998588562,
|
|
"learning_rate": 0.0004931700854310385,
|
|
"loss": 1.0553,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.0146515230314113,
|
|
"grad_norm": 0.3574884533882141,
|
|
"learning_rate": 0.0004929416601946184,
|
|
"loss": 1.0598,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.014763366718673981,
|
|
"grad_norm": 0.30762428045272827,
|
|
"learning_rate": 0.0004927132349581982,
|
|
"loss": 1.0642,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.014875210405936663,
|
|
"grad_norm": 0.34350454807281494,
|
|
"learning_rate": 0.0004924848097217781,
|
|
"loss": 1.0663,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.014987054093199344,
|
|
"grad_norm": 0.33486828207969666,
|
|
"learning_rate": 0.000492256384485358,
|
|
"loss": 1.0479,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.015098897780462027,
|
|
"grad_norm": 0.3025324046611786,
|
|
"learning_rate": 0.0004920279592489378,
|
|
"loss": 1.0705,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.015210741467724708,
|
|
"grad_norm": 0.35260385274887085,
|
|
"learning_rate": 0.0004917995340125177,
|
|
"loss": 1.0762,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.015322585154987389,
|
|
"grad_norm": 0.3188925087451935,
|
|
"learning_rate": 0.0004915711087760976,
|
|
"loss": 1.069,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.015434428842250072,
|
|
"grad_norm": 0.332660436630249,
|
|
"learning_rate": 0.0004913426835396775,
|
|
"loss": 1.0749,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.015546272529512753,
|
|
"grad_norm": 0.31745171546936035,
|
|
"learning_rate": 0.0004911142583032573,
|
|
"loss": 1.0811,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.015658116216775434,
|
|
"grad_norm": 0.3237819969654083,
|
|
"learning_rate": 0.0004908858330668372,
|
|
"loss": 1.0634,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.015769959904038115,
|
|
"grad_norm": 0.3300880789756775,
|
|
"learning_rate": 0.0004906574078304171,
|
|
"loss": 1.0554,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.015881803591300796,
|
|
"grad_norm": 0.32475635409355164,
|
|
"learning_rate": 0.0004904289825939969,
|
|
"loss": 1.0598,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.01599364727856348,
|
|
"grad_norm": 0.31278952956199646,
|
|
"learning_rate": 0.0004902005573575769,
|
|
"loss": 1.0498,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.016105490965826162,
|
|
"grad_norm": 0.308680921792984,
|
|
"learning_rate": 0.0004899721321211568,
|
|
"loss": 1.0586,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.016217334653088843,
|
|
"grad_norm": 0.34637314081192017,
|
|
"learning_rate": 0.0004897437068847367,
|
|
"loss": 1.0535,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.016329178340351524,
|
|
"grad_norm": 0.3220643401145935,
|
|
"learning_rate": 0.0004895152816483165,
|
|
"loss": 1.0624,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.016441022027614206,
|
|
"grad_norm": 0.31472912430763245,
|
|
"learning_rate": 0.0004892868564118964,
|
|
"loss": 1.0748,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.016552865714876887,
|
|
"grad_norm": 0.3416632115840912,
|
|
"learning_rate": 0.0004890584311754763,
|
|
"loss": 1.0715,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.01666470940213957,
|
|
"grad_norm": 0.3463667631149292,
|
|
"learning_rate": 0.0004888300059390561,
|
|
"loss": 1.0914,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.016776553089402253,
|
|
"grad_norm": 0.3322199881076813,
|
|
"learning_rate": 0.000488601580702636,
|
|
"loss": 1.0707,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.016888396776664934,
|
|
"grad_norm": 0.3899800479412079,
|
|
"learning_rate": 0.0004883731554662159,
|
|
"loss": 1.0883,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.017000240463927615,
|
|
"grad_norm": 0.3409605324268341,
|
|
"learning_rate": 0.0004881447302297958,
|
|
"loss": 1.0982,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.017112084151190296,
|
|
"grad_norm": 0.3720357120037079,
|
|
"learning_rate": 0.0004879163049933757,
|
|
"loss": 1.0674,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.017223927838452977,
|
|
"grad_norm": 0.326050728559494,
|
|
"learning_rate": 0.00048768787975695554,
|
|
"loss": 1.0764,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.01733577152571566,
|
|
"grad_norm": 0.3238283395767212,
|
|
"learning_rate": 0.0004874594545205354,
|
|
"loss": 1.0547,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.017447615212978343,
|
|
"grad_norm": 0.3324073553085327,
|
|
"learning_rate": 0.00048723102928411536,
|
|
"loss": 1.0608,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.017559458900241024,
|
|
"grad_norm": 0.3382217586040497,
|
|
"learning_rate": 0.0004870026040476952,
|
|
"loss": 1.0505,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.017671302587503705,
|
|
"grad_norm": 0.3409116566181183,
|
|
"learning_rate": 0.00048677417881127507,
|
|
"loss": 1.0673,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.017783146274766386,
|
|
"grad_norm": 0.3123399019241333,
|
|
"learning_rate": 0.000486545753574855,
|
|
"loss": 1.0461,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.017894989962029068,
|
|
"grad_norm": 0.3178008198738098,
|
|
"learning_rate": 0.00048631732833843484,
|
|
"loss": 1.0526,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.01800683364929175,
|
|
"grad_norm": 0.37002459168434143,
|
|
"learning_rate": 0.0004860889031020147,
|
|
"loss": 1.0483,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.01811867733655443,
|
|
"grad_norm": 0.31036287546157837,
|
|
"learning_rate": 0.0004858604778655946,
|
|
"loss": 1.0418,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.018230521023817114,
|
|
"grad_norm": 0.3027215600013733,
|
|
"learning_rate": 0.00048563205262917446,
|
|
"loss": 1.0467,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.018342364711079796,
|
|
"grad_norm": 0.32144612073898315,
|
|
"learning_rate": 0.00048540362739275437,
|
|
"loss": 1.0437,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.018454208398342477,
|
|
"grad_norm": 0.3156447410583496,
|
|
"learning_rate": 0.0004851752021563343,
|
|
"loss": 1.0447,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.018566052085605158,
|
|
"grad_norm": 0.3228546380996704,
|
|
"learning_rate": 0.00048494677691991413,
|
|
"loss": 1.056,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.01867789577286784,
|
|
"grad_norm": 0.3478510081768036,
|
|
"learning_rate": 0.000484718351683494,
|
|
"loss": 1.0523,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.01878973946013052,
|
|
"grad_norm": 0.3413507342338562,
|
|
"learning_rate": 0.0004844899264470739,
|
|
"loss": 1.049,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.018901583147393205,
|
|
"grad_norm": 0.3277221918106079,
|
|
"learning_rate": 0.00048426150121065375,
|
|
"loss": 1.0403,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.019013426834655886,
|
|
"grad_norm": 0.3044646382331848,
|
|
"learning_rate": 0.0004840330759742336,
|
|
"loss": 1.0518,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.019125270521918567,
|
|
"grad_norm": 0.31599846482276917,
|
|
"learning_rate": 0.0004838046507378135,
|
|
"loss": 1.0475,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.01923711420918125,
|
|
"grad_norm": 0.346741646528244,
|
|
"learning_rate": 0.00048357622550139343,
|
|
"loss": 1.0515,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.01934895789644393,
|
|
"grad_norm": 0.32756108045578003,
|
|
"learning_rate": 0.0004833478002649733,
|
|
"loss": 1.054,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.01946080158370661,
|
|
"grad_norm": 0.3318345546722412,
|
|
"learning_rate": 0.0004831193750285532,
|
|
"loss": 1.0575,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.019572645270969292,
|
|
"grad_norm": 0.3389560282230377,
|
|
"learning_rate": 0.00048289094979213305,
|
|
"loss": 1.0576,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.019684488958231976,
|
|
"grad_norm": 0.31532642245292664,
|
|
"learning_rate": 0.0004826625245557129,
|
|
"loss": 1.0554,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.019796332645494658,
|
|
"grad_norm": 0.3263496160507202,
|
|
"learning_rate": 0.0004824340993192928,
|
|
"loss": 1.0697,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.01990817633275734,
|
|
"grad_norm": 0.328225314617157,
|
|
"learning_rate": 0.00048220567408287267,
|
|
"loss": 1.0584,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.02002002002002002,
|
|
"grad_norm": 0.3030998706817627,
|
|
"learning_rate": 0.00048197724884645253,
|
|
"loss": 1.0555,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.0201318637072827,
|
|
"grad_norm": 0.32594701647758484,
|
|
"learning_rate": 0.0004817488236100325,
|
|
"loss": 1.0512,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.020243707394545382,
|
|
"grad_norm": 0.2882954776287079,
|
|
"learning_rate": 0.00048152039837361235,
|
|
"loss": 1.0441,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.020355551081808067,
|
|
"grad_norm": 0.33917129039764404,
|
|
"learning_rate": 0.0004812919731371922,
|
|
"loss": 1.048,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.020467394769070748,
|
|
"grad_norm": 0.32748523354530334,
|
|
"learning_rate": 0.0004810635479007721,
|
|
"loss": 1.042,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.02057923845633343,
|
|
"grad_norm": 0.32332462072372437,
|
|
"learning_rate": 0.00048083512266435197,
|
|
"loss": 1.0396,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.02069108214359611,
|
|
"grad_norm": 0.36977729201316833,
|
|
"learning_rate": 0.0004806066974279318,
|
|
"loss": 1.0337,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.02080292583085879,
|
|
"grad_norm": 0.33298948407173157,
|
|
"learning_rate": 0.00048037827219151174,
|
|
"loss": 1.045,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.020914769518121473,
|
|
"grad_norm": 0.328861802816391,
|
|
"learning_rate": 0.00048014984695509165,
|
|
"loss": 1.053,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.021026613205384154,
|
|
"grad_norm": 0.3438888490200043,
|
|
"learning_rate": 0.0004799214217186715,
|
|
"loss": 1.0385,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.02113845689264684,
|
|
"grad_norm": 0.3251883387565613,
|
|
"learning_rate": 0.00047969299648225136,
|
|
"loss": 1.0436,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.02125030057990952,
|
|
"grad_norm": 0.3300330340862274,
|
|
"learning_rate": 0.00047946457124583127,
|
|
"loss": 1.0627,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.0213621442671722,
|
|
"grad_norm": 0.31774377822875977,
|
|
"learning_rate": 0.0004792361460094111,
|
|
"loss": 1.0491,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.021473987954434882,
|
|
"grad_norm": 0.36171990633010864,
|
|
"learning_rate": 0.000479007720772991,
|
|
"loss": 1.0536,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.021585831641697563,
|
|
"grad_norm": 0.33032888174057007,
|
|
"learning_rate": 0.0004787792955365709,
|
|
"loss": 1.0327,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.021697675328960244,
|
|
"grad_norm": 0.34056538343429565,
|
|
"learning_rate": 0.00047855087030015074,
|
|
"loss": 1.0354,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.021809519016222925,
|
|
"grad_norm": 0.31768256425857544,
|
|
"learning_rate": 0.00047832244506373065,
|
|
"loss": 1.0278,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.02192136270348561,
|
|
"grad_norm": 0.33165955543518066,
|
|
"learning_rate": 0.00047809401982731056,
|
|
"loss": 1.057,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.02203320639074829,
|
|
"grad_norm": 0.34456339478492737,
|
|
"learning_rate": 0.0004778655945908904,
|
|
"loss": 1.0465,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.022145050078010972,
|
|
"grad_norm": 0.35331544280052185,
|
|
"learning_rate": 0.0004776371693544703,
|
|
"loss": 1.0509,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.022256893765273653,
|
|
"grad_norm": 0.3497447669506073,
|
|
"learning_rate": 0.0004774087441180502,
|
|
"loss": 1.0579,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.022368737452536334,
|
|
"grad_norm": 0.31631171703338623,
|
|
"learning_rate": 0.00047718031888163004,
|
|
"loss": 1.0747,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.022480581139799016,
|
|
"grad_norm": 0.34811535477638245,
|
|
"learning_rate": 0.0004769518936452099,
|
|
"loss": 1.0443,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.0225924248270617,
|
|
"grad_norm": 0.350975900888443,
|
|
"learning_rate": 0.0004767234684087898,
|
|
"loss": 1.0721,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.02270426851432438,
|
|
"grad_norm": 0.38026875257492065,
|
|
"learning_rate": 0.0004764950431723697,
|
|
"loss": 1.0502,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.022816112201587063,
|
|
"grad_norm": 0.3079335391521454,
|
|
"learning_rate": 0.00047626661793594957,
|
|
"loss": 1.0325,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.022927955888849744,
|
|
"grad_norm": 0.3412174582481384,
|
|
"learning_rate": 0.0004760381926995295,
|
|
"loss": 1.026,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.023039799576112425,
|
|
"grad_norm": 0.31905752420425415,
|
|
"learning_rate": 0.00047580976746310934,
|
|
"loss": 1.033,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.023151643263375106,
|
|
"grad_norm": 0.3110033869743347,
|
|
"learning_rate": 0.0004755813422266892,
|
|
"loss": 1.026,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.023263486950637787,
|
|
"grad_norm": 0.3087383210659027,
|
|
"learning_rate": 0.0004753529169902691,
|
|
"loss": 1.0285,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.023375330637900472,
|
|
"grad_norm": 0.310497522354126,
|
|
"learning_rate": 0.00047512449175384896,
|
|
"loss": 1.012,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.023487174325163153,
|
|
"grad_norm": 0.35822993516921997,
|
|
"learning_rate": 0.0004748960665174288,
|
|
"loss": 1.0124,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.023599018012425834,
|
|
"grad_norm": 0.3355759084224701,
|
|
"learning_rate": 0.0004746676412810088,
|
|
"loss": 1.0159,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.023710861699688515,
|
|
"grad_norm": 0.29633432626724243,
|
|
"learning_rate": 0.00047443921604458863,
|
|
"loss": 1.0068,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.023822705386951196,
|
|
"grad_norm": 0.3268597424030304,
|
|
"learning_rate": 0.0004742107908081685,
|
|
"loss": 1.0029,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.023934549074213878,
|
|
"grad_norm": 0.32010769844055176,
|
|
"learning_rate": 0.0004739823655717484,
|
|
"loss": 1.0081,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.02404639276147656,
|
|
"grad_norm": 0.30638498067855835,
|
|
"learning_rate": 0.00047375394033532826,
|
|
"loss": 0.9955,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.024158236448739243,
|
|
"grad_norm": 0.32299259305000305,
|
|
"learning_rate": 0.0004735255150989081,
|
|
"loss": 1.0028,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.024270080136001924,
|
|
"grad_norm": 0.30714213848114014,
|
|
"learning_rate": 0.000473297089862488,
|
|
"loss": 1.0163,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.024381923823264606,
|
|
"grad_norm": 0.3207940459251404,
|
|
"learning_rate": 0.0004730686646260679,
|
|
"loss": 1.0053,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.024493767510527287,
|
|
"grad_norm": 0.3073663115501404,
|
|
"learning_rate": 0.0004728402393896478,
|
|
"loss": 1.0007,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.024605611197789968,
|
|
"grad_norm": 0.3209913671016693,
|
|
"learning_rate": 0.0004726118141532277,
|
|
"loss": 1.0065,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.02471745488505265,
|
|
"grad_norm": 0.2987804114818573,
|
|
"learning_rate": 0.00047238338891680755,
|
|
"loss": 1.0015,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.024829298572315334,
|
|
"grad_norm": 0.31511807441711426,
|
|
"learning_rate": 0.0004721549636803874,
|
|
"loss": 0.9892,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.024941142259578015,
|
|
"grad_norm": 0.2840864956378937,
|
|
"learning_rate": 0.0004719265384439673,
|
|
"loss": 1.0084,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.025052985946840696,
|
|
"grad_norm": 0.3094743490219116,
|
|
"learning_rate": 0.0004716981132075472,
|
|
"loss": 1.0169,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.025164829634103377,
|
|
"grad_norm": 0.2905067205429077,
|
|
"learning_rate": 0.00047146968797112703,
|
|
"loss": 0.9991,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.02527667332136606,
|
|
"grad_norm": 0.31322264671325684,
|
|
"learning_rate": 0.00047124126273470694,
|
|
"loss": 1.0169,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.02538851700862874,
|
|
"grad_norm": 0.29053428769111633,
|
|
"learning_rate": 0.00047101283749828685,
|
|
"loss": 0.9942,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.02550036069589142,
|
|
"grad_norm": 0.2863853871822357,
|
|
"learning_rate": 0.0004707844122618667,
|
|
"loss": 1.002,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.025612204383154105,
|
|
"grad_norm": 0.3087761104106903,
|
|
"learning_rate": 0.0004705559870254466,
|
|
"loss": 1.0025,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.025724048070416786,
|
|
"grad_norm": 0.3308629095554352,
|
|
"learning_rate": 0.00047032756178902647,
|
|
"loss": 1.0078,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.025835891757679467,
|
|
"grad_norm": 0.29703134298324585,
|
|
"learning_rate": 0.0004700991365526063,
|
|
"loss": 1.006,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.02594773544494215,
|
|
"grad_norm": 0.27238258719444275,
|
|
"learning_rate": 0.0004698707113161862,
|
|
"loss": 0.9963,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.02605957913220483,
|
|
"grad_norm": 0.2795617878437042,
|
|
"learning_rate": 0.0004696422860797661,
|
|
"loss": 0.9876,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.02617142281946751,
|
|
"grad_norm": 0.2989327013492584,
|
|
"learning_rate": 0.000469413860843346,
|
|
"loss": 0.9864,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.026283266506730196,
|
|
"grad_norm": 0.3229614794254303,
|
|
"learning_rate": 0.00046918543560692586,
|
|
"loss": 0.9849,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.026395110193992877,
|
|
"grad_norm": 0.2921406328678131,
|
|
"learning_rate": 0.00046895701037050577,
|
|
"loss": 0.9764,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.026506953881255558,
|
|
"grad_norm": 0.2955220639705658,
|
|
"learning_rate": 0.0004687285851340856,
|
|
"loss": 0.9883,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.02661879756851824,
|
|
"grad_norm": 0.31378960609436035,
|
|
"learning_rate": 0.0004685001598976655,
|
|
"loss": 0.9978,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.02673064125578092,
|
|
"grad_norm": 0.30504587292671204,
|
|
"learning_rate": 0.0004682717346612454,
|
|
"loss": 0.9912,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.0268424849430436,
|
|
"grad_norm": 0.3066459000110626,
|
|
"learning_rate": 0.00046804330942482524,
|
|
"loss": 0.9877,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.026954328630306282,
|
|
"grad_norm": 0.3198714256286621,
|
|
"learning_rate": 0.0004678148841884051,
|
|
"loss": 0.98,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.027066172317568967,
|
|
"grad_norm": 0.27119094133377075,
|
|
"learning_rate": 0.00046758645895198506,
|
|
"loss": 1.001,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.027178016004831648,
|
|
"grad_norm": 0.28178098797798157,
|
|
"learning_rate": 0.0004673580337155649,
|
|
"loss": 0.9605,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.02728985969209433,
|
|
"grad_norm": 0.29373088479042053,
|
|
"learning_rate": 0.0004671296084791448,
|
|
"loss": 0.9834,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.02740170337935701,
|
|
"grad_norm": 0.2861827313899994,
|
|
"learning_rate": 0.0004669011832427247,
|
|
"loss": 0.9797,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.02751354706661969,
|
|
"grad_norm": 0.3488409221172333,
|
|
"learning_rate": 0.00046667275800630454,
|
|
"loss": 0.9682,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.027625390753882373,
|
|
"grad_norm": 0.29631665349006653,
|
|
"learning_rate": 0.0004664443327698844,
|
|
"loss": 0.9751,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.027737234441145054,
|
|
"grad_norm": 0.27299416065216064,
|
|
"learning_rate": 0.0004662159075334643,
|
|
"loss": 0.9571,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.02784907812840774,
|
|
"grad_norm": 0.30409684777259827,
|
|
"learning_rate": 0.00046598748229704416,
|
|
"loss": 0.968,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.02796092181567042,
|
|
"grad_norm": 0.2957991063594818,
|
|
"learning_rate": 0.00046575905706062407,
|
|
"loss": 0.9814,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.0280727655029331,
|
|
"grad_norm": 0.28328225016593933,
|
|
"learning_rate": 0.000465530631824204,
|
|
"loss": 0.9816,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.028184609190195782,
|
|
"grad_norm": 0.40670067071914673,
|
|
"learning_rate": 0.00046530220658778384,
|
|
"loss": 0.9737,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.028296452877458463,
|
|
"grad_norm": 0.2818649411201477,
|
|
"learning_rate": 0.0004650737813513637,
|
|
"loss": 0.9891,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.028408296564721144,
|
|
"grad_norm": 0.3054118752479553,
|
|
"learning_rate": 0.0004648453561149436,
|
|
"loss": 0.9976,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.02852014025198383,
|
|
"grad_norm": 0.31439468264579773,
|
|
"learning_rate": 0.00046461693087852346,
|
|
"loss": 0.9928,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.02863198393924651,
|
|
"grad_norm": 0.3173445761203766,
|
|
"learning_rate": 0.0004643885056421033,
|
|
"loss": 1.0002,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.02874382762650919,
|
|
"grad_norm": 0.32495757937431335,
|
|
"learning_rate": 0.0004641600804056832,
|
|
"loss": 0.9981,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.028855671313771872,
|
|
"grad_norm": 0.35957351326942444,
|
|
"learning_rate": 0.00046393165516926313,
|
|
"loss": 1.0112,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.028967515001034554,
|
|
"grad_norm": 0.3070557713508606,
|
|
"learning_rate": 0.000463703229932843,
|
|
"loss": 1.0047,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.029079358688297235,
|
|
"grad_norm": 0.3227770924568176,
|
|
"learning_rate": 0.0004634748046964229,
|
|
"loss": 1.0115,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.029191202375559916,
|
|
"grad_norm": 0.34345880150794983,
|
|
"learning_rate": 0.00046324637946000276,
|
|
"loss": 0.9984,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.0293030460628226,
|
|
"grad_norm": 0.34459254145622253,
|
|
"learning_rate": 0.0004630179542235826,
|
|
"loss": 0.9965,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.02941488975008528,
|
|
"grad_norm": 0.3396269679069519,
|
|
"learning_rate": 0.0004627895289871625,
|
|
"loss": 0.9986,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.029526733437347963,
|
|
"grad_norm": 0.3370846211910248,
|
|
"learning_rate": 0.0004625611037507424,
|
|
"loss": 0.9987,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.029638577124610644,
|
|
"grad_norm": 0.30689191818237305,
|
|
"learning_rate": 0.00046233267851432223,
|
|
"loss": 1.0081,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.029750420811873325,
|
|
"grad_norm": 0.35536935925483704,
|
|
"learning_rate": 0.0004621042532779022,
|
|
"loss": 0.9948,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.029862264499136006,
|
|
"grad_norm": 0.3295105993747711,
|
|
"learning_rate": 0.00046187582804148205,
|
|
"loss": 1.0115,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.029974108186398687,
|
|
"grad_norm": 0.34881895780563354,
|
|
"learning_rate": 0.0004616474028050619,
|
|
"loss": 1.0024,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.030085951873661372,
|
|
"grad_norm": 0.379261314868927,
|
|
"learning_rate": 0.0004614189775686418,
|
|
"loss": 0.9965,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.030197795560924053,
|
|
"grad_norm": 0.34729093313217163,
|
|
"learning_rate": 0.0004611905523322217,
|
|
"loss": 1.0026,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.030309639248186734,
|
|
"grad_norm": 0.34687525033950806,
|
|
"learning_rate": 0.00046096212709580153,
|
|
"loss": 0.9992,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.030421482935449416,
|
|
"grad_norm": 0.3564583659172058,
|
|
"learning_rate": 0.00046073370185938144,
|
|
"loss": 0.9859,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.030533326622712097,
|
|
"grad_norm": 0.3762670159339905,
|
|
"learning_rate": 0.0004605052766229613,
|
|
"loss": 1.0059,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.030645170309974778,
|
|
"grad_norm": 0.3470481038093567,
|
|
"learning_rate": 0.0004602768513865412,
|
|
"loss": 1.0044,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.030757013997237462,
|
|
"grad_norm": 0.3322189450263977,
|
|
"learning_rate": 0.0004600484261501211,
|
|
"loss": 0.9811,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.030868857684500144,
|
|
"grad_norm": 0.3248903751373291,
|
|
"learning_rate": 0.00045982000091370097,
|
|
"loss": 0.9721,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.030980701371762825,
|
|
"grad_norm": 0.32881951332092285,
|
|
"learning_rate": 0.0004595915756772808,
|
|
"loss": 0.9821,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.031092545059025506,
|
|
"grad_norm": 0.35410797595977783,
|
|
"learning_rate": 0.0004593631504408607,
|
|
"loss": 0.9786,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.031204388746288187,
|
|
"grad_norm": 0.3307279050350189,
|
|
"learning_rate": 0.0004591347252044406,
|
|
"loss": 0.9759,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.03131623243355087,
|
|
"grad_norm": 0.3207128643989563,
|
|
"learning_rate": 0.00045890629996802045,
|
|
"loss": 0.9812,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.03142807612081355,
|
|
"grad_norm": 0.3065459728240967,
|
|
"learning_rate": 0.0004586778747316003,
|
|
"loss": 0.9596,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.03153991980807623,
|
|
"grad_norm": 0.3115104138851166,
|
|
"learning_rate": 0.00045844944949518027,
|
|
"loss": 0.9732,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.031651763495338915,
|
|
"grad_norm": 0.3136879801750183,
|
|
"learning_rate": 0.0004582210242587601,
|
|
"loss": 0.9818,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.03176360718260159,
|
|
"grad_norm": 0.3240731656551361,
|
|
"learning_rate": 0.00045799259902234,
|
|
"loss": 0.9836,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.03187545086986428,
|
|
"grad_norm": 0.31390219926834106,
|
|
"learning_rate": 0.0004577641737859199,
|
|
"loss": 0.9837,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.03198729455712696,
|
|
"grad_norm": 0.3056069612503052,
|
|
"learning_rate": 0.00045753574854949975,
|
|
"loss": 0.995,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.03209913824438964,
|
|
"grad_norm": 0.29556363821029663,
|
|
"learning_rate": 0.0004573073233130796,
|
|
"loss": 1.0018,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.032210981931652324,
|
|
"grad_norm": 0.2931666374206543,
|
|
"learning_rate": 0.0004570788980766595,
|
|
"loss": 1.0124,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.032322825618915,
|
|
"grad_norm": 0.31029924750328064,
|
|
"learning_rate": 0.0004568504728402394,
|
|
"loss": 1.0115,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.03243466930617769,
|
|
"grad_norm": 0.3164144456386566,
|
|
"learning_rate": 0.0004566220476038193,
|
|
"loss": 0.9966,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.032546512993440364,
|
|
"grad_norm": 0.31638383865356445,
|
|
"learning_rate": 0.0004563936223673992,
|
|
"loss": 0.989,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.03265835668070305,
|
|
"grad_norm": 0.28559473156929016,
|
|
"learning_rate": 0.00045616519713097904,
|
|
"loss": 1.0038,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.032770200367965734,
|
|
"grad_norm": 0.285154789686203,
|
|
"learning_rate": 0.0004559367718945589,
|
|
"loss": 1.0009,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.03288204405522841,
|
|
"grad_norm": 0.2722555100917816,
|
|
"learning_rate": 0.0004557083466581388,
|
|
"loss": 0.9977,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.032993887742491096,
|
|
"grad_norm": 0.2854909896850586,
|
|
"learning_rate": 0.00045547992142171866,
|
|
"loss": 0.9996,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.033105731429753774,
|
|
"grad_norm": 0.2726607620716095,
|
|
"learning_rate": 0.0004552514961852985,
|
|
"loss": 0.9925,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.03321757511701646,
|
|
"grad_norm": 0.30692654848098755,
|
|
"learning_rate": 0.0004550230709488785,
|
|
"loss": 0.9776,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.03332941880427914,
|
|
"grad_norm": 0.2921067774295807,
|
|
"learning_rate": 0.00045479464571245834,
|
|
"loss": 0.9831,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.03344126249154182,
|
|
"grad_norm": 0.30490297079086304,
|
|
"learning_rate": 0.0004545662204760382,
|
|
"loss": 0.9835,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.033553106178804505,
|
|
"grad_norm": 0.2823980450630188,
|
|
"learning_rate": 0.0004543377952396181,
|
|
"loss": 0.9859,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.03366494986606718,
|
|
"grad_norm": 0.31844133138656616,
|
|
"learning_rate": 0.00045410937000319796,
|
|
"loss": 1.0007,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.03377679355332987,
|
|
"grad_norm": 0.30595019459724426,
|
|
"learning_rate": 0.0004538809447667778,
|
|
"loss": 1.0069,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.033888637240592545,
|
|
"grad_norm": 0.31177419424057007,
|
|
"learning_rate": 0.0004536525195303577,
|
|
"loss": 1.0068,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.03400048092785523,
|
|
"grad_norm": 0.33921870589256287,
|
|
"learning_rate": 0.0004534240942939376,
|
|
"loss": 1.0116,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.034112324615117914,
|
|
"grad_norm": 0.29299408197402954,
|
|
"learning_rate": 0.0004531956690575175,
|
|
"loss": 1.0014,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.03422416830238059,
|
|
"grad_norm": 0.28572002053260803,
|
|
"learning_rate": 0.0004529672438210974,
|
|
"loss": 0.9976,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.03433601198964328,
|
|
"grad_norm": 0.30842283368110657,
|
|
"learning_rate": 0.00045273881858467726,
|
|
"loss": 0.9994,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.034447855676905954,
|
|
"grad_norm": 0.29677408933639526,
|
|
"learning_rate": 0.0004525103933482571,
|
|
"loss": 1.0055,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.03455969936416864,
|
|
"grad_norm": 0.388823926448822,
|
|
"learning_rate": 0.000452281968111837,
|
|
"loss": 1.0062,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.03467154305143132,
|
|
"grad_norm": 0.2956707775592804,
|
|
"learning_rate": 0.0004520535428754169,
|
|
"loss": 0.9794,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.034783386738694,
|
|
"grad_norm": 0.3179475665092468,
|
|
"learning_rate": 0.00045182511763899673,
|
|
"loss": 0.9831,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.034895230425956686,
|
|
"grad_norm": 0.29509803652763367,
|
|
"learning_rate": 0.00045159669240257664,
|
|
"loss": 0.9851,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.035007074113219364,
|
|
"grad_norm": 0.31095758080482483,
|
|
"learning_rate": 0.00045136826716615655,
|
|
"loss": 0.9852,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.03511891780048205,
|
|
"grad_norm": 0.27768880128860474,
|
|
"learning_rate": 0.0004511398419297364,
|
|
"loss": 0.9741,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.035230761487744726,
|
|
"grad_norm": 0.3117106854915619,
|
|
"learning_rate": 0.0004509114166933163,
|
|
"loss": 0.9987,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.03534260517500741,
|
|
"grad_norm": 0.30113616585731506,
|
|
"learning_rate": 0.0004506829914568962,
|
|
"loss": 0.9855,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.03545444886227009,
|
|
"grad_norm": 0.2842777967453003,
|
|
"learning_rate": 0.00045045456622047603,
|
|
"loss": 0.9793,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.03556629254953277,
|
|
"grad_norm": 0.30115559697151184,
|
|
"learning_rate": 0.00045022614098405594,
|
|
"loss": 0.9854,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.03567813623679546,
|
|
"grad_norm": 0.3350517153739929,
|
|
"learning_rate": 0.0004499977157476358,
|
|
"loss": 0.9787,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.035789979924058135,
|
|
"grad_norm": 0.2736664414405823,
|
|
"learning_rate": 0.00044976929051121565,
|
|
"loss": 1.0067,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.03590182361132082,
|
|
"grad_norm": 0.2868112027645111,
|
|
"learning_rate": 0.0004495408652747956,
|
|
"loss": 1.0002,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.0360136672985835,
|
|
"grad_norm": 0.27296972274780273,
|
|
"learning_rate": 0.00044931244003837547,
|
|
"loss": 0.9939,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.03612551098584618,
|
|
"grad_norm": 0.2894013226032257,
|
|
"learning_rate": 0.00044908401480195533,
|
|
"loss": 1.0017,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.03623735467310886,
|
|
"grad_norm": 0.26549386978149414,
|
|
"learning_rate": 0.0004488555895655352,
|
|
"loss": 0.9953,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.036349198360371544,
|
|
"grad_norm": 0.27381303906440735,
|
|
"learning_rate": 0.0004486271643291151,
|
|
"loss": 1.0077,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.03646104204763423,
|
|
"grad_norm": 0.2829972505569458,
|
|
"learning_rate": 0.00044839873909269495,
|
|
"loss": 1.0008,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.03657288573489691,
|
|
"grad_norm": 0.29023584723472595,
|
|
"learning_rate": 0.0004481703138562748,
|
|
"loss": 0.9999,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.03668472942215959,
|
|
"grad_norm": 0.29526880383491516,
|
|
"learning_rate": 0.00044794188861985477,
|
|
"loss": 0.9982,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.03679657310942227,
|
|
"grad_norm": 0.27724817395210266,
|
|
"learning_rate": 0.0004477134633834346,
|
|
"loss": 1.0109,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.036908416796684954,
|
|
"grad_norm": 0.2780180275440216,
|
|
"learning_rate": 0.0004474850381470145,
|
|
"loss": 0.997,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.03702026048394764,
|
|
"grad_norm": 0.29814234375953674,
|
|
"learning_rate": 0.0004472566129105944,
|
|
"loss": 1.0056,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.037132104171210316,
|
|
"grad_norm": 0.3131207823753357,
|
|
"learning_rate": 0.00044702818767417425,
|
|
"loss": 0.999,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.037243947858473,
|
|
"grad_norm": 0.2865641415119171,
|
|
"learning_rate": 0.0004467997624377541,
|
|
"loss": 0.9938,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.03735579154573568,
|
|
"grad_norm": 0.31247007846832275,
|
|
"learning_rate": 0.000446571337201334,
|
|
"loss": 1.0029,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.03746763523299836,
|
|
"grad_norm": 0.3432846665382385,
|
|
"learning_rate": 0.00044634291196491387,
|
|
"loss": 0.9861,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.03757947892026104,
|
|
"grad_norm": 0.3200684189796448,
|
|
"learning_rate": 0.0004461144867284938,
|
|
"loss": 0.9958,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.037691322607523725,
|
|
"grad_norm": 0.3280775547027588,
|
|
"learning_rate": 0.0004458860614920737,
|
|
"loss": 0.9972,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.03780316629478641,
|
|
"grad_norm": 0.3129955232143402,
|
|
"learning_rate": 0.00044565763625565354,
|
|
"loss": 0.9947,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.03791500998204909,
|
|
"grad_norm": 0.27574583888053894,
|
|
"learning_rate": 0.0004454292110192334,
|
|
"loss": 1.0004,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.03802685366931177,
|
|
"grad_norm": 0.3088320791721344,
|
|
"learning_rate": 0.0004452007857828133,
|
|
"loss": 0.9907,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.03813869735657445,
|
|
"grad_norm": 0.3232235908508301,
|
|
"learning_rate": 0.00044497236054639316,
|
|
"loss": 0.9956,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.038250541043837134,
|
|
"grad_norm": 0.3009951114654541,
|
|
"learning_rate": 0.000444743935309973,
|
|
"loss": 0.9899,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.03836238473109981,
|
|
"grad_norm": 0.2987104058265686,
|
|
"learning_rate": 0.00044451551007355293,
|
|
"loss": 0.9852,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.0384742284183625,
|
|
"grad_norm": 0.2890870273113251,
|
|
"learning_rate": 0.00044428708483713284,
|
|
"loss": 0.9775,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.03858607210562518,
|
|
"grad_norm": 0.2704969048500061,
|
|
"learning_rate": 0.0004440586596007127,
|
|
"loss": 0.9745,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.03869791579288786,
|
|
"grad_norm": 0.3041844964027405,
|
|
"learning_rate": 0.0004438302343642926,
|
|
"loss": 0.977,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.038809759480150544,
|
|
"grad_norm": 0.2794378995895386,
|
|
"learning_rate": 0.00044360180912787246,
|
|
"loss": 0.9818,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.03892160316741322,
|
|
"grad_norm": 0.2784910798072815,
|
|
"learning_rate": 0.0004433733838914523,
|
|
"loss": 0.9655,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.039033446854675906,
|
|
"grad_norm": 0.2610478103160858,
|
|
"learning_rate": 0.0004431449586550322,
|
|
"loss": 0.975,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.039145290541938584,
|
|
"grad_norm": 0.2646799087524414,
|
|
"learning_rate": 0.0004429165334186121,
|
|
"loss": 0.9767,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.03925713422920127,
|
|
"grad_norm": 0.2622663676738739,
|
|
"learning_rate": 0.00044268810818219194,
|
|
"loss": 0.98,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.03936897791646395,
|
|
"grad_norm": 0.26897987723350525,
|
|
"learning_rate": 0.0004424596829457719,
|
|
"loss": 0.9718,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.03948082160372663,
|
|
"grad_norm": 0.29816752672195435,
|
|
"learning_rate": 0.00044223125770935176,
|
|
"loss": 1.0074,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.039592665290989315,
|
|
"grad_norm": 0.2652198076248169,
|
|
"learning_rate": 0.0004420028324729316,
|
|
"loss": 0.9789,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.03970450897825199,
|
|
"grad_norm": 0.2648336887359619,
|
|
"learning_rate": 0.0004417744072365115,
|
|
"loss": 0.9794,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.03981635266551468,
|
|
"grad_norm": 0.25409677624702454,
|
|
"learning_rate": 0.0004415459820000914,
|
|
"loss": 0.9868,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.039928196352777355,
|
|
"grad_norm": 0.25675469636917114,
|
|
"learning_rate": 0.00044131755676367123,
|
|
"loss": 0.9827,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.04004004004004004,
|
|
"grad_norm": 0.2915634214878082,
|
|
"learning_rate": 0.00044108913152725114,
|
|
"loss": 0.9833,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.040151883727302724,
|
|
"grad_norm": 0.29538393020629883,
|
|
"learning_rate": 0.000440860706290831,
|
|
"loss": 0.9848,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.0402637274145654,
|
|
"grad_norm": 0.3026215732097626,
|
|
"learning_rate": 0.0004406322810544109,
|
|
"loss": 0.9778,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.04037557110182809,
|
|
"grad_norm": 0.30865418910980225,
|
|
"learning_rate": 0.0004404038558179908,
|
|
"loss": 0.9743,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.040487414789090764,
|
|
"grad_norm": 0.28092265129089355,
|
|
"learning_rate": 0.0004401754305815707,
|
|
"loss": 0.9795,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.04059925847635345,
|
|
"grad_norm": 0.27747923135757446,
|
|
"learning_rate": 0.00043994700534515053,
|
|
"loss": 0.9642,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.040711102163616134,
|
|
"grad_norm": 0.28192010521888733,
|
|
"learning_rate": 0.00043971858010873044,
|
|
"loss": 0.9742,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.04082294585087881,
|
|
"grad_norm": 0.2670564651489258,
|
|
"learning_rate": 0.0004394901548723103,
|
|
"loss": 0.9544,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.040934789538141496,
|
|
"grad_norm": 0.3089617192745209,
|
|
"learning_rate": 0.00043926172963589015,
|
|
"loss": 0.9563,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.041046633225404174,
|
|
"grad_norm": 0.26768213510513306,
|
|
"learning_rate": 0.00043903330439947,
|
|
"loss": 0.9531,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.04115847691266686,
|
|
"grad_norm": 0.28865131735801697,
|
|
"learning_rate": 0.00043880487916305,
|
|
"loss": 0.9579,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.041270320599929536,
|
|
"grad_norm": 0.27369582653045654,
|
|
"learning_rate": 0.00043857645392662983,
|
|
"loss": 0.9679,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.04138216428719222,
|
|
"grad_norm": 0.2889108955860138,
|
|
"learning_rate": 0.0004383480286902097,
|
|
"loss": 0.9561,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.041494007974454905,
|
|
"grad_norm": 0.2701929211616516,
|
|
"learning_rate": 0.0004381196034537896,
|
|
"loss": 0.9642,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.04160585166171758,
|
|
"grad_norm": 0.2817586064338684,
|
|
"learning_rate": 0.00043789117821736945,
|
|
"loss": 0.9701,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.04171769534898027,
|
|
"grad_norm": 0.2924664318561554,
|
|
"learning_rate": 0.0004376627529809493,
|
|
"loss": 0.9617,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.041829539036242945,
|
|
"grad_norm": 0.28590497374534607,
|
|
"learning_rate": 0.0004374343277445292,
|
|
"loss": 0.9646,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.04194138272350563,
|
|
"grad_norm": 0.270046591758728,
|
|
"learning_rate": 0.0004372059025081091,
|
|
"loss": 0.95,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.04205322641076831,
|
|
"grad_norm": 0.2508755326271057,
|
|
"learning_rate": 0.000436977477271689,
|
|
"loss": 0.9525,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.04216507009803099,
|
|
"grad_norm": 0.26878127455711365,
|
|
"learning_rate": 0.0004367490520352689,
|
|
"loss": 0.9609,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.04227691378529368,
|
|
"grad_norm": 0.26882994174957275,
|
|
"learning_rate": 0.00043652062679884875,
|
|
"loss": 0.9671,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.042388757472556354,
|
|
"grad_norm": 0.28049325942993164,
|
|
"learning_rate": 0.0004362922015624286,
|
|
"loss": 0.9492,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.04250060115981904,
|
|
"grad_norm": 0.33502647280693054,
|
|
"learning_rate": 0.0004360637763260085,
|
|
"loss": 0.9537,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.04261244484708172,
|
|
"grad_norm": 0.321997731924057,
|
|
"learning_rate": 0.00043583535108958837,
|
|
"loss": 0.9646,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.0427242885343444,
|
|
"grad_norm": 0.29477357864379883,
|
|
"learning_rate": 0.0004356069258531682,
|
|
"loss": 0.9794,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.04283613222160708,
|
|
"grad_norm": 0.2989972233772278,
|
|
"learning_rate": 0.0004353785006167482,
|
|
"loss": 0.9645,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.042947975908869764,
|
|
"grad_norm": 0.33459851145744324,
|
|
"learning_rate": 0.00043515007538032804,
|
|
"loss": 0.9556,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.04305981959613245,
|
|
"grad_norm": 0.2941781282424927,
|
|
"learning_rate": 0.0004349216501439079,
|
|
"loss": 0.9507,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.043171663283395126,
|
|
"grad_norm": 0.27801111340522766,
|
|
"learning_rate": 0.0004346932249074878,
|
|
"loss": 0.9623,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.04328350697065781,
|
|
"grad_norm": 0.2765832841396332,
|
|
"learning_rate": 0.00043446479967106767,
|
|
"loss": 0.9815,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.04339535065792049,
|
|
"grad_norm": 0.303786039352417,
|
|
"learning_rate": 0.0004342363744346475,
|
|
"loss": 0.9575,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.04350719434518317,
|
|
"grad_norm": 0.29517048597335815,
|
|
"learning_rate": 0.00043400794919822743,
|
|
"loss": 0.9554,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.04361903803244585,
|
|
"grad_norm": 0.28657206892967224,
|
|
"learning_rate": 0.0004337795239618073,
|
|
"loss": 0.9631,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.043730881719708535,
|
|
"grad_norm": 0.2933245003223419,
|
|
"learning_rate": 0.0004335510987253872,
|
|
"loss": 0.987,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.04384272540697122,
|
|
"grad_norm": 0.31331002712249756,
|
|
"learning_rate": 0.0004333226734889671,
|
|
"loss": 0.971,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.0439545690942339,
|
|
"grad_norm": 0.32431700825691223,
|
|
"learning_rate": 0.00043309424825254696,
|
|
"loss": 0.9603,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.04406641278149658,
|
|
"grad_norm": 0.3346642851829529,
|
|
"learning_rate": 0.0004328658230161268,
|
|
"loss": 0.9721,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.04417825646875926,
|
|
"grad_norm": 0.33921241760253906,
|
|
"learning_rate": 0.00043263739777970673,
|
|
"loss": 0.9639,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.044290100156021944,
|
|
"grad_norm": 0.3068247139453888,
|
|
"learning_rate": 0.0004324089725432866,
|
|
"loss": 0.9756,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.04440194384328462,
|
|
"grad_norm": 0.3049049973487854,
|
|
"learning_rate": 0.00043218054730686644,
|
|
"loss": 0.9693,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.04451378753054731,
|
|
"grad_norm": 0.30104655027389526,
|
|
"learning_rate": 0.00043195212207044635,
|
|
"loss": 0.9704,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.04462563121780999,
|
|
"grad_norm": 0.36955609917640686,
|
|
"learning_rate": 0.00043172369683402626,
|
|
"loss": 0.9527,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.04473747490507267,
|
|
"grad_norm": 0.318854957818985,
|
|
"learning_rate": 0.0004314952715976061,
|
|
"loss": 0.9543,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.044849318592335354,
|
|
"grad_norm": 0.3166191875934601,
|
|
"learning_rate": 0.000431266846361186,
|
|
"loss": 0.968,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.04496116227959803,
|
|
"grad_norm": 0.2976950407028198,
|
|
"learning_rate": 0.0004310384211247659,
|
|
"loss": 0.9822,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.045073005966860716,
|
|
"grad_norm": 0.2912284731864929,
|
|
"learning_rate": 0.00043080999588834574,
|
|
"loss": 0.9759,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.0451848496541234,
|
|
"grad_norm": 0.31027549505233765,
|
|
"learning_rate": 0.00043058157065192565,
|
|
"loss": 0.9794,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.04529669334138608,
|
|
"grad_norm": 0.3182738721370697,
|
|
"learning_rate": 0.0004303531454155055,
|
|
"loss": 0.9654,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.04540853702864876,
|
|
"grad_norm": 0.3006060719490051,
|
|
"learning_rate": 0.00043012472017908536,
|
|
"loss": 0.9548,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.04552038071591144,
|
|
"grad_norm": 0.2828291654586792,
|
|
"learning_rate": 0.0004298962949426653,
|
|
"loss": 0.9611,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.045632224403174125,
|
|
"grad_norm": 0.30988603830337524,
|
|
"learning_rate": 0.0004296678697062452,
|
|
"loss": 0.9614,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.0457440680904368,
|
|
"grad_norm": 0.29344943165779114,
|
|
"learning_rate": 0.00042943944446982503,
|
|
"loss": 0.9522,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.04585591177769949,
|
|
"grad_norm": 0.29713529348373413,
|
|
"learning_rate": 0.00042921101923340494,
|
|
"loss": 0.9468,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.04596775546496217,
|
|
"grad_norm": 0.2815961539745331,
|
|
"learning_rate": 0.0004289825939969848,
|
|
"loss": 0.9546,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.04607959915222485,
|
|
"grad_norm": 0.25218480825424194,
|
|
"learning_rate": 0.00042875416876056465,
|
|
"loss": 0.9372,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.046191442839487534,
|
|
"grad_norm": 0.2735552191734314,
|
|
"learning_rate": 0.0004285257435241445,
|
|
"loss": 0.942,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.04630328652675021,
|
|
"grad_norm": 0.27451473474502563,
|
|
"learning_rate": 0.0004282973182877245,
|
|
"loss": 0.931,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.0464151302140129,
|
|
"grad_norm": 0.24361196160316467,
|
|
"learning_rate": 0.00042806889305130433,
|
|
"loss": 0.924,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.046526973901275574,
|
|
"grad_norm": 0.25817179679870605,
|
|
"learning_rate": 0.0004278404678148842,
|
|
"loss": 0.9373,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.04663881758853826,
|
|
"grad_norm": 0.28722450137138367,
|
|
"learning_rate": 0.0004276120425784641,
|
|
"loss": 0.9271,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.046750661275800943,
|
|
"grad_norm": 0.25202882289886475,
|
|
"learning_rate": 0.00042738361734204395,
|
|
"loss": 0.9187,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.04686250496306362,
|
|
"grad_norm": 0.2637481391429901,
|
|
"learning_rate": 0.0004271551921056238,
|
|
"loss": 0.9402,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.046974348650326306,
|
|
"grad_norm": 0.2684090733528137,
|
|
"learning_rate": 0.0004269267668692037,
|
|
"loss": 0.9574,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.047086192337588983,
|
|
"grad_norm": 0.28711873292922974,
|
|
"learning_rate": 0.00042669834163278357,
|
|
"loss": 0.9551,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.04719803602485167,
|
|
"grad_norm": 0.2933102250099182,
|
|
"learning_rate": 0.0004264699163963635,
|
|
"loss": 0.9457,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.047309879712114346,
|
|
"grad_norm": 0.2875578701496124,
|
|
"learning_rate": 0.0004262414911599434,
|
|
"loss": 0.9667,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.04742172339937703,
|
|
"grad_norm": 0.3007104694843292,
|
|
"learning_rate": 0.00042601306592352325,
|
|
"loss": 0.9672,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.047533567086639715,
|
|
"grad_norm": 0.30211201310157776,
|
|
"learning_rate": 0.0004257846406871031,
|
|
"loss": 0.9781,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.04764541077390239,
|
|
"grad_norm": 0.29263827204704285,
|
|
"learning_rate": 0.000425556215450683,
|
|
"loss": 0.9923,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.04775725446116508,
|
|
"grad_norm": 0.29569676518440247,
|
|
"learning_rate": 0.00042532779021426287,
|
|
"loss": 0.9913,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.047869098148427755,
|
|
"grad_norm": 0.28223690390586853,
|
|
"learning_rate": 0.0004250993649778427,
|
|
"loss": 0.9817,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.04798094183569044,
|
|
"grad_norm": 0.271419882774353,
|
|
"learning_rate": 0.00042487093974142263,
|
|
"loss": 0.9977,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.04809278552295312,
|
|
"grad_norm": 0.26362791657447815,
|
|
"learning_rate": 0.00042464251450500254,
|
|
"loss": 0.9859,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.0482046292102158,
|
|
"grad_norm": 0.31365934014320374,
|
|
"learning_rate": 0.0004244140892685824,
|
|
"loss": 0.9862,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.04831647289747849,
|
|
"grad_norm": 0.26915237307548523,
|
|
"learning_rate": 0.0004241856640321623,
|
|
"loss": 0.9693,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.048428316584741164,
|
|
"grad_norm": 0.2639203369617462,
|
|
"learning_rate": 0.00042395723879574217,
|
|
"loss": 0.9691,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.04854016027200385,
|
|
"grad_norm": 0.30106601119041443,
|
|
"learning_rate": 0.000423728813559322,
|
|
"loss": 0.9521,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.04865200395926653,
|
|
"grad_norm": 0.2807524800300598,
|
|
"learning_rate": 0.00042350038832290193,
|
|
"loss": 0.9616,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.04876384764652921,
|
|
"grad_norm": 0.27363407611846924,
|
|
"learning_rate": 0.0004232719630864818,
|
|
"loss": 0.9538,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.048875691333791896,
|
|
"grad_norm": 0.29041701555252075,
|
|
"learning_rate": 0.00042304353785006164,
|
|
"loss": 0.9455,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.048987535021054573,
|
|
"grad_norm": 0.28237226605415344,
|
|
"learning_rate": 0.0004228151126136416,
|
|
"loss": 0.9615,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.04909937870831726,
|
|
"grad_norm": 0.30885329842567444,
|
|
"learning_rate": 0.00042258668737722146,
|
|
"loss": 0.9691,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.049211222395579936,
|
|
"grad_norm": 0.2734643220901489,
|
|
"learning_rate": 0.0004223582621408013,
|
|
"loss": 0.9663,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.04932306608284262,
|
|
"grad_norm": 0.2652278244495392,
|
|
"learning_rate": 0.00042212983690438123,
|
|
"loss": 0.9439,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.0494349097701053,
|
|
"grad_norm": 0.27749761939048767,
|
|
"learning_rate": 0.0004219014116679611,
|
|
"loss": 0.9623,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.04954675345736798,
|
|
"grad_norm": 0.2812553942203522,
|
|
"learning_rate": 0.00042167298643154094,
|
|
"loss": 0.9557,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.04965859714463067,
|
|
"grad_norm": 0.2762252688407898,
|
|
"learning_rate": 0.00042144456119512085,
|
|
"loss": 0.945,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.049770440831893345,
|
|
"grad_norm": 0.277118980884552,
|
|
"learning_rate": 0.0004212161359587007,
|
|
"loss": 0.93,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.04988228451915603,
|
|
"grad_norm": 0.2723037004470825,
|
|
"learning_rate": 0.0004209877107222806,
|
|
"loss": 0.963,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.04999412820641871,
|
|
"grad_norm": 0.29789137840270996,
|
|
"learning_rate": 0.0004207592854858605,
|
|
"loss": 0.954,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.05010597189368139,
|
|
"grad_norm": 0.26940014958381653,
|
|
"learning_rate": 0.0004205308602494404,
|
|
"loss": 0.9443,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.05021781558094407,
|
|
"grad_norm": 0.263300359249115,
|
|
"learning_rate": 0.00042030243501302024,
|
|
"loss": 0.9403,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.050329659268206754,
|
|
"grad_norm": 0.27823972702026367,
|
|
"learning_rate": 0.00042007400977660015,
|
|
"loss": 0.95,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.05044150295546944,
|
|
"grad_norm": 0.2782444357872009,
|
|
"learning_rate": 0.00041984558454018,
|
|
"loss": 0.953,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.05055334664273212,
|
|
"grad_norm": 0.277182936668396,
|
|
"learning_rate": 0.00041961715930375986,
|
|
"loss": 0.9498,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.0506651903299948,
|
|
"grad_norm": 0.2942575514316559,
|
|
"learning_rate": 0.00041938873406733977,
|
|
"loss": 0.957,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.05077703401725748,
|
|
"grad_norm": 0.3258327543735504,
|
|
"learning_rate": 0.0004191603088309197,
|
|
"loss": 0.9626,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.05088887770452016,
|
|
"grad_norm": 0.27874353528022766,
|
|
"learning_rate": 0.00041893188359449953,
|
|
"loss": 0.971,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.05100072139178284,
|
|
"grad_norm": 0.2981313169002533,
|
|
"learning_rate": 0.00041870345835807944,
|
|
"loss": 0.965,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.051112565079045526,
|
|
"grad_norm": 0.30568984150886536,
|
|
"learning_rate": 0.0004184750331216593,
|
|
"loss": 0.9566,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.05122440876630821,
|
|
"grad_norm": 0.27867600321769714,
|
|
"learning_rate": 0.00041824660788523915,
|
|
"loss": 0.94,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.05133625245357089,
|
|
"grad_norm": 0.30877605080604553,
|
|
"learning_rate": 0.000418018182648819,
|
|
"loss": 0.9453,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.05144809614083357,
|
|
"grad_norm": 0.3018844425678253,
|
|
"learning_rate": 0.0004177897574123989,
|
|
"loss": 0.9511,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.05155993982809625,
|
|
"grad_norm": 0.27943944931030273,
|
|
"learning_rate": 0.0004175613321759788,
|
|
"loss": 0.9371,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.051671783515358935,
|
|
"grad_norm": 0.2654775381088257,
|
|
"learning_rate": 0.0004173329069395587,
|
|
"loss": 0.9366,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.05178362720262161,
|
|
"grad_norm": 0.27594050765037537,
|
|
"learning_rate": 0.0004171044817031386,
|
|
"loss": 0.9229,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.0518954708898843,
|
|
"grad_norm": 0.26856914162635803,
|
|
"learning_rate": 0.00041687605646671845,
|
|
"loss": 0.9357,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.05200731457714698,
|
|
"grad_norm": 0.2956237494945526,
|
|
"learning_rate": 0.0004166476312302983,
|
|
"loss": 0.9023,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.05211915826440966,
|
|
"grad_norm": 0.30004164576530457,
|
|
"learning_rate": 0.0004164192059938782,
|
|
"loss": 0.9273,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.052231001951672344,
|
|
"grad_norm": 0.2691096365451813,
|
|
"learning_rate": 0.0004161907807574581,
|
|
"loss": 0.9332,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.05234284563893502,
|
|
"grad_norm": 0.2551780641078949,
|
|
"learning_rate": 0.00041596235552103793,
|
|
"loss": 0.9327,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.052454689326197707,
|
|
"grad_norm": 0.2806546092033386,
|
|
"learning_rate": 0.0004157339302846179,
|
|
"loss": 0.9355,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.05256653301346039,
|
|
"grad_norm": 0.27648645639419556,
|
|
"learning_rate": 0.00041550550504819775,
|
|
"loss": 0.9348,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.05267837670072307,
|
|
"grad_norm": 0.2816336750984192,
|
|
"learning_rate": 0.0004152770798117776,
|
|
"loss": 0.9294,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.05279022038798575,
|
|
"grad_norm": 0.29570698738098145,
|
|
"learning_rate": 0.0004150486545753575,
|
|
"loss": 0.9317,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.05290206407524843,
|
|
"grad_norm": 0.26981687545776367,
|
|
"learning_rate": 0.00041482022933893737,
|
|
"loss": 0.9317,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.053013907762511116,
|
|
"grad_norm": 0.2586159110069275,
|
|
"learning_rate": 0.0004145918041025172,
|
|
"loss": 0.9162,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.05312575144977379,
|
|
"grad_norm": 0.24129503965377808,
|
|
"learning_rate": 0.00041436337886609714,
|
|
"loss": 0.934,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.05323759513703648,
|
|
"grad_norm": 0.28072717785835266,
|
|
"learning_rate": 0.000414134953629677,
|
|
"loss": 0.9089,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.05334943882429916,
|
|
"grad_norm": 0.2760024964809418,
|
|
"learning_rate": 0.0004139065283932569,
|
|
"loss": 0.9115,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.05346128251156184,
|
|
"grad_norm": 0.28894710540771484,
|
|
"learning_rate": 0.0004136781031568368,
|
|
"loss": 0.9108,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.053573126198824525,
|
|
"grad_norm": 0.27882319688796997,
|
|
"learning_rate": 0.00041344967792041667,
|
|
"loss": 0.9184,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.0536849698860872,
|
|
"grad_norm": 0.27242934703826904,
|
|
"learning_rate": 0.0004132212526839965,
|
|
"loss": 0.9498,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.05379681357334989,
|
|
"grad_norm": 0.2809596359729767,
|
|
"learning_rate": 0.00041299282744757643,
|
|
"loss": 0.9365,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.053908657260612565,
|
|
"grad_norm": 0.3026556074619293,
|
|
"learning_rate": 0.0004127644022111563,
|
|
"loss": 0.9433,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.05402050094787525,
|
|
"grad_norm": 0.2933846116065979,
|
|
"learning_rate": 0.00041253597697473614,
|
|
"loss": 0.9351,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.054132344635137934,
|
|
"grad_norm": 0.2774868309497833,
|
|
"learning_rate": 0.00041230755173831605,
|
|
"loss": 0.9285,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.05424418832240061,
|
|
"grad_norm": 0.2859903573989868,
|
|
"learning_rate": 0.00041207912650189596,
|
|
"loss": 0.9344,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.054356032009663297,
|
|
"grad_norm": 0.26687270402908325,
|
|
"learning_rate": 0.0004118507012654758,
|
|
"loss": 0.9281,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.054467875696925974,
|
|
"grad_norm": 0.31075340509414673,
|
|
"learning_rate": 0.00041162227602905573,
|
|
"loss": 0.9418,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.05457971938418866,
|
|
"grad_norm": 0.2569184899330139,
|
|
"learning_rate": 0.0004113938507926356,
|
|
"loss": 0.9394,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.054691563071451336,
|
|
"grad_norm": 0.26250478625297546,
|
|
"learning_rate": 0.00041116542555621544,
|
|
"loss": 0.9499,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.05480340675871402,
|
|
"grad_norm": 0.27604004740715027,
|
|
"learning_rate": 0.00041093700031979535,
|
|
"loss": 0.9268,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.054915250445976706,
|
|
"grad_norm": 0.26279163360595703,
|
|
"learning_rate": 0.0004107085750833752,
|
|
"loss": 0.9313,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.05502709413323938,
|
|
"grad_norm": 0.29265978932380676,
|
|
"learning_rate": 0.00041048014984695506,
|
|
"loss": 0.9498,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.05513893782050207,
|
|
"grad_norm": 0.32107868790626526,
|
|
"learning_rate": 0.000410251724610535,
|
|
"loss": 0.9708,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.055250781507764746,
|
|
"grad_norm": 0.32804161310195923,
|
|
"learning_rate": 0.0004100232993741149,
|
|
"loss": 0.9624,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.05536262519502743,
|
|
"grad_norm": 0.3207037150859833,
|
|
"learning_rate": 0.00040979487413769474,
|
|
"loss": 0.9538,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.05547446888229011,
|
|
"grad_norm": 0.29660555720329285,
|
|
"learning_rate": 0.00040956644890127465,
|
|
"loss": 0.9677,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.05558631256955279,
|
|
"grad_norm": 0.34930771589279175,
|
|
"learning_rate": 0.0004093380236648545,
|
|
"loss": 0.9777,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.05569815625681548,
|
|
"grad_norm": 0.3037464916706085,
|
|
"learning_rate": 0.00040910959842843436,
|
|
"loss": 0.9826,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.055809999944078155,
|
|
"grad_norm": 0.31435292959213257,
|
|
"learning_rate": 0.00040888117319201427,
|
|
"loss": 0.9677,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.05592184363134084,
|
|
"grad_norm": 0.29182785749435425,
|
|
"learning_rate": 0.0004086527479555941,
|
|
"loss": 0.9563,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.05603368731860352,
|
|
"grad_norm": 0.34796231985092163,
|
|
"learning_rate": 0.00040842432271917403,
|
|
"loss": 0.957,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.0561455310058662,
|
|
"grad_norm": 0.3027050495147705,
|
|
"learning_rate": 0.00040819589748275394,
|
|
"loss": 0.967,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.056257374693128887,
|
|
"grad_norm": 0.3419332802295685,
|
|
"learning_rate": 0.0004079674722463338,
|
|
"loss": 0.9654,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.056369218380391564,
|
|
"grad_norm": 0.29381224513053894,
|
|
"learning_rate": 0.00040773904700991366,
|
|
"loss": 0.9647,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.05648106206765425,
|
|
"grad_norm": 0.29206860065460205,
|
|
"learning_rate": 0.0004075106217734935,
|
|
"loss": 0.9637,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.056592905754916926,
|
|
"grad_norm": 0.3169795274734497,
|
|
"learning_rate": 0.0004072821965370734,
|
|
"loss": 0.963,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.05670474944217961,
|
|
"grad_norm": 0.30713772773742676,
|
|
"learning_rate": 0.0004070537713006533,
|
|
"loss": 0.9766,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.05681659312944229,
|
|
"grad_norm": 0.29805994033813477,
|
|
"learning_rate": 0.00040682534606423313,
|
|
"loss": 0.9597,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.05692843681670497,
|
|
"grad_norm": 0.33419644832611084,
|
|
"learning_rate": 0.0004065969208278131,
|
|
"loss": 0.9598,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.05704028050396766,
|
|
"grad_norm": 0.31769025325775146,
|
|
"learning_rate": 0.00040636849559139295,
|
|
"loss": 0.942,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.057152124191230336,
|
|
"grad_norm": 0.3017726242542267,
|
|
"learning_rate": 0.0004061400703549728,
|
|
"loss": 0.9627,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.05726396787849302,
|
|
"grad_norm": 0.32213470339775085,
|
|
"learning_rate": 0.0004059116451185527,
|
|
"loss": 0.9518,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.0573758115657557,
|
|
"grad_norm": 0.29069948196411133,
|
|
"learning_rate": 0.0004056832198821326,
|
|
"loss": 0.9337,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.05748765525301838,
|
|
"grad_norm": 0.32283100485801697,
|
|
"learning_rate": 0.00040545479464571243,
|
|
"loss": 0.959,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.05759949894028106,
|
|
"grad_norm": 0.3191847801208496,
|
|
"learning_rate": 0.00040522636940929234,
|
|
"loss": 0.9439,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.057711342627543745,
|
|
"grad_norm": 0.565864622592926,
|
|
"learning_rate": 0.00040499794417287225,
|
|
"loss": 0.9587,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.05782318631480643,
|
|
"grad_norm": 0.3419003188610077,
|
|
"learning_rate": 0.0004047695189364521,
|
|
"loss": 0.9466,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.05793503000206911,
|
|
"grad_norm": 0.28331097960472107,
|
|
"learning_rate": 0.000404541093700032,
|
|
"loss": 0.9472,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.05804687368933179,
|
|
"grad_norm": 0.2994554042816162,
|
|
"learning_rate": 0.00040431266846361187,
|
|
"loss": 0.9434,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.05815871737659447,
|
|
"grad_norm": 0.30070000886917114,
|
|
"learning_rate": 0.0004040842432271917,
|
|
"loss": 0.9408,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.058270561063857154,
|
|
"grad_norm": 0.29924333095550537,
|
|
"learning_rate": 0.00040385581799077164,
|
|
"loss": 0.9484,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.05838240475111983,
|
|
"grad_norm": 0.2905283272266388,
|
|
"learning_rate": 0.0004036273927543515,
|
|
"loss": 0.9636,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.058494248438382516,
|
|
"grad_norm": 0.3290540874004364,
|
|
"learning_rate": 0.00040339896751793135,
|
|
"loss": 0.9396,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.0586060921256452,
|
|
"grad_norm": 0.29686272144317627,
|
|
"learning_rate": 0.0004031705422815113,
|
|
"loss": 0.9408,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.05871793581290788,
|
|
"grad_norm": 0.2768057882785797,
|
|
"learning_rate": 0.00040294211704509117,
|
|
"loss": 0.9328,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.05882977950017056,
|
|
"grad_norm": 0.2614899277687073,
|
|
"learning_rate": 0.000402713691808671,
|
|
"loss": 0.9483,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.05894162318743324,
|
|
"grad_norm": 0.2692766487598419,
|
|
"learning_rate": 0.00040248526657225093,
|
|
"loss": 0.9479,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.059053466874695926,
|
|
"grad_norm": 0.3009514808654785,
|
|
"learning_rate": 0.0004022568413358308,
|
|
"loss": 0.9681,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.0591653105619586,
|
|
"grad_norm": 0.27767086029052734,
|
|
"learning_rate": 0.00040202841609941064,
|
|
"loss": 0.9685,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.05927715424922129,
|
|
"grad_norm": 0.2956901788711548,
|
|
"learning_rate": 0.00040179999086299055,
|
|
"loss": 0.9609,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.05938899793648397,
|
|
"grad_norm": 0.3046570420265198,
|
|
"learning_rate": 0.0004015715656265704,
|
|
"loss": 0.961,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.05950084162374665,
|
|
"grad_norm": 0.24477365612983704,
|
|
"learning_rate": 0.0004013431403901503,
|
|
"loss": 0.9501,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.059612685311009335,
|
|
"grad_norm": 0.25505194067955017,
|
|
"learning_rate": 0.00040111471515373023,
|
|
"loss": 0.946,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.05972452899827201,
|
|
"grad_norm": 0.26015251874923706,
|
|
"learning_rate": 0.0004008862899173101,
|
|
"loss": 0.9372,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.0598363726855347,
|
|
"grad_norm": 0.24911250174045563,
|
|
"learning_rate": 0.00040065786468088994,
|
|
"loss": 0.9487,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.059948216372797375,
|
|
"grad_norm": 0.2779735028743744,
|
|
"learning_rate": 0.00040042943944446985,
|
|
"loss": 0.9316,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.06006006006006006,
|
|
"grad_norm": 0.30663251876831055,
|
|
"learning_rate": 0.0004002010142080497,
|
|
"loss": 0.9461,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.060171903747322744,
|
|
"grad_norm": 0.2724740505218506,
|
|
"learning_rate": 0.00039997258897162956,
|
|
"loss": 0.9214,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.06028374743458542,
|
|
"grad_norm": 0.26819276809692383,
|
|
"learning_rate": 0.00039974416373520947,
|
|
"loss": 0.9368,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.060395591121848106,
|
|
"grad_norm": 0.26342320442199707,
|
|
"learning_rate": 0.0003995157384987894,
|
|
"loss": 0.9332,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.060507434809110784,
|
|
"grad_norm": 0.32590556144714355,
|
|
"learning_rate": 0.00039928731326236924,
|
|
"loss": 0.9286,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.06061927849637347,
|
|
"grad_norm": 0.2747272849082947,
|
|
"learning_rate": 0.00039905888802594915,
|
|
"loss": 0.932,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.06073112218363615,
|
|
"grad_norm": 0.23089702427387238,
|
|
"learning_rate": 0.000398830462789529,
|
|
"loss": 0.9216,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.06084296587089883,
|
|
"grad_norm": 0.24383346736431122,
|
|
"learning_rate": 0.00039860203755310886,
|
|
"loss": 0.9333,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.060954809558161516,
|
|
"grad_norm": 0.23999489843845367,
|
|
"learning_rate": 0.00039837361231668877,
|
|
"loss": 0.9134,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.06106665324542419,
|
|
"grad_norm": 0.3041435480117798,
|
|
"learning_rate": 0.0003981451870802686,
|
|
"loss": 0.9226,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.06117849693268688,
|
|
"grad_norm": 0.2667579650878906,
|
|
"learning_rate": 0.0003979167618438485,
|
|
"loss": 0.9148,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.061290340619949556,
|
|
"grad_norm": 0.2730364203453064,
|
|
"learning_rate": 0.0003976883366074284,
|
|
"loss": 0.9073,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.06140218430721224,
|
|
"grad_norm": 0.28175118565559387,
|
|
"learning_rate": 0.0003974599113710083,
|
|
"loss": 0.9097,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.061514027994474925,
|
|
"grad_norm": 0.2826266288757324,
|
|
"learning_rate": 0.00039723148613458816,
|
|
"loss": 0.8972,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.0616258716817376,
|
|
"grad_norm": 0.25821810960769653,
|
|
"learning_rate": 0.000397003060898168,
|
|
"loss": 0.8898,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.06173771536900029,
|
|
"grad_norm": 0.31401073932647705,
|
|
"learning_rate": 0.0003967746356617479,
|
|
"loss": 0.8986,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.061849559056262965,
|
|
"grad_norm": 0.2664715349674225,
|
|
"learning_rate": 0.0003965462104253278,
|
|
"loss": 0.9178,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.06196140274352565,
|
|
"grad_norm": 0.2725924253463745,
|
|
"learning_rate": 0.00039631778518890763,
|
|
"loss": 0.8941,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.06207324643078833,
|
|
"grad_norm": 0.2991993725299835,
|
|
"learning_rate": 0.0003960893599524876,
|
|
"loss": 0.899,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.06218509011805101,
|
|
"grad_norm": 0.2683865427970886,
|
|
"learning_rate": 0.00039586093471606745,
|
|
"loss": 0.9105,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.062296933805313696,
|
|
"grad_norm": 0.29127469658851624,
|
|
"learning_rate": 0.0003956325094796473,
|
|
"loss": 0.9091,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.062408777492576374,
|
|
"grad_norm": 0.28191229701042175,
|
|
"learning_rate": 0.0003954040842432272,
|
|
"loss": 0.9078,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.06252062117983906,
|
|
"grad_norm": 0.28319644927978516,
|
|
"learning_rate": 0.0003951756590068071,
|
|
"loss": 0.9134,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.06263246486710174,
|
|
"grad_norm": 0.2563108205795288,
|
|
"learning_rate": 0.00039494723377038693,
|
|
"loss": 0.9166,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.06274430855436441,
|
|
"grad_norm": 0.29730817675590515,
|
|
"learning_rate": 0.00039471880853396684,
|
|
"loss": 0.9101,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.0628561522416271,
|
|
"grad_norm": 0.25925830006599426,
|
|
"learning_rate": 0.0003944903832975467,
|
|
"loss": 0.9131,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.06296799592888978,
|
|
"grad_norm": 0.2645208537578583,
|
|
"learning_rate": 0.0003942619580611266,
|
|
"loss": 0.9203,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.06307983961615246,
|
|
"grad_norm": 0.2844574749469757,
|
|
"learning_rate": 0.0003940335328247065,
|
|
"loss": 0.914,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.06319168330341515,
|
|
"grad_norm": 0.2687402367591858,
|
|
"learning_rate": 0.00039380510758828637,
|
|
"loss": 0.9095,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.06330352699067783,
|
|
"grad_norm": 0.22893477976322174,
|
|
"learning_rate": 0.00039357668235186623,
|
|
"loss": 0.8993,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.06341537067794051,
|
|
"grad_norm": 0.27271768450737,
|
|
"learning_rate": 0.00039334825711544614,
|
|
"loss": 0.8989,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.06352721436520319,
|
|
"grad_norm": 0.27709853649139404,
|
|
"learning_rate": 0.000393119831879026,
|
|
"loss": 0.8998,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.06363905805246588,
|
|
"grad_norm": 0.24321520328521729,
|
|
"learning_rate": 0.00039289140664260585,
|
|
"loss": 0.887,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.06375090173972855,
|
|
"grad_norm": 0.26779887080192566,
|
|
"learning_rate": 0.00039266298140618576,
|
|
"loss": 0.9091,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.06386274542699123,
|
|
"grad_norm": 0.2612350881099701,
|
|
"learning_rate": 0.00039243455616976567,
|
|
"loss": 0.9043,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.06397458911425392,
|
|
"grad_norm": 0.26247987151145935,
|
|
"learning_rate": 0.0003922061309333455,
|
|
"loss": 0.9024,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.0640864328015166,
|
|
"grad_norm": 0.2605653703212738,
|
|
"learning_rate": 0.00039197770569692543,
|
|
"loss": 0.9311,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.06419827648877928,
|
|
"grad_norm": 0.28249841928482056,
|
|
"learning_rate": 0.0003917492804605053,
|
|
"loss": 0.9265,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.06431012017604196,
|
|
"grad_norm": 0.2880108654499054,
|
|
"learning_rate": 0.00039152085522408515,
|
|
"loss": 0.9331,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.06442196386330465,
|
|
"grad_norm": 0.31626009941101074,
|
|
"learning_rate": 0.00039129242998766506,
|
|
"loss": 0.9483,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.06453380755056733,
|
|
"grad_norm": 0.28972744941711426,
|
|
"learning_rate": 0.0003910640047512449,
|
|
"loss": 0.9239,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.06464565123783,
|
|
"grad_norm": 0.27140864729881287,
|
|
"learning_rate": 0.00039083557951482477,
|
|
"loss": 0.9259,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.0647574949250927,
|
|
"grad_norm": 0.26331818103790283,
|
|
"learning_rate": 0.00039060715427840473,
|
|
"loss": 0.9383,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.06486933861235537,
|
|
"grad_norm": 0.26927000284194946,
|
|
"learning_rate": 0.0003903787290419846,
|
|
"loss": 0.9236,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.06498118229961805,
|
|
"grad_norm": 0.2833601236343384,
|
|
"learning_rate": 0.00039015030380556444,
|
|
"loss": 0.9257,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.06509302598688073,
|
|
"grad_norm": 0.2970174551010132,
|
|
"learning_rate": 0.00038992187856914435,
|
|
"loss": 0.9164,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.06520486967414342,
|
|
"grad_norm": 0.27904263138771057,
|
|
"learning_rate": 0.0003896934533327242,
|
|
"loss": 0.9045,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.0653167133614061,
|
|
"grad_norm": 0.24879537522792816,
|
|
"learning_rate": 0.00038946502809630406,
|
|
"loss": 0.9,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.06542855704866878,
|
|
"grad_norm": 0.2897798717021942,
|
|
"learning_rate": 0.000389236602859884,
|
|
"loss": 0.919,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.06554040073593147,
|
|
"grad_norm": 0.26522865891456604,
|
|
"learning_rate": 0.00038900817762346383,
|
|
"loss": 0.9168,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.06565224442319414,
|
|
"grad_norm": 0.26405441761016846,
|
|
"learning_rate": 0.00038877975238704374,
|
|
"loss": 0.9169,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.06576408811045682,
|
|
"grad_norm": 0.2543514370918274,
|
|
"learning_rate": 0.00038855132715062365,
|
|
"loss": 0.917,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.06587593179771951,
|
|
"grad_norm": 0.2683538794517517,
|
|
"learning_rate": 0.0003883229019142035,
|
|
"loss": 0.9179,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.06598777548498219,
|
|
"grad_norm": 0.24559274315834045,
|
|
"learning_rate": 0.00038809447667778336,
|
|
"loss": 0.907,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.06609961917224487,
|
|
"grad_norm": 0.2604455351829529,
|
|
"learning_rate": 0.00038786605144136327,
|
|
"loss": 0.9172,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.06621146285950755,
|
|
"grad_norm": 0.24329319596290588,
|
|
"learning_rate": 0.0003876376262049431,
|
|
"loss": 0.9171,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.06632330654677024,
|
|
"grad_norm": 0.237509623169899,
|
|
"learning_rate": 0.000387409200968523,
|
|
"loss": 0.9272,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.06643515023403292,
|
|
"grad_norm": 0.2569025754928589,
|
|
"learning_rate": 0.00038718077573210284,
|
|
"loss": 0.9327,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.0665469939212956,
|
|
"grad_norm": 0.2908497750759125,
|
|
"learning_rate": 0.0003869523504956828,
|
|
"loss": 0.9299,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.06665883760855829,
|
|
"grad_norm": 0.24669544398784637,
|
|
"learning_rate": 0.00038672392525926266,
|
|
"loss": 0.9036,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.06677068129582096,
|
|
"grad_norm": 0.23906981945037842,
|
|
"learning_rate": 0.0003864955000228425,
|
|
"loss": 0.9266,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.06688252498308364,
|
|
"grad_norm": 0.2822079658508301,
|
|
"learning_rate": 0.0003862670747864224,
|
|
"loss": 0.9209,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.06699436867034632,
|
|
"grad_norm": 0.27469775080680847,
|
|
"learning_rate": 0.0003860386495500023,
|
|
"loss": 0.9385,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.06710621235760901,
|
|
"grad_norm": 0.24559862911701202,
|
|
"learning_rate": 0.00038581022431358213,
|
|
"loss": 0.9248,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.06721805604487169,
|
|
"grad_norm": 0.24427008628845215,
|
|
"learning_rate": 0.00038558179907716204,
|
|
"loss": 0.9358,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.06732989973213437,
|
|
"grad_norm": 0.2626965641975403,
|
|
"learning_rate": 0.00038535337384074195,
|
|
"loss": 0.9211,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.06744174341939706,
|
|
"grad_norm": 0.226990208029747,
|
|
"learning_rate": 0.0003851249486043218,
|
|
"loss": 0.9292,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.06755358710665973,
|
|
"grad_norm": 0.2762834131717682,
|
|
"learning_rate": 0.0003848965233679017,
|
|
"loss": 0.932,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.06766543079392241,
|
|
"grad_norm": 0.2799958884716034,
|
|
"learning_rate": 0.0003846680981314816,
|
|
"loss": 0.943,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.06777727448118509,
|
|
"grad_norm": 0.26224029064178467,
|
|
"learning_rate": 0.00038443967289506143,
|
|
"loss": 0.9236,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.06788911816844778,
|
|
"grad_norm": 0.2897866368293762,
|
|
"learning_rate": 0.00038421124765864134,
|
|
"loss": 0.95,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.06800096185571046,
|
|
"grad_norm": 0.2899113893508911,
|
|
"learning_rate": 0.0003839828224222212,
|
|
"loss": 0.9403,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.06811280554297314,
|
|
"grad_norm": 0.27765581011772156,
|
|
"learning_rate": 0.00038375439718580105,
|
|
"loss": 0.9447,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.06822464923023583,
|
|
"grad_norm": 0.27683207392692566,
|
|
"learning_rate": 0.000383525971949381,
|
|
"loss": 0.949,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.0683364929174985,
|
|
"grad_norm": 0.2815559506416321,
|
|
"learning_rate": 0.00038329754671296087,
|
|
"loss": 0.9627,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.06844833660476118,
|
|
"grad_norm": 0.2741657793521881,
|
|
"learning_rate": 0.00038306912147654073,
|
|
"loss": 0.9659,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.06856018029202386,
|
|
"grad_norm": 0.4103181064128876,
|
|
"learning_rate": 0.00038284069624012064,
|
|
"loss": 0.9612,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.06867202397928655,
|
|
"grad_norm": 0.2862701416015625,
|
|
"learning_rate": 0.0003826122710037005,
|
|
"loss": 0.9393,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.06878386766654923,
|
|
"grad_norm": 0.2789844274520874,
|
|
"learning_rate": 0.00038238384576728035,
|
|
"loss": 0.9447,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.06889571135381191,
|
|
"grad_norm": 0.590391993522644,
|
|
"learning_rate": 0.00038215542053086026,
|
|
"loss": 0.9525,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.0690075550410746,
|
|
"grad_norm": 0.2721211016178131,
|
|
"learning_rate": 0.0003819269952944401,
|
|
"loss": 0.9467,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.06911939872833728,
|
|
"grad_norm": 0.27576929330825806,
|
|
"learning_rate": 0.00038169857005802,
|
|
"loss": 0.9428,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.06923124241559996,
|
|
"grad_norm": 0.28229111433029175,
|
|
"learning_rate": 0.00038147014482159993,
|
|
"loss": 0.9418,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.06934308610286263,
|
|
"grad_norm": 0.29595518112182617,
|
|
"learning_rate": 0.0003812417195851798,
|
|
"loss": 0.9178,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.06945492979012532,
|
|
"grad_norm": 0.3055596351623535,
|
|
"learning_rate": 0.00038101329434875965,
|
|
"loss": 0.9464,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.069566773477388,
|
|
"grad_norm": 0.29212549328804016,
|
|
"learning_rate": 0.00038078486911233956,
|
|
"loss": 0.9491,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.06967861716465068,
|
|
"grad_norm": 0.288928359746933,
|
|
"learning_rate": 0.0003805564438759194,
|
|
"loss": 0.9285,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.06979046085191337,
|
|
"grad_norm": 0.2759207487106323,
|
|
"learning_rate": 0.00038032801863949927,
|
|
"loss": 0.9336,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.06990230453917605,
|
|
"grad_norm": 0.31041648983955383,
|
|
"learning_rate": 0.0003800995934030792,
|
|
"loss": 0.9317,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.07001414822643873,
|
|
"grad_norm": 0.29425299167633057,
|
|
"learning_rate": 0.0003798711681666591,
|
|
"loss": 0.9212,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.0701259919137014,
|
|
"grad_norm": 0.278062105178833,
|
|
"learning_rate": 0.00037964274293023894,
|
|
"loss": 0.9291,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.0702378356009641,
|
|
"grad_norm": 0.2983698546886444,
|
|
"learning_rate": 0.00037941431769381885,
|
|
"loss": 0.9169,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.07034967928822677,
|
|
"grad_norm": 0.29595527052879333,
|
|
"learning_rate": 0.0003791858924573987,
|
|
"loss": 0.9286,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.07046152297548945,
|
|
"grad_norm": 0.26365020871162415,
|
|
"learning_rate": 0.00037895746722097856,
|
|
"loss": 0.9312,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.07057336666275214,
|
|
"grad_norm": 0.27807778120040894,
|
|
"learning_rate": 0.0003787290419845585,
|
|
"loss": 0.9274,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.07068521035001482,
|
|
"grad_norm": 0.2585415840148926,
|
|
"learning_rate": 0.00037850061674813833,
|
|
"loss": 0.9513,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.0707970540372775,
|
|
"grad_norm": 0.2740543484687805,
|
|
"learning_rate": 0.0003782721915117182,
|
|
"loss": 0.922,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.07090889772454018,
|
|
"grad_norm": 0.28271788358688354,
|
|
"learning_rate": 0.00037804376627529815,
|
|
"loss": 0.94,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.07102074141180287,
|
|
"grad_norm": 0.28767603635787964,
|
|
"learning_rate": 0.000377815341038878,
|
|
"loss": 0.9295,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.07113258509906555,
|
|
"grad_norm": 0.25200092792510986,
|
|
"learning_rate": 0.00037758691580245786,
|
|
"loss": 0.9219,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.07124442878632822,
|
|
"grad_norm": 0.27449852228164673,
|
|
"learning_rate": 0.00037735849056603777,
|
|
"loss": 0.9227,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.07135627247359091,
|
|
"grad_norm": 0.27951040863990784,
|
|
"learning_rate": 0.0003771300653296176,
|
|
"loss": 0.9256,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.07146811616085359,
|
|
"grad_norm": 0.27883175015449524,
|
|
"learning_rate": 0.0003769016400931975,
|
|
"loss": 0.9244,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.07157995984811627,
|
|
"grad_norm": 0.27942216396331787,
|
|
"learning_rate": 0.00037667321485677734,
|
|
"loss": 0.9287,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.07169180353537895,
|
|
"grad_norm": 0.2605076730251312,
|
|
"learning_rate": 0.00037644478962035725,
|
|
"loss": 0.9213,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.07180364722264164,
|
|
"grad_norm": 0.25812190771102905,
|
|
"learning_rate": 0.00037621636438393716,
|
|
"loss": 0.9268,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.07191549090990432,
|
|
"grad_norm": 0.27478551864624023,
|
|
"learning_rate": 0.000375987939147517,
|
|
"loss": 0.9341,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.072027334597167,
|
|
"grad_norm": 0.2799810469150543,
|
|
"learning_rate": 0.0003757595139110969,
|
|
"loss": 0.9308,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.07213917828442969,
|
|
"grad_norm": 0.2494313269853592,
|
|
"learning_rate": 0.0003755310886746768,
|
|
"loss": 0.9389,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.07225102197169236,
|
|
"grad_norm": 0.3362772762775421,
|
|
"learning_rate": 0.00037530266343825664,
|
|
"loss": 0.9362,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.07236286565895504,
|
|
"grad_norm": 0.28501999378204346,
|
|
"learning_rate": 0.00037507423820183655,
|
|
"loss": 0.9262,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.07247470934621772,
|
|
"grad_norm": 0.24787545204162598,
|
|
"learning_rate": 0.0003748458129654164,
|
|
"loss": 0.9409,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.07258655303348041,
|
|
"grad_norm": 0.277665913105011,
|
|
"learning_rate": 0.0003746173877289963,
|
|
"loss": 0.9244,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.07269839672074309,
|
|
"grad_norm": 0.2613317370414734,
|
|
"learning_rate": 0.0003743889624925762,
|
|
"loss": 0.9429,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.07281024040800577,
|
|
"grad_norm": 0.2740306258201599,
|
|
"learning_rate": 0.0003741605372561561,
|
|
"loss": 0.9422,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.07292208409526846,
|
|
"grad_norm": 0.3052440881729126,
|
|
"learning_rate": 0.00037393211201973593,
|
|
"loss": 0.9346,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.07303392778253114,
|
|
"grad_norm": 0.27979132533073425,
|
|
"learning_rate": 0.00037370368678331584,
|
|
"loss": 0.9305,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.07314577146979381,
|
|
"grad_norm": 0.2834227979183197,
|
|
"learning_rate": 0.0003734752615468957,
|
|
"loss": 0.9305,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.07325761515705649,
|
|
"grad_norm": 0.28621387481689453,
|
|
"learning_rate": 0.00037324683631047555,
|
|
"loss": 0.9505,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.07336945884431918,
|
|
"grad_norm": 0.2539358139038086,
|
|
"learning_rate": 0.00037301841107405546,
|
|
"loss": 0.9491,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.07348130253158186,
|
|
"grad_norm": 0.29257437586784363,
|
|
"learning_rate": 0.0003727899858376354,
|
|
"loss": 0.9428,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.07359314621884454,
|
|
"grad_norm": 0.25158485770225525,
|
|
"learning_rate": 0.00037256156060121523,
|
|
"loss": 0.9471,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.07370498990610723,
|
|
"grad_norm": 0.26301345229148865,
|
|
"learning_rate": 0.00037233313536479514,
|
|
"loss": 0.928,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.07381683359336991,
|
|
"grad_norm": 0.2519192397594452,
|
|
"learning_rate": 0.000372104710128375,
|
|
"loss": 0.9189,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.07392867728063258,
|
|
"grad_norm": 0.29801836609840393,
|
|
"learning_rate": 0.00037187628489195485,
|
|
"loss": 0.9218,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.07404052096789528,
|
|
"grad_norm": 0.30779263377189636,
|
|
"learning_rate": 0.00037164785965553476,
|
|
"loss": 0.9263,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.07415236465515795,
|
|
"grad_norm": 0.2758638262748718,
|
|
"learning_rate": 0.0003714194344191146,
|
|
"loss": 0.904,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.07426420834242063,
|
|
"grad_norm": 0.26482871174812317,
|
|
"learning_rate": 0.00037119100918269447,
|
|
"loss": 0.9024,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.07437605202968331,
|
|
"grad_norm": 0.24001047015190125,
|
|
"learning_rate": 0.00037096258394627444,
|
|
"loss": 0.914,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.074487895716946,
|
|
"grad_norm": 0.2694549560546875,
|
|
"learning_rate": 0.0003707341587098543,
|
|
"loss": 0.921,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.07459973940420868,
|
|
"grad_norm": 0.25042393803596497,
|
|
"learning_rate": 0.00037050573347343415,
|
|
"loss": 0.9108,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.07471158309147136,
|
|
"grad_norm": 0.25945019721984863,
|
|
"learning_rate": 0.00037027730823701406,
|
|
"loss": 0.912,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.07482342677873405,
|
|
"grad_norm": 0.2624742090702057,
|
|
"learning_rate": 0.0003700488830005939,
|
|
"loss": 0.9108,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.07493527046599673,
|
|
"grad_norm": 0.27438145875930786,
|
|
"learning_rate": 0.00036982045776417377,
|
|
"loss": 0.9215,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.0750471141532594,
|
|
"grad_norm": 0.27610865235328674,
|
|
"learning_rate": 0.0003695920325277537,
|
|
"loss": 0.9053,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.07515895784052208,
|
|
"grad_norm": 0.2616426944732666,
|
|
"learning_rate": 0.00036936360729133353,
|
|
"loss": 0.9255,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.07527080152778477,
|
|
"grad_norm": 0.3146522641181946,
|
|
"learning_rate": 0.00036913518205491344,
|
|
"loss": 0.9105,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.07538264521504745,
|
|
"grad_norm": 0.29139819741249084,
|
|
"learning_rate": 0.00036890675681849335,
|
|
"loss": 0.9324,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.07549448890231013,
|
|
"grad_norm": 0.3176229000091553,
|
|
"learning_rate": 0.0003686783315820732,
|
|
"loss": 0.9434,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.07560633258957282,
|
|
"grad_norm": 0.2786601781845093,
|
|
"learning_rate": 0.00036844990634565307,
|
|
"loss": 0.9405,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.0757181762768355,
|
|
"grad_norm": 0.2988050580024719,
|
|
"learning_rate": 0.000368221481109233,
|
|
"loss": 0.9477,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.07583001996409817,
|
|
"grad_norm": 0.28120875358581543,
|
|
"learning_rate": 0.00036799305587281283,
|
|
"loss": 0.9521,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.07594186365136085,
|
|
"grad_norm": 0.27376359701156616,
|
|
"learning_rate": 0.0003677646306363927,
|
|
"loss": 0.9405,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.07605370733862354,
|
|
"grad_norm": 0.2721284329891205,
|
|
"learning_rate": 0.0003675362053999726,
|
|
"loss": 0.9392,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.07616555102588622,
|
|
"grad_norm": 0.31443721055984497,
|
|
"learning_rate": 0.0003673077801635525,
|
|
"loss": 0.939,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.0762773947131489,
|
|
"grad_norm": 0.27175766229629517,
|
|
"learning_rate": 0.00036707935492713236,
|
|
"loss": 0.9262,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.07638923840041159,
|
|
"grad_norm": 0.2984711527824402,
|
|
"learning_rate": 0.00036685092969071227,
|
|
"loss": 0.9381,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.07650108208767427,
|
|
"grad_norm": 0.2773591876029968,
|
|
"learning_rate": 0.00036662250445429213,
|
|
"loss": 0.9217,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.07661292577493695,
|
|
"grad_norm": 0.29338565468788147,
|
|
"learning_rate": 0.000366394079217872,
|
|
"loss": 0.9197,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.07672476946219962,
|
|
"grad_norm": 0.2456415593624115,
|
|
"learning_rate": 0.00036616565398145184,
|
|
"loss": 0.9191,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.07683661314946232,
|
|
"grad_norm": 0.324935644865036,
|
|
"learning_rate": 0.00036593722874503175,
|
|
"loss": 0.8975,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.076948456836725,
|
|
"grad_norm": 0.6967706680297852,
|
|
"learning_rate": 0.0003657088035086116,
|
|
"loss": 0.9053,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.07706030052398767,
|
|
"grad_norm": 0.8192552328109741,
|
|
"learning_rate": 0.0003654803782721915,
|
|
"loss": 0.9129,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.07717214421125036,
|
|
"grad_norm": 0.4698907136917114,
|
|
"learning_rate": 0.0003652519530357714,
|
|
"loss": 0.9128,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.07728398789851304,
|
|
"grad_norm": 0.3055092990398407,
|
|
"learning_rate": 0.0003650235277993513,
|
|
"loss": 0.9207,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.07739583158577572,
|
|
"grad_norm": 0.31879591941833496,
|
|
"learning_rate": 0.00036479510256293114,
|
|
"loss": 0.9101,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.0775076752730384,
|
|
"grad_norm": 0.2708083987236023,
|
|
"learning_rate": 0.00036456667732651105,
|
|
"loss": 0.9036,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.07761951896030109,
|
|
"grad_norm": 0.2801443040370941,
|
|
"learning_rate": 0.0003643382520900909,
|
|
"loss": 0.9031,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.07773136264756376,
|
|
"grad_norm": 0.2481400966644287,
|
|
"learning_rate": 0.00036410982685367076,
|
|
"loss": 0.8952,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.07784320633482644,
|
|
"grad_norm": 0.25424808263778687,
|
|
"learning_rate": 0.0003638814016172507,
|
|
"loss": 0.8846,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.07795505002208913,
|
|
"grad_norm": 0.2655096650123596,
|
|
"learning_rate": 0.0003636529763808306,
|
|
"loss": 0.8922,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 0.07806689370935181,
|
|
"grad_norm": 0.281180202960968,
|
|
"learning_rate": 0.00036342455114441043,
|
|
"loss": 0.8934,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.07817873739661449,
|
|
"grad_norm": 0.2850550413131714,
|
|
"learning_rate": 0.00036319612590799034,
|
|
"loss": 0.8856,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 0.07829058108387717,
|
|
"grad_norm": 0.24838604032993317,
|
|
"learning_rate": 0.0003629677006715702,
|
|
"loss": 0.905,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.07840242477113986,
|
|
"grad_norm": 0.2703045606613159,
|
|
"learning_rate": 0.00036273927543515005,
|
|
"loss": 0.8816,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 0.07851426845840254,
|
|
"grad_norm": 0.2786656320095062,
|
|
"learning_rate": 0.00036251085019872996,
|
|
"loss": 0.8997,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.07862611214566521,
|
|
"grad_norm": 0.2771463692188263,
|
|
"learning_rate": 0.0003622824249623098,
|
|
"loss": 0.9033,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 0.0787379558329279,
|
|
"grad_norm": 0.2721976339817047,
|
|
"learning_rate": 0.00036205399972588973,
|
|
"loss": 0.9109,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.07884979952019058,
|
|
"grad_norm": 0.2606031596660614,
|
|
"learning_rate": 0.00036182557448946964,
|
|
"loss": 0.9221,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.07896164320745326,
|
|
"grad_norm": 0.45895281434059143,
|
|
"learning_rate": 0.0003615971492530495,
|
|
"loss": 0.908,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.07907348689471594,
|
|
"grad_norm": 0.30524522066116333,
|
|
"learning_rate": 0.00036136872401662935,
|
|
"loss": 0.9234,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 0.07918533058197863,
|
|
"grad_norm": 0.2704319953918457,
|
|
"learning_rate": 0.00036114029878020926,
|
|
"loss": 0.9003,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.07929717426924131,
|
|
"grad_norm": 0.2770727872848511,
|
|
"learning_rate": 0.0003609118735437891,
|
|
"loss": 0.9253,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 0.07940901795650399,
|
|
"grad_norm": 0.25288262963294983,
|
|
"learning_rate": 0.00036068344830736897,
|
|
"loss": 0.9089,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.07952086164376668,
|
|
"grad_norm": 0.27105236053466797,
|
|
"learning_rate": 0.0003604550230709489,
|
|
"loss": 0.9138,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 0.07963270533102935,
|
|
"grad_norm": 0.2631518840789795,
|
|
"learning_rate": 0.0003602265978345288,
|
|
"loss": 0.9226,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.07974454901829203,
|
|
"grad_norm": 0.25269970297813416,
|
|
"learning_rate": 0.00035999817259810865,
|
|
"loss": 0.9102,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 0.07985639270555471,
|
|
"grad_norm": 0.2576468586921692,
|
|
"learning_rate": 0.00035976974736168856,
|
|
"loss": 0.9075,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.0799682363928174,
|
|
"grad_norm": 0.26297688484191895,
|
|
"learning_rate": 0.0003595413221252684,
|
|
"loss": 0.9004,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.08008008008008008,
|
|
"grad_norm": 0.3029099702835083,
|
|
"learning_rate": 0.00035931289688884827,
|
|
"loss": 0.9165,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.08019192376734276,
|
|
"grad_norm": 0.2699684798717499,
|
|
"learning_rate": 0.0003590844716524282,
|
|
"loss": 0.9232,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 0.08030376745460545,
|
|
"grad_norm": 0.26480093598365784,
|
|
"learning_rate": 0.00035885604641600804,
|
|
"loss": 0.9319,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.08041561114186813,
|
|
"grad_norm": 0.27503007650375366,
|
|
"learning_rate": 0.0003586276211795879,
|
|
"loss": 0.9398,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 0.0805274548291308,
|
|
"grad_norm": 0.2715147137641907,
|
|
"learning_rate": 0.00035839919594316785,
|
|
"loss": 0.9307,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.08063929851639348,
|
|
"grad_norm": 0.2697315812110901,
|
|
"learning_rate": 0.0003581707707067477,
|
|
"loss": 0.9342,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 0.08075114220365617,
|
|
"grad_norm": 0.2833189070224762,
|
|
"learning_rate": 0.00035794234547032757,
|
|
"loss": 0.9604,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.08086298589091885,
|
|
"grad_norm": 0.3069300353527069,
|
|
"learning_rate": 0.0003577139202339075,
|
|
"loss": 0.9397,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 0.08097482957818153,
|
|
"grad_norm": 0.28459593653678894,
|
|
"learning_rate": 0.00035748549499748733,
|
|
"loss": 0.925,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.08108667326544422,
|
|
"grad_norm": 0.28896769881248474,
|
|
"learning_rate": 0.0003572570697610672,
|
|
"loss": 0.9245,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.0811985169527069,
|
|
"grad_norm": 0.2574586272239685,
|
|
"learning_rate": 0.0003570286445246471,
|
|
"loss": 0.9326,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.08131036063996958,
|
|
"grad_norm": 0.2965002954006195,
|
|
"learning_rate": 0.00035680021928822695,
|
|
"loss": 0.9221,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 0.08142220432723227,
|
|
"grad_norm": 0.2657724618911743,
|
|
"learning_rate": 0.00035657179405180686,
|
|
"loss": 0.9143,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.08153404801449494,
|
|
"grad_norm": 0.2973329424858093,
|
|
"learning_rate": 0.0003563433688153867,
|
|
"loss": 0.9164,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 0.08164589170175762,
|
|
"grad_norm": 0.3032989501953125,
|
|
"learning_rate": 0.00035611494357896663,
|
|
"loss": 0.9254,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.0817577353890203,
|
|
"grad_norm": 0.28107839822769165,
|
|
"learning_rate": 0.0003558865183425465,
|
|
"loss": 0.9155,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 0.08186957907628299,
|
|
"grad_norm": 0.30296218395233154,
|
|
"learning_rate": 0.00035565809310612634,
|
|
"loss": 0.9218,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.08198142276354567,
|
|
"grad_norm": 0.28191155195236206,
|
|
"learning_rate": 0.00035542966786970625,
|
|
"loss": 0.9197,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 0.08209326645080835,
|
|
"grad_norm": 0.3113023638725281,
|
|
"learning_rate": 0.0003552012426332861,
|
|
"loss": 0.9228,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.08220511013807104,
|
|
"grad_norm": 0.3066212832927704,
|
|
"learning_rate": 0.00035497281739686596,
|
|
"loss": 0.9191,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.08231695382533372,
|
|
"grad_norm": 0.2658233940601349,
|
|
"learning_rate": 0.0003547443921604459,
|
|
"loss": 0.918,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.0824287975125964,
|
|
"grad_norm": 0.28222033381462097,
|
|
"learning_rate": 0.0003545159669240258,
|
|
"loss": 0.9253,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 0.08254064119985907,
|
|
"grad_norm": 0.2917843461036682,
|
|
"learning_rate": 0.00035428754168760564,
|
|
"loss": 0.9059,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.08265248488712176,
|
|
"grad_norm": 0.290404349565506,
|
|
"learning_rate": 0.00035405911645118555,
|
|
"loss": 0.9044,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 0.08276432857438444,
|
|
"grad_norm": 0.28990834951400757,
|
|
"learning_rate": 0.0003538306912147654,
|
|
"loss": 0.9078,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.08287617226164712,
|
|
"grad_norm": 0.27296292781829834,
|
|
"learning_rate": 0.00035360226597834526,
|
|
"loss": 0.9081,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 0.08298801594890981,
|
|
"grad_norm": 0.25443321466445923,
|
|
"learning_rate": 0.00035337384074192517,
|
|
"loss": 0.9019,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.08309985963617249,
|
|
"grad_norm": 0.25014832615852356,
|
|
"learning_rate": 0.0003531454155055051,
|
|
"loss": 0.8976,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 0.08321170332343517,
|
|
"grad_norm": 0.2844237983226776,
|
|
"learning_rate": 0.00035291699026908493,
|
|
"loss": 0.9039,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 0.08332354701069784,
|
|
"grad_norm": 0.26745542883872986,
|
|
"learning_rate": 0.00035268856503266484,
|
|
"loss": 0.8813,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 0.08343539069796053,
|
|
"grad_norm": 0.30750566720962524,
|
|
"learning_rate": 0.0003524601397962447,
|
|
"loss": 0.8988,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 0.08354723438522321,
|
|
"grad_norm": 0.2960536777973175,
|
|
"learning_rate": 0.00035223171455982456,
|
|
"loss": 0.8966,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 0.08365907807248589,
|
|
"grad_norm": 0.28923213481903076,
|
|
"learning_rate": 0.00035200328932340447,
|
|
"loss": 0.8872,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 0.08377092175974858,
|
|
"grad_norm": 0.2762465476989746,
|
|
"learning_rate": 0.0003517748640869843,
|
|
"loss": 0.8655,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 0.08388276544701126,
|
|
"grad_norm": 0.2870965301990509,
|
|
"learning_rate": 0.0003515464388505642,
|
|
"loss": 0.889,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.08399460913427394,
|
|
"grad_norm": 0.3135611116886139,
|
|
"learning_rate": 0.00035131801361414414,
|
|
"loss": 0.8898,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 0.08410645282153661,
|
|
"grad_norm": 0.29541128873825073,
|
|
"learning_rate": 0.000351089588377724,
|
|
"loss": 0.8884,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 0.0842182965087993,
|
|
"grad_norm": 0.2667001485824585,
|
|
"learning_rate": 0.00035086116314130385,
|
|
"loss": 0.8923,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 0.08433014019606198,
|
|
"grad_norm": 0.28677645325660706,
|
|
"learning_rate": 0.00035063273790488376,
|
|
"loss": 0.8862,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 0.08444198388332466,
|
|
"grad_norm": 0.26973757147789,
|
|
"learning_rate": 0.0003504043126684636,
|
|
"loss": 0.8739,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 0.08455382757058735,
|
|
"grad_norm": 0.2670735716819763,
|
|
"learning_rate": 0.0003501758874320435,
|
|
"loss": 0.8843,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 0.08466567125785003,
|
|
"grad_norm": 0.2678844928741455,
|
|
"learning_rate": 0.0003499474621956234,
|
|
"loss": 0.8855,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 0.08477751494511271,
|
|
"grad_norm": 0.26894411444664,
|
|
"learning_rate": 0.00034971903695920324,
|
|
"loss": 0.8828,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 0.08488935863237539,
|
|
"grad_norm": 0.28703927993774414,
|
|
"learning_rate": 0.00034949061172278315,
|
|
"loss": 0.885,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 0.08500120231963808,
|
|
"grad_norm": 0.2618086636066437,
|
|
"learning_rate": 0.00034926218648636306,
|
|
"loss": 0.8777,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.08511304600690076,
|
|
"grad_norm": 0.28816747665405273,
|
|
"learning_rate": 0.0003490337612499429,
|
|
"loss": 0.8836,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 0.08522488969416343,
|
|
"grad_norm": 0.29172763228416443,
|
|
"learning_rate": 0.00034880533601352277,
|
|
"loss": 0.8835,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 0.08533673338142612,
|
|
"grad_norm": 0.2613106667995453,
|
|
"learning_rate": 0.0003485769107771027,
|
|
"loss": 0.8736,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 0.0854485770686888,
|
|
"grad_norm": 0.2737283408641815,
|
|
"learning_rate": 0.00034834848554068254,
|
|
"loss": 0.8589,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 0.08556042075595148,
|
|
"grad_norm": 0.2709786295890808,
|
|
"learning_rate": 0.0003481200603042624,
|
|
"loss": 0.8675,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 0.08567226444321416,
|
|
"grad_norm": 0.2982759177684784,
|
|
"learning_rate": 0.0003478916350678423,
|
|
"loss": 0.8827,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 0.08578410813047685,
|
|
"grad_norm": 0.21551093459129333,
|
|
"learning_rate": 0.0003476632098314222,
|
|
"loss": 0.8663,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 0.08589595181773953,
|
|
"grad_norm": 0.26418018341064453,
|
|
"learning_rate": 0.00034743478459500207,
|
|
"loss": 0.8845,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 0.0860077955050022,
|
|
"grad_norm": 0.2310175597667694,
|
|
"learning_rate": 0.000347206359358582,
|
|
"loss": 0.8874,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 0.0861196391922649,
|
|
"grad_norm": 0.25112512707710266,
|
|
"learning_rate": 0.00034697793412216183,
|
|
"loss": 0.8896,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.08623148287952757,
|
|
"grad_norm": 0.33391082286834717,
|
|
"learning_rate": 0.0003467495088857417,
|
|
"loss": 0.8765,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 0.08634332656679025,
|
|
"grad_norm": 0.24641484022140503,
|
|
"learning_rate": 0.0003465210836493216,
|
|
"loss": 0.8572,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 0.08645517025405293,
|
|
"grad_norm": 0.26017534732818604,
|
|
"learning_rate": 0.00034629265841290145,
|
|
"loss": 0.8585,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 0.08656701394131562,
|
|
"grad_norm": 0.23500847816467285,
|
|
"learning_rate": 0.0003460642331764813,
|
|
"loss": 0.8797,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 0.0866788576285783,
|
|
"grad_norm": 0.25485488772392273,
|
|
"learning_rate": 0.0003458358079400612,
|
|
"loss": 0.8796,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 0.08679070131584098,
|
|
"grad_norm": 0.27644404768943787,
|
|
"learning_rate": 0.00034560738270364113,
|
|
"loss": 0.8708,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 0.08690254500310367,
|
|
"grad_norm": 0.233077734708786,
|
|
"learning_rate": 0.000345378957467221,
|
|
"loss": 0.8652,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 0.08701438869036635,
|
|
"grad_norm": 0.24039144814014435,
|
|
"learning_rate": 0.00034515053223080084,
|
|
"loss": 0.8723,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 0.08712623237762902,
|
|
"grad_norm": 0.23007874190807343,
|
|
"learning_rate": 0.00034492210699438075,
|
|
"loss": 0.8644,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 0.0872380760648917,
|
|
"grad_norm": 0.27570798993110657,
|
|
"learning_rate": 0.0003446936817579606,
|
|
"loss": 0.872,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.08734991975215439,
|
|
"grad_norm": 0.24157382547855377,
|
|
"learning_rate": 0.00034446525652154046,
|
|
"loss": 0.8846,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 0.08746176343941707,
|
|
"grad_norm": 0.2703733742237091,
|
|
"learning_rate": 0.0003442368312851204,
|
|
"loss": 0.889,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 0.08757360712667975,
|
|
"grad_norm": 0.26786255836486816,
|
|
"learning_rate": 0.0003440084060487003,
|
|
"loss": 0.8933,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 0.08768545081394244,
|
|
"grad_norm": 0.2595812976360321,
|
|
"learning_rate": 0.00034377998081228014,
|
|
"loss": 0.9156,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 0.08779729450120512,
|
|
"grad_norm": 0.24396800994873047,
|
|
"learning_rate": 0.00034355155557586005,
|
|
"loss": 0.8849,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 0.0879091381884678,
|
|
"grad_norm": 0.24363452196121216,
|
|
"learning_rate": 0.0003433231303394399,
|
|
"loss": 0.9011,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 0.08802098187573047,
|
|
"grad_norm": 0.2666647434234619,
|
|
"learning_rate": 0.00034309470510301976,
|
|
"loss": 0.8952,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 0.08813282556299316,
|
|
"grad_norm": 0.267863005399704,
|
|
"learning_rate": 0.00034286627986659967,
|
|
"loss": 0.9113,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 0.08824466925025584,
|
|
"grad_norm": 0.24397262930870056,
|
|
"learning_rate": 0.0003426378546301795,
|
|
"loss": 0.8762,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 0.08835651293751852,
|
|
"grad_norm": 0.23912496864795685,
|
|
"learning_rate": 0.00034240942939375943,
|
|
"loss": 0.8865,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.08846835662478121,
|
|
"grad_norm": 0.2737523913383484,
|
|
"learning_rate": 0.00034218100415733934,
|
|
"loss": 0.8732,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 0.08858020031204389,
|
|
"grad_norm": 0.24978673458099365,
|
|
"learning_rate": 0.0003419525789209192,
|
|
"loss": 0.8832,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 0.08869204399930657,
|
|
"grad_norm": 0.25200751423835754,
|
|
"learning_rate": 0.00034172415368449906,
|
|
"loss": 0.8952,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 0.08880388768656924,
|
|
"grad_norm": 0.7863819003105164,
|
|
"learning_rate": 0.00034149572844807897,
|
|
"loss": 0.8708,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 0.08891573137383194,
|
|
"grad_norm": 0.2560253441333771,
|
|
"learning_rate": 0.0003412673032116588,
|
|
"loss": 0.8681,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 0.08902757506109461,
|
|
"grad_norm": 0.2669181823730469,
|
|
"learning_rate": 0.0003410388779752387,
|
|
"loss": 0.9007,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 0.08913941874835729,
|
|
"grad_norm": 0.27906209230422974,
|
|
"learning_rate": 0.0003408104527388186,
|
|
"loss": 0.8988,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 0.08925126243561998,
|
|
"grad_norm": 0.2506297826766968,
|
|
"learning_rate": 0.0003405820275023985,
|
|
"loss": 0.8997,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 0.08936310612288266,
|
|
"grad_norm": 0.2513269782066345,
|
|
"learning_rate": 0.00034035360226597835,
|
|
"loss": 0.9215,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 0.08947494981014534,
|
|
"grad_norm": 0.2672421634197235,
|
|
"learning_rate": 0.00034012517702955826,
|
|
"loss": 0.9112,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.08958679349740803,
|
|
"grad_norm": 0.2553747296333313,
|
|
"learning_rate": 0.0003398967517931381,
|
|
"loss": 0.9255,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 0.08969863718467071,
|
|
"grad_norm": 0.2325398176908493,
|
|
"learning_rate": 0.000339668326556718,
|
|
"loss": 0.9173,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 0.08981048087193338,
|
|
"grad_norm": 0.23461295664310455,
|
|
"learning_rate": 0.0003394399013202979,
|
|
"loss": 0.9183,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 0.08992232455919606,
|
|
"grad_norm": 0.26092031598091125,
|
|
"learning_rate": 0.00033921147608387774,
|
|
"loss": 0.9106,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 0.09003416824645875,
|
|
"grad_norm": 0.26250872015953064,
|
|
"learning_rate": 0.0003389830508474576,
|
|
"loss": 0.8893,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 0.09014601193372143,
|
|
"grad_norm": 0.2501981556415558,
|
|
"learning_rate": 0.00033875462561103756,
|
|
"loss": 0.8934,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 0.09025785562098411,
|
|
"grad_norm": 0.26185476779937744,
|
|
"learning_rate": 0.0003385262003746174,
|
|
"loss": 0.8855,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 0.0903696993082468,
|
|
"grad_norm": 0.26889827847480774,
|
|
"learning_rate": 0.00033829777513819727,
|
|
"loss": 0.8944,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 0.09048154299550948,
|
|
"grad_norm": 0.2473451793193817,
|
|
"learning_rate": 0.0003380693499017772,
|
|
"loss": 0.8937,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 0.09059338668277216,
|
|
"grad_norm": 0.24157559871673584,
|
|
"learning_rate": 0.00033784092466535704,
|
|
"loss": 0.8903,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.09070523037003483,
|
|
"grad_norm": 0.2701563239097595,
|
|
"learning_rate": 0.0003376124994289369,
|
|
"loss": 0.9109,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 0.09081707405729753,
|
|
"grad_norm": 0.28706929087638855,
|
|
"learning_rate": 0.0003373840741925168,
|
|
"loss": 0.8956,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 0.0909289177445602,
|
|
"grad_norm": 0.27120909094810486,
|
|
"learning_rate": 0.00033715564895609666,
|
|
"loss": 0.8947,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 0.09104076143182288,
|
|
"grad_norm": 0.2504216432571411,
|
|
"learning_rate": 0.00033692722371967657,
|
|
"loss": 0.8814,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 0.09115260511908557,
|
|
"grad_norm": 0.2921849489212036,
|
|
"learning_rate": 0.0003366987984832565,
|
|
"loss": 0.8856,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 0.09126444880634825,
|
|
"grad_norm": 0.2587922513484955,
|
|
"learning_rate": 0.00033647037324683633,
|
|
"loss": 0.8778,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 0.09137629249361093,
|
|
"grad_norm": 0.2399989813566208,
|
|
"learning_rate": 0.0003362419480104162,
|
|
"loss": 0.883,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 0.0914881361808736,
|
|
"grad_norm": 0.24794407188892365,
|
|
"learning_rate": 0.0003360135227739961,
|
|
"loss": 0.8935,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 0.0915999798681363,
|
|
"grad_norm": 0.26669082045555115,
|
|
"learning_rate": 0.00033578509753757595,
|
|
"loss": 0.863,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 0.09171182355539897,
|
|
"grad_norm": 0.25162795186042786,
|
|
"learning_rate": 0.0003355566723011558,
|
|
"loss": 0.8887,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.09182366724266165,
|
|
"grad_norm": 0.28969621658325195,
|
|
"learning_rate": 0.00033532824706473567,
|
|
"loss": 0.9066,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 0.09193551092992434,
|
|
"grad_norm": 0.25944870710372925,
|
|
"learning_rate": 0.00033509982182831563,
|
|
"loss": 0.8875,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 0.09204735461718702,
|
|
"grad_norm": 0.27627986669540405,
|
|
"learning_rate": 0.0003348713965918955,
|
|
"loss": 0.8895,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 0.0921591983044497,
|
|
"grad_norm": 0.2673914134502411,
|
|
"learning_rate": 0.00033464297135547534,
|
|
"loss": 0.8937,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 0.09227104199171238,
|
|
"grad_norm": 0.2810732126235962,
|
|
"learning_rate": 0.00033441454611905525,
|
|
"loss": 0.9007,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 0.09238288567897507,
|
|
"grad_norm": 0.2671091556549072,
|
|
"learning_rate": 0.0003341861208826351,
|
|
"loss": 0.905,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 0.09249472936623775,
|
|
"grad_norm": 0.25006943941116333,
|
|
"learning_rate": 0.00033395769564621496,
|
|
"loss": 0.8981,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 0.09260657305350042,
|
|
"grad_norm": 0.2891542613506317,
|
|
"learning_rate": 0.0003337292704097949,
|
|
"loss": 0.8978,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 0.09271841674076312,
|
|
"grad_norm": 0.29497236013412476,
|
|
"learning_rate": 0.0003335008451733748,
|
|
"loss": 0.9044,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 0.0928302604280258,
|
|
"grad_norm": 0.29290974140167236,
|
|
"learning_rate": 0.00033327241993695464,
|
|
"loss": 0.9081,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.09294210411528847,
|
|
"grad_norm": 0.27077415585517883,
|
|
"learning_rate": 0.00033304399470053455,
|
|
"loss": 0.9184,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 0.09305394780255115,
|
|
"grad_norm": 0.26410186290740967,
|
|
"learning_rate": 0.0003328155694641144,
|
|
"loss": 0.8912,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 0.09316579148981384,
|
|
"grad_norm": 0.2818413972854614,
|
|
"learning_rate": 0.00033258714422769426,
|
|
"loss": 0.9096,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 0.09327763517707652,
|
|
"grad_norm": 0.265286386013031,
|
|
"learning_rate": 0.00033235871899127417,
|
|
"loss": 0.9192,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 0.0933894788643392,
|
|
"grad_norm": 0.2714836597442627,
|
|
"learning_rate": 0.000332130293754854,
|
|
"loss": 0.9122,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 0.09350132255160189,
|
|
"grad_norm": 0.2858263850212097,
|
|
"learning_rate": 0.0003319018685184339,
|
|
"loss": 0.9143,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 0.09361316623886456,
|
|
"grad_norm": 0.27788257598876953,
|
|
"learning_rate": 0.00033167344328201385,
|
|
"loss": 0.9116,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 0.09372500992612724,
|
|
"grad_norm": 0.27748674154281616,
|
|
"learning_rate": 0.0003314450180455937,
|
|
"loss": 0.8934,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 0.09383685361338992,
|
|
"grad_norm": 0.4757048785686493,
|
|
"learning_rate": 0.00033121659280917356,
|
|
"loss": 0.9097,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 0.09394869730065261,
|
|
"grad_norm": 0.3016970157623291,
|
|
"learning_rate": 0.00033098816757275347,
|
|
"loss": 0.8973,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.09406054098791529,
|
|
"grad_norm": 0.2640211880207062,
|
|
"learning_rate": 0.0003307597423363333,
|
|
"loss": 0.8914,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 0.09417238467517797,
|
|
"grad_norm": 0.2608022391796112,
|
|
"learning_rate": 0.0003305313170999132,
|
|
"loss": 0.9138,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 0.09428422836244066,
|
|
"grad_norm": 0.23691967129707336,
|
|
"learning_rate": 0.0003303028918634931,
|
|
"loss": 0.9149,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 0.09439607204970334,
|
|
"grad_norm": 0.28734761476516724,
|
|
"learning_rate": 0.00033007446662707294,
|
|
"loss": 0.9056,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 0.09450791573696601,
|
|
"grad_norm": 0.2846873700618744,
|
|
"learning_rate": 0.00032984604139065285,
|
|
"loss": 0.9052,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 0.09461975942422869,
|
|
"grad_norm": 0.2613682448863983,
|
|
"learning_rate": 0.00032961761615423276,
|
|
"loss": 0.9129,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 0.09473160311149138,
|
|
"grad_norm": 0.25336501002311707,
|
|
"learning_rate": 0.0003293891909178126,
|
|
"loss": 0.9048,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 0.09484344679875406,
|
|
"grad_norm": 0.2662324905395508,
|
|
"learning_rate": 0.0003291607656813925,
|
|
"loss": 0.9181,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 0.09495529048601674,
|
|
"grad_norm": 0.2482605278491974,
|
|
"learning_rate": 0.0003289323404449724,
|
|
"loss": 0.8978,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 0.09506713417327943,
|
|
"grad_norm": 0.24181032180786133,
|
|
"learning_rate": 0.00032870391520855224,
|
|
"loss": 0.9121,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.09517897786054211,
|
|
"grad_norm": 0.276621013879776,
|
|
"learning_rate": 0.0003284754899721321,
|
|
"loss": 0.9106,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 0.09529082154780479,
|
|
"grad_norm": 0.2788410186767578,
|
|
"learning_rate": 0.000328247064735712,
|
|
"loss": 0.9062,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 0.09540266523506746,
|
|
"grad_norm": 0.28387385606765747,
|
|
"learning_rate": 0.0003280186394992919,
|
|
"loss": 0.9309,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 0.09551450892233015,
|
|
"grad_norm": 0.2923261523246765,
|
|
"learning_rate": 0.00032779021426287177,
|
|
"loss": 0.9278,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 0.09562635260959283,
|
|
"grad_norm": 0.3008005917072296,
|
|
"learning_rate": 0.0003275617890264517,
|
|
"loss": 0.9196,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 0.09573819629685551,
|
|
"grad_norm": 0.2849402129650116,
|
|
"learning_rate": 0.00032733336379003154,
|
|
"loss": 0.9243,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 0.0958500399841182,
|
|
"grad_norm": 0.262134313583374,
|
|
"learning_rate": 0.0003271049385536114,
|
|
"loss": 0.9346,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 0.09596188367138088,
|
|
"grad_norm": 0.2891925573348999,
|
|
"learning_rate": 0.0003268765133171913,
|
|
"loss": 0.9176,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 0.09607372735864356,
|
|
"grad_norm": 0.26165837049484253,
|
|
"learning_rate": 0.00032664808808077116,
|
|
"loss": 0.9229,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 0.09618557104590623,
|
|
"grad_norm": 0.2683985233306885,
|
|
"learning_rate": 0.000326419662844351,
|
|
"loss": 0.9067,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.09629741473316893,
|
|
"grad_norm": 0.25300973653793335,
|
|
"learning_rate": 0.000326191237607931,
|
|
"loss": 0.9037,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 0.0964092584204316,
|
|
"grad_norm": 0.30520153045654297,
|
|
"learning_rate": 0.00032596281237151083,
|
|
"loss": 0.9038,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 0.09652110210769428,
|
|
"grad_norm": 0.2573854327201843,
|
|
"learning_rate": 0.0003257343871350907,
|
|
"loss": 0.9062,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 0.09663294579495697,
|
|
"grad_norm": 0.2664088308811188,
|
|
"learning_rate": 0.0003255059618986706,
|
|
"loss": 0.8864,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 0.09674478948221965,
|
|
"grad_norm": 0.26375049352645874,
|
|
"learning_rate": 0.00032527753666225046,
|
|
"loss": 0.8804,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 0.09685663316948233,
|
|
"grad_norm": 0.25367647409439087,
|
|
"learning_rate": 0.0003250491114258303,
|
|
"loss": 0.8987,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 0.09696847685674502,
|
|
"grad_norm": 0.2764420807361603,
|
|
"learning_rate": 0.00032482068618941017,
|
|
"loss": 0.9078,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 0.0970803205440077,
|
|
"grad_norm": 0.2663860023021698,
|
|
"learning_rate": 0.0003245922609529901,
|
|
"loss": 0.8838,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 0.09719216423127038,
|
|
"grad_norm": 0.25380998849868774,
|
|
"learning_rate": 0.00032436383571657,
|
|
"loss": 0.8949,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 0.09730400791853305,
|
|
"grad_norm": 0.29428210854530334,
|
|
"learning_rate": 0.00032413541048014984,
|
|
"loss": 0.883,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.09741585160579574,
|
|
"grad_norm": 0.25604331493377686,
|
|
"learning_rate": 0.00032390698524372975,
|
|
"loss": 0.8891,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 0.09752769529305842,
|
|
"grad_norm": 0.26663005352020264,
|
|
"learning_rate": 0.0003236785600073096,
|
|
"loss": 0.8763,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 0.0976395389803211,
|
|
"grad_norm": 0.27305158972740173,
|
|
"learning_rate": 0.00032345013477088946,
|
|
"loss": 0.8877,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 0.09775138266758379,
|
|
"grad_norm": 0.27395525574684143,
|
|
"learning_rate": 0.0003232217095344694,
|
|
"loss": 0.871,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 0.09786322635484647,
|
|
"grad_norm": 0.26152902841567993,
|
|
"learning_rate": 0.00032299328429804923,
|
|
"loss": 0.8714,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 0.09797507004210915,
|
|
"grad_norm": 0.2872631847858429,
|
|
"learning_rate": 0.0003227648590616291,
|
|
"loss": 0.8754,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 0.09808691372937182,
|
|
"grad_norm": 0.2681150436401367,
|
|
"learning_rate": 0.00032253643382520905,
|
|
"loss": 0.8699,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 0.09819875741663452,
|
|
"grad_norm": 0.27205002307891846,
|
|
"learning_rate": 0.0003223080085887889,
|
|
"loss": 0.8743,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 0.0983106011038972,
|
|
"grad_norm": 0.27747979760169983,
|
|
"learning_rate": 0.00032207958335236876,
|
|
"loss": 0.8607,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 0.09842244479115987,
|
|
"grad_norm": 0.2963927984237671,
|
|
"learning_rate": 0.00032185115811594867,
|
|
"loss": 0.8676,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.09853428847842256,
|
|
"grad_norm": 0.26414602994918823,
|
|
"learning_rate": 0.0003216227328795285,
|
|
"loss": 0.8556,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 0.09864613216568524,
|
|
"grad_norm": 0.3005480170249939,
|
|
"learning_rate": 0.0003213943076431084,
|
|
"loss": 0.8816,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 0.09875797585294792,
|
|
"grad_norm": 0.29625314474105835,
|
|
"learning_rate": 0.0003211658824066883,
|
|
"loss": 0.8747,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 0.0988698195402106,
|
|
"grad_norm": 0.2900589108467102,
|
|
"learning_rate": 0.0003209374571702682,
|
|
"loss": 0.8697,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 0.09898166322747329,
|
|
"grad_norm": 0.2951551675796509,
|
|
"learning_rate": 0.00032070903193384806,
|
|
"loss": 0.8756,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 0.09909350691473597,
|
|
"grad_norm": 0.3049459159374237,
|
|
"learning_rate": 0.00032048060669742797,
|
|
"loss": 0.8767,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 0.09920535060199864,
|
|
"grad_norm": 0.30216872692108154,
|
|
"learning_rate": 0.0003202521814610078,
|
|
"loss": 0.8687,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 0.09931719428926133,
|
|
"grad_norm": 0.2913934290409088,
|
|
"learning_rate": 0.0003200237562245877,
|
|
"loss": 0.8616,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 0.09942903797652401,
|
|
"grad_norm": 0.26879578828811646,
|
|
"learning_rate": 0.0003197953309881676,
|
|
"loss": 0.8681,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 0.09954088166378669,
|
|
"grad_norm": 0.28092971444129944,
|
|
"learning_rate": 0.00031956690575174744,
|
|
"loss": 0.8765,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.09965272535104937,
|
|
"grad_norm": 0.3074035048484802,
|
|
"learning_rate": 0.0003193384805153273,
|
|
"loss": 0.881,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 0.09976456903831206,
|
|
"grad_norm": 0.2945140600204468,
|
|
"learning_rate": 0.00031911005527890726,
|
|
"loss": 0.8913,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 0.09987641272557474,
|
|
"grad_norm": 0.2707176208496094,
|
|
"learning_rate": 0.0003188816300424871,
|
|
"loss": 0.8822,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 0.09998825641283741,
|
|
"grad_norm": 0.2639947235584259,
|
|
"learning_rate": 0.000318653204806067,
|
|
"loss": 0.8892,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 0.1001001001001001,
|
|
"grad_norm": 0.2709505558013916,
|
|
"learning_rate": 0.0003184247795696469,
|
|
"loss": 0.8654,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 0.10021194378736278,
|
|
"grad_norm": 0.27803289890289307,
|
|
"learning_rate": 0.00031819635433322674,
|
|
"loss": 0.8887,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 0.10032378747462546,
|
|
"grad_norm": 0.25851163268089294,
|
|
"learning_rate": 0.0003179679290968066,
|
|
"loss": 0.8662,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 0.10043563116188814,
|
|
"grad_norm": 0.261068731546402,
|
|
"learning_rate": 0.0003177395038603865,
|
|
"loss": 0.8641,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 0.10054747484915083,
|
|
"grad_norm": 0.25510483980178833,
|
|
"learning_rate": 0.00031751107862396636,
|
|
"loss": 0.8762,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 0.10065931853641351,
|
|
"grad_norm": 0.25765854120254517,
|
|
"learning_rate": 0.00031728265338754627,
|
|
"loss": 0.8837,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.10077116222367619,
|
|
"grad_norm": 0.24198535084724426,
|
|
"learning_rate": 0.0003170542281511262,
|
|
"loss": 0.8791,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 0.10088300591093888,
|
|
"grad_norm": 0.2673517167568207,
|
|
"learning_rate": 0.00031682580291470604,
|
|
"loss": 0.8795,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 0.10099484959820156,
|
|
"grad_norm": 0.26392221450805664,
|
|
"learning_rate": 0.0003165973776782859,
|
|
"loss": 0.8788,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 0.10110669328546423,
|
|
"grad_norm": 0.2698739171028137,
|
|
"learning_rate": 0.0003163689524418658,
|
|
"loss": 0.8959,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 0.10121853697272691,
|
|
"grad_norm": 0.2800233066082001,
|
|
"learning_rate": 0.00031614052720544566,
|
|
"loss": 0.8945,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 0.1013303806599896,
|
|
"grad_norm": 0.29603493213653564,
|
|
"learning_rate": 0.0003159121019690255,
|
|
"loss": 0.892,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 0.10144222434725228,
|
|
"grad_norm": 0.26462167501449585,
|
|
"learning_rate": 0.0003156836767326054,
|
|
"loss": 0.8849,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 0.10155406803451496,
|
|
"grad_norm": 0.27941739559173584,
|
|
"learning_rate": 0.00031545525149618534,
|
|
"loss": 0.8782,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 0.10166591172177765,
|
|
"grad_norm": 0.2777186334133148,
|
|
"learning_rate": 0.0003152268262597652,
|
|
"loss": 0.8787,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 0.10177775540904033,
|
|
"grad_norm": 0.25893428921699524,
|
|
"learning_rate": 0.00031499840102334505,
|
|
"loss": 0.8629,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.101889599096303,
|
|
"grad_norm": 0.27407601475715637,
|
|
"learning_rate": 0.00031476997578692496,
|
|
"loss": 0.8619,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 0.10200144278356568,
|
|
"grad_norm": 0.2663459777832031,
|
|
"learning_rate": 0.0003145415505505048,
|
|
"loss": 0.8474,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 0.10211328647082837,
|
|
"grad_norm": 0.2621177136898041,
|
|
"learning_rate": 0.00031431312531408467,
|
|
"loss": 0.8565,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 0.10222513015809105,
|
|
"grad_norm": 0.26687386631965637,
|
|
"learning_rate": 0.0003140847000776646,
|
|
"loss": 0.8438,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 0.10233697384535373,
|
|
"grad_norm": 0.24772432446479797,
|
|
"learning_rate": 0.00031385627484124443,
|
|
"loss": 0.8511,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 0.10244881753261642,
|
|
"grad_norm": 0.278730183839798,
|
|
"learning_rate": 0.00031362784960482434,
|
|
"loss": 0.8499,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 0.1025606612198791,
|
|
"grad_norm": 0.28657999634742737,
|
|
"learning_rate": 0.00031339942436840425,
|
|
"loss": 0.85,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 0.10267250490714178,
|
|
"grad_norm": 0.2848927676677704,
|
|
"learning_rate": 0.0003131709991319841,
|
|
"loss": 0.8411,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 0.10278434859440445,
|
|
"grad_norm": 0.28381872177124023,
|
|
"learning_rate": 0.00031294257389556396,
|
|
"loss": 0.8508,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 0.10289619228166715,
|
|
"grad_norm": 0.26624616980552673,
|
|
"learning_rate": 0.0003127141486591439,
|
|
"loss": 0.8658,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.10300803596892982,
|
|
"grad_norm": 0.2605401277542114,
|
|
"learning_rate": 0.00031248572342272373,
|
|
"loss": 0.8602,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 0.1031198796561925,
|
|
"grad_norm": 0.2819276750087738,
|
|
"learning_rate": 0.0003122572981863036,
|
|
"loss": 0.8614,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 0.10323172334345519,
|
|
"grad_norm": 0.27677878737449646,
|
|
"learning_rate": 0.00031202887294988355,
|
|
"loss": 0.8556,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 0.10334356703071787,
|
|
"grad_norm": 0.25589799880981445,
|
|
"learning_rate": 0.0003118004477134634,
|
|
"loss": 0.8704,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 0.10345541071798055,
|
|
"grad_norm": 0.2731853425502777,
|
|
"learning_rate": 0.00031157202247704326,
|
|
"loss": 0.8428,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 0.10356725440524323,
|
|
"grad_norm": 0.3047199547290802,
|
|
"learning_rate": 0.00031134359724062317,
|
|
"loss": 0.8508,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 0.10367909809250592,
|
|
"grad_norm": 0.28696686029434204,
|
|
"learning_rate": 0.00031111517200420303,
|
|
"loss": 0.8571,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 0.1037909417797686,
|
|
"grad_norm": 0.23354049026966095,
|
|
"learning_rate": 0.0003108867467677829,
|
|
"loss": 0.8518,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 0.10390278546703127,
|
|
"grad_norm": 0.27123787999153137,
|
|
"learning_rate": 0.0003106583215313628,
|
|
"loss": 0.8621,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 0.10401462915429396,
|
|
"grad_norm": 0.2509523332118988,
|
|
"learning_rate": 0.00031042989629494265,
|
|
"loss": 0.8568,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.10412647284155664,
|
|
"grad_norm": 0.2359481155872345,
|
|
"learning_rate": 0.00031020147105852256,
|
|
"loss": 0.8598,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 0.10423831652881932,
|
|
"grad_norm": 0.27097463607788086,
|
|
"learning_rate": 0.00030997304582210247,
|
|
"loss": 0.8615,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 0.104350160216082,
|
|
"grad_norm": 0.2616114020347595,
|
|
"learning_rate": 0.0003097446205856823,
|
|
"loss": 0.8462,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 0.10446200390334469,
|
|
"grad_norm": 0.30027398467063904,
|
|
"learning_rate": 0.0003095161953492622,
|
|
"loss": 0.8683,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 0.10457384759060737,
|
|
"grad_norm": 0.28468623757362366,
|
|
"learning_rate": 0.0003092877701128421,
|
|
"loss": 0.856,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 0.10468569127787004,
|
|
"grad_norm": 0.318521112203598,
|
|
"learning_rate": 0.00030905934487642195,
|
|
"loss": 0.8532,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 0.10479753496513274,
|
|
"grad_norm": 0.3118298351764679,
|
|
"learning_rate": 0.0003088309196400018,
|
|
"loss": 0.8546,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 0.10490937865239541,
|
|
"grad_norm": 0.28549399971961975,
|
|
"learning_rate": 0.0003086024944035817,
|
|
"loss": 0.8718,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 0.10502122233965809,
|
|
"grad_norm": 0.24803526699543,
|
|
"learning_rate": 0.0003083740691671616,
|
|
"loss": 0.8489,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 0.10513306602692078,
|
|
"grad_norm": 0.26765918731689453,
|
|
"learning_rate": 0.0003081456439307415,
|
|
"loss": 0.8617,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.10524490971418346,
|
|
"grad_norm": 0.26363757252693176,
|
|
"learning_rate": 0.0003079172186943214,
|
|
"loss": 0.8648,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 0.10535675340144614,
|
|
"grad_norm": 0.2734963595867157,
|
|
"learning_rate": 0.00030768879345790124,
|
|
"loss": 0.8556,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 0.10546859708870882,
|
|
"grad_norm": 0.2773530185222626,
|
|
"learning_rate": 0.0003074603682214811,
|
|
"loss": 0.8737,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 0.1055804407759715,
|
|
"grad_norm": 0.2684498429298401,
|
|
"learning_rate": 0.000307231942985061,
|
|
"loss": 0.8657,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 0.10569228446323418,
|
|
"grad_norm": 0.26110732555389404,
|
|
"learning_rate": 0.00030700351774864086,
|
|
"loss": 0.8618,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 0.10580412815049686,
|
|
"grad_norm": 0.27595090866088867,
|
|
"learning_rate": 0.0003067750925122207,
|
|
"loss": 0.8654,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 0.10591597183775955,
|
|
"grad_norm": 0.2799736559391022,
|
|
"learning_rate": 0.0003065466672758007,
|
|
"loss": 0.8583,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 0.10602781552502223,
|
|
"grad_norm": 0.2729387879371643,
|
|
"learning_rate": 0.00030631824203938054,
|
|
"loss": 0.8628,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 0.10613965921228491,
|
|
"grad_norm": 0.30332332849502563,
|
|
"learning_rate": 0.0003060898168029604,
|
|
"loss": 0.8512,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 0.10625150289954759,
|
|
"grad_norm": 0.276753306388855,
|
|
"learning_rate": 0.0003058613915665403,
|
|
"loss": 0.85,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.10636334658681028,
|
|
"grad_norm": 0.3190478980541229,
|
|
"learning_rate": 0.00030563296633012016,
|
|
"loss": 0.8534,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 0.10647519027407296,
|
|
"grad_norm": 0.2926968038082123,
|
|
"learning_rate": 0.0003054045410937,
|
|
"loss": 0.8309,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 0.10658703396133563,
|
|
"grad_norm": 0.29631507396698,
|
|
"learning_rate": 0.0003051761158572799,
|
|
"loss": 0.8406,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 0.10669887764859833,
|
|
"grad_norm": 0.2881840765476227,
|
|
"learning_rate": 0.0003049476906208598,
|
|
"loss": 0.8274,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 0.106810721335861,
|
|
"grad_norm": 0.2623940408229828,
|
|
"learning_rate": 0.0003047192653844397,
|
|
"loss": 0.8346,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 0.10692256502312368,
|
|
"grad_norm": 0.29798468947410583,
|
|
"learning_rate": 0.00030449084014801955,
|
|
"loss": 0.8362,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 0.10703440871038636,
|
|
"grad_norm": 0.2976382076740265,
|
|
"learning_rate": 0.00030426241491159946,
|
|
"loss": 0.8179,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 0.10714625239764905,
|
|
"grad_norm": 0.28637486696243286,
|
|
"learning_rate": 0.0003040339896751793,
|
|
"loss": 0.8363,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 0.10725809608491173,
|
|
"grad_norm": 0.3023325204849243,
|
|
"learning_rate": 0.00030380556443875917,
|
|
"loss": 0.8382,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 0.1073699397721744,
|
|
"grad_norm": 0.2889160215854645,
|
|
"learning_rate": 0.0003035771392023391,
|
|
"loss": 0.8476,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.1074817834594371,
|
|
"grad_norm": 0.2868768572807312,
|
|
"learning_rate": 0.00030334871396591893,
|
|
"loss": 0.8482,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 0.10759362714669977,
|
|
"grad_norm": 0.2773813307285309,
|
|
"learning_rate": 0.0003031202887294988,
|
|
"loss": 0.8577,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 0.10770547083396245,
|
|
"grad_norm": 0.28698423504829407,
|
|
"learning_rate": 0.00030289186349307875,
|
|
"loss": 0.8663,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 0.10781731452122513,
|
|
"grad_norm": 0.26839759945869446,
|
|
"learning_rate": 0.0003026634382566586,
|
|
"loss": 0.8649,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 0.10792915820848782,
|
|
"grad_norm": 0.2686857283115387,
|
|
"learning_rate": 0.00030243501302023847,
|
|
"loss": 0.8563,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 0.1080410018957505,
|
|
"grad_norm": 0.2815250754356384,
|
|
"learning_rate": 0.0003022065877838184,
|
|
"loss": 0.8538,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 0.10815284558301318,
|
|
"grad_norm": 0.24625800549983978,
|
|
"learning_rate": 0.00030197816254739823,
|
|
"loss": 0.87,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 0.10826468927027587,
|
|
"grad_norm": 0.27051877975463867,
|
|
"learning_rate": 0.0003017497373109781,
|
|
"loss": 0.8692,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 0.10837653295753855,
|
|
"grad_norm": 0.253892183303833,
|
|
"learning_rate": 0.000301521312074558,
|
|
"loss": 0.8583,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 0.10848837664480122,
|
|
"grad_norm": 0.26951879262924194,
|
|
"learning_rate": 0.0003012928868381379,
|
|
"loss": 0.8699,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.1086002203320639,
|
|
"grad_norm": 0.27741488814353943,
|
|
"learning_rate": 0.00030106446160171776,
|
|
"loss": 0.8673,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 0.10871206401932659,
|
|
"grad_norm": 0.2655075788497925,
|
|
"learning_rate": 0.00030083603636529767,
|
|
"loss": 0.8628,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 0.10882390770658927,
|
|
"grad_norm": 0.298532098531723,
|
|
"learning_rate": 0.00030060761112887753,
|
|
"loss": 0.8707,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 0.10893575139385195,
|
|
"grad_norm": 0.3105684816837311,
|
|
"learning_rate": 0.0003003791858924574,
|
|
"loss": 0.8661,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 0.10904759508111464,
|
|
"grad_norm": 0.27781355381011963,
|
|
"learning_rate": 0.0003001507606560373,
|
|
"loss": 0.8871,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 0.10915943876837732,
|
|
"grad_norm": 0.2966761589050293,
|
|
"learning_rate": 0.00029992233541961715,
|
|
"loss": 0.875,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 0.10927128245564,
|
|
"grad_norm": 0.3010736405849457,
|
|
"learning_rate": 0.000299693910183197,
|
|
"loss": 0.8746,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 0.10938312614290267,
|
|
"grad_norm": 0.31352171301841736,
|
|
"learning_rate": 0.00029946548494677697,
|
|
"loss": 0.8733,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 0.10949496983016536,
|
|
"grad_norm": 0.30627313256263733,
|
|
"learning_rate": 0.0002992370597103568,
|
|
"loss": 0.8675,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 0.10960681351742804,
|
|
"grad_norm": 0.23990577459335327,
|
|
"learning_rate": 0.0002990086344739367,
|
|
"loss": 0.8614,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.10971865720469072,
|
|
"grad_norm": 0.2856599688529968,
|
|
"learning_rate": 0.0002987802092375166,
|
|
"loss": 0.8454,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 0.10983050089195341,
|
|
"grad_norm": 0.26476389169692993,
|
|
"learning_rate": 0.00029855178400109645,
|
|
"loss": 0.8616,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 0.10994234457921609,
|
|
"grad_norm": 0.2871752381324768,
|
|
"learning_rate": 0.0002983233587646763,
|
|
"loss": 0.8444,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 0.11005418826647877,
|
|
"grad_norm": 0.27318039536476135,
|
|
"learning_rate": 0.0002980949335282562,
|
|
"loss": 0.8487,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 0.11016603195374144,
|
|
"grad_norm": 0.25630125403404236,
|
|
"learning_rate": 0.00029786650829183607,
|
|
"loss": 0.846,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 0.11027787564100414,
|
|
"grad_norm": 0.23908184468746185,
|
|
"learning_rate": 0.000297638083055416,
|
|
"loss": 0.8403,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 0.11038971932826681,
|
|
"grad_norm": 0.2978418469429016,
|
|
"learning_rate": 0.0002974096578189959,
|
|
"loss": 0.8652,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 0.11050156301552949,
|
|
"grad_norm": 0.2503781318664551,
|
|
"learning_rate": 0.00029718123258257574,
|
|
"loss": 0.8657,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 0.11061340670279218,
|
|
"grad_norm": 0.28556469082832336,
|
|
"learning_rate": 0.0002969528073461556,
|
|
"loss": 0.8501,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 0.11072525039005486,
|
|
"grad_norm": 0.2643977701663971,
|
|
"learning_rate": 0.0002967243821097355,
|
|
"loss": 0.8742,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.11083709407731754,
|
|
"grad_norm": 0.2757241725921631,
|
|
"learning_rate": 0.00029649595687331536,
|
|
"loss": 0.8837,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 0.11094893776458022,
|
|
"grad_norm": 0.28263452649116516,
|
|
"learning_rate": 0.0002962675316368952,
|
|
"loss": 0.8793,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 0.11106078145184291,
|
|
"grad_norm": 0.27624276280403137,
|
|
"learning_rate": 0.00029603910640047513,
|
|
"loss": 0.8669,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 0.11117262513910559,
|
|
"grad_norm": 0.2814600467681885,
|
|
"learning_rate": 0.00029581068116405504,
|
|
"loss": 0.8858,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 0.11128446882636826,
|
|
"grad_norm": 0.2871972918510437,
|
|
"learning_rate": 0.0002955822559276349,
|
|
"loss": 0.8714,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 0.11139631251363095,
|
|
"grad_norm": 0.2885976731777191,
|
|
"learning_rate": 0.0002953538306912148,
|
|
"loss": 0.8675,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 0.11150815620089363,
|
|
"grad_norm": 0.281021386384964,
|
|
"learning_rate": 0.00029512540545479466,
|
|
"loss": 0.8762,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 0.11161999988815631,
|
|
"grad_norm": 0.2923888862133026,
|
|
"learning_rate": 0.0002948969802183745,
|
|
"loss": 0.87,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 0.11173184357541899,
|
|
"grad_norm": 0.2596036195755005,
|
|
"learning_rate": 0.00029466855498195443,
|
|
"loss": 0.8696,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 0.11184368726268168,
|
|
"grad_norm": 0.2749873697757721,
|
|
"learning_rate": 0.0002944401297455343,
|
|
"loss": 0.8604,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.11195553094994436,
|
|
"grad_norm": 0.2696766257286072,
|
|
"learning_rate": 0.00029421170450911414,
|
|
"loss": 0.8743,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 0.11206737463720703,
|
|
"grad_norm": 0.2824450731277466,
|
|
"learning_rate": 0.00029398327927269405,
|
|
"loss": 0.8734,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 0.11217921832446973,
|
|
"grad_norm": 0.2795054614543915,
|
|
"learning_rate": 0.00029375485403627396,
|
|
"loss": 0.865,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 0.1122910620117324,
|
|
"grad_norm": 0.2974453866481781,
|
|
"learning_rate": 0.0002935264287998538,
|
|
"loss": 0.8762,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 0.11240290569899508,
|
|
"grad_norm": 0.27134743332862854,
|
|
"learning_rate": 0.00029329800356343367,
|
|
"loss": 0.8616,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 0.11251474938625777,
|
|
"grad_norm": 0.2651810348033905,
|
|
"learning_rate": 0.0002930695783270136,
|
|
"loss": 0.8653,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 0.11262659307352045,
|
|
"grad_norm": 0.29161420464515686,
|
|
"learning_rate": 0.00029284115309059344,
|
|
"loss": 0.8583,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 0.11273843676078313,
|
|
"grad_norm": 0.27624139189720154,
|
|
"learning_rate": 0.0002926127278541733,
|
|
"loss": 0.8447,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 0.1128502804480458,
|
|
"grad_norm": 0.290632039308548,
|
|
"learning_rate": 0.00029238430261775326,
|
|
"loss": 0.8568,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 0.1129621241353085,
|
|
"grad_norm": 0.2906644940376282,
|
|
"learning_rate": 0.0002921558773813331,
|
|
"loss": 0.8566,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.11307396782257118,
|
|
"grad_norm": 0.29284584522247314,
|
|
"learning_rate": 0.00029192745214491297,
|
|
"loss": 0.8679,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 0.11318581150983385,
|
|
"grad_norm": 0.29635393619537354,
|
|
"learning_rate": 0.0002916990269084929,
|
|
"loss": 0.8648,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 0.11329765519709654,
|
|
"grad_norm": 0.2560585141181946,
|
|
"learning_rate": 0.00029147060167207273,
|
|
"loss": 0.8565,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 0.11340949888435922,
|
|
"grad_norm": 0.2480679154396057,
|
|
"learning_rate": 0.0002912421764356526,
|
|
"loss": 0.8574,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 0.1135213425716219,
|
|
"grad_norm": 0.28708118200302124,
|
|
"learning_rate": 0.0002910137511992325,
|
|
"loss": 0.8658,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 0.11363318625888458,
|
|
"grad_norm": 0.2553873062133789,
|
|
"learning_rate": 0.00029078532596281235,
|
|
"loss": 0.8721,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 0.11374502994614727,
|
|
"grad_norm": 0.26742488145828247,
|
|
"learning_rate": 0.00029055690072639226,
|
|
"loss": 0.8608,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 0.11385687363340995,
|
|
"grad_norm": 0.2674279510974884,
|
|
"learning_rate": 0.0002903284754899722,
|
|
"loss": 0.8763,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 0.11396871732067262,
|
|
"grad_norm": 0.2484348863363266,
|
|
"learning_rate": 0.00029010005025355203,
|
|
"loss": 0.8799,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 0.11408056100793532,
|
|
"grad_norm": 0.2603932321071625,
|
|
"learning_rate": 0.0002898716250171319,
|
|
"loss": 0.8922,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.114192404695198,
|
|
"grad_norm": 0.2510204613208771,
|
|
"learning_rate": 0.0002896431997807118,
|
|
"loss": 0.8851,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 0.11430424838246067,
|
|
"grad_norm": 0.26795732975006104,
|
|
"learning_rate": 0.00028941477454429165,
|
|
"loss": 0.8917,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 0.11441609206972335,
|
|
"grad_norm": 0.2880701422691345,
|
|
"learning_rate": 0.0002891863493078715,
|
|
"loss": 0.8903,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 0.11452793575698604,
|
|
"grad_norm": 0.23970642685890198,
|
|
"learning_rate": 0.0002889579240714514,
|
|
"loss": 0.8882,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 0.11463977944424872,
|
|
"grad_norm": 0.2786742150783539,
|
|
"learning_rate": 0.0002887294988350313,
|
|
"loss": 0.8827,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 0.1147516231315114,
|
|
"grad_norm": 0.2780776619911194,
|
|
"learning_rate": 0.0002885010735986112,
|
|
"loss": 0.8879,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 0.11486346681877409,
|
|
"grad_norm": 0.26984742283821106,
|
|
"learning_rate": 0.0002882726483621911,
|
|
"loss": 0.8732,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 0.11497531050603677,
|
|
"grad_norm": 0.26902884244918823,
|
|
"learning_rate": 0.00028804422312577095,
|
|
"loss": 0.878,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 0.11508715419329944,
|
|
"grad_norm": 0.24787285923957825,
|
|
"learning_rate": 0.0002878157978893508,
|
|
"loss": 0.8573,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 0.11519899788056212,
|
|
"grad_norm": 0.22702965140342712,
|
|
"learning_rate": 0.0002875873726529307,
|
|
"loss": 0.8621,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.11531084156782481,
|
|
"grad_norm": 0.27474096417427063,
|
|
"learning_rate": 0.00028735894741651057,
|
|
"loss": 0.8763,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 0.11542268525508749,
|
|
"grad_norm": 0.2605912983417511,
|
|
"learning_rate": 0.0002871305221800904,
|
|
"loss": 0.8706,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 0.11553452894235017,
|
|
"grad_norm": 0.25281742215156555,
|
|
"learning_rate": 0.0002869020969436704,
|
|
"loss": 0.855,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 0.11564637262961286,
|
|
"grad_norm": 0.2559000849723816,
|
|
"learning_rate": 0.00028667367170725024,
|
|
"loss": 0.8549,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 0.11575821631687554,
|
|
"grad_norm": 0.2439345121383667,
|
|
"learning_rate": 0.0002864452464708301,
|
|
"loss": 0.8639,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 0.11587006000413821,
|
|
"grad_norm": 0.2690776288509369,
|
|
"learning_rate": 0.00028621682123441,
|
|
"loss": 0.8487,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 0.11598190369140089,
|
|
"grad_norm": 0.25111067295074463,
|
|
"learning_rate": 0.00028598839599798987,
|
|
"loss": 0.8558,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 0.11609374737866358,
|
|
"grad_norm": 0.26838451623916626,
|
|
"learning_rate": 0.0002857599707615697,
|
|
"loss": 0.8603,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 0.11620559106592626,
|
|
"grad_norm": 0.2401856780052185,
|
|
"learning_rate": 0.00028553154552514963,
|
|
"loss": 0.8286,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 0.11631743475318894,
|
|
"grad_norm": 0.26284924149513245,
|
|
"learning_rate": 0.0002853031202887295,
|
|
"loss": 0.8402,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.11642927844045163,
|
|
"grad_norm": 0.28734955191612244,
|
|
"learning_rate": 0.0002850746950523094,
|
|
"loss": 0.8358,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 0.11654112212771431,
|
|
"grad_norm": 0.2564549446105957,
|
|
"learning_rate": 0.0002848462698158893,
|
|
"loss": 0.8458,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 0.11665296581497699,
|
|
"grad_norm": 0.2507050633430481,
|
|
"learning_rate": 0.00028461784457946916,
|
|
"loss": 0.8371,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 0.11676480950223966,
|
|
"grad_norm": 0.25748834013938904,
|
|
"learning_rate": 0.000284389419343049,
|
|
"loss": 0.8527,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 0.11687665318950236,
|
|
"grad_norm": 0.24484454095363617,
|
|
"learning_rate": 0.00028416099410662893,
|
|
"loss": 0.8372,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 0.11698849687676503,
|
|
"grad_norm": 0.24171967804431915,
|
|
"learning_rate": 0.0002839325688702088,
|
|
"loss": 0.8327,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 0.11710034056402771,
|
|
"grad_norm": 0.30423420667648315,
|
|
"learning_rate": 0.00028370414363378864,
|
|
"loss": 0.8271,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 0.1172121842512904,
|
|
"grad_norm": 0.2598424553871155,
|
|
"learning_rate": 0.0002834757183973685,
|
|
"loss": 0.8169,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 0.11732402793855308,
|
|
"grad_norm": 0.2608656585216522,
|
|
"learning_rate": 0.00028324729316094846,
|
|
"loss": 0.8261,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 0.11743587162581576,
|
|
"grad_norm": 0.25370126962661743,
|
|
"learning_rate": 0.0002830188679245283,
|
|
"loss": 0.8227,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.11754771531307844,
|
|
"grad_norm": 0.2760542333126068,
|
|
"learning_rate": 0.00028279044268810817,
|
|
"loss": 0.8413,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 0.11765955900034113,
|
|
"grad_norm": 0.24994856119155884,
|
|
"learning_rate": 0.0002825620174516881,
|
|
"loss": 0.8288,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 0.1177714026876038,
|
|
"grad_norm": 0.25439032912254333,
|
|
"learning_rate": 0.00028233359221526794,
|
|
"loss": 0.8318,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 0.11788324637486648,
|
|
"grad_norm": 0.28182244300842285,
|
|
"learning_rate": 0.0002821051669788478,
|
|
"loss": 0.8437,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 0.11799509006212917,
|
|
"grad_norm": 0.2419012039899826,
|
|
"learning_rate": 0.0002818767417424277,
|
|
"loss": 0.8446,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 0.11810693374939185,
|
|
"grad_norm": 0.2598857581615448,
|
|
"learning_rate": 0.0002816483165060076,
|
|
"loss": 0.8428,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 0.11821877743665453,
|
|
"grad_norm": 0.25206229090690613,
|
|
"learning_rate": 0.00028141989126958747,
|
|
"loss": 0.8533,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 0.1183306211239172,
|
|
"grad_norm": 0.25155991315841675,
|
|
"learning_rate": 0.0002811914660331674,
|
|
"loss": 0.8538,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 0.1184424648111799,
|
|
"grad_norm": 0.2342199832201004,
|
|
"learning_rate": 0.00028096304079674723,
|
|
"loss": 0.8519,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 0.11855430849844258,
|
|
"grad_norm": 0.25823327898979187,
|
|
"learning_rate": 0.0002807346155603271,
|
|
"loss": 0.8483,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.11866615218570525,
|
|
"grad_norm": 0.26428598165512085,
|
|
"learning_rate": 0.000280506190323907,
|
|
"loss": 0.86,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 0.11877799587296795,
|
|
"grad_norm": 0.25176918506622314,
|
|
"learning_rate": 0.00028027776508748685,
|
|
"loss": 0.8589,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 0.11888983956023062,
|
|
"grad_norm": 0.28826919198036194,
|
|
"learning_rate": 0.0002800493398510667,
|
|
"loss": 0.8627,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 0.1190016832474933,
|
|
"grad_norm": 0.24679958820343018,
|
|
"learning_rate": 0.0002798209146146467,
|
|
"loss": 0.8563,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 0.11911352693475598,
|
|
"grad_norm": 0.2550687789916992,
|
|
"learning_rate": 0.00027959248937822653,
|
|
"loss": 0.8535,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 0.11922537062201867,
|
|
"grad_norm": 0.2506476640701294,
|
|
"learning_rate": 0.0002793640641418064,
|
|
"loss": 0.8553,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 0.11933721430928135,
|
|
"grad_norm": 0.24980700016021729,
|
|
"learning_rate": 0.0002791356389053863,
|
|
"loss": 0.854,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 0.11944905799654403,
|
|
"grad_norm": 0.2280970811843872,
|
|
"learning_rate": 0.00027890721366896615,
|
|
"loss": 0.8569,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 0.11956090168380672,
|
|
"grad_norm": 0.25191232562065125,
|
|
"learning_rate": 0.000278678788432546,
|
|
"loss": 0.8566,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 0.1196727453710694,
|
|
"grad_norm": 0.2748493552207947,
|
|
"learning_rate": 0.0002784503631961259,
|
|
"loss": 0.8573,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.11978458905833207,
|
|
"grad_norm": 0.25123515725135803,
|
|
"learning_rate": 0.00027822193795970577,
|
|
"loss": 0.8473,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 0.11989643274559475,
|
|
"grad_norm": 0.25573378801345825,
|
|
"learning_rate": 0.0002779935127232857,
|
|
"loss": 0.8469,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 0.12000827643285744,
|
|
"grad_norm": 0.23367713391780853,
|
|
"learning_rate": 0.0002777650874868656,
|
|
"loss": 0.8452,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 0.12012012012012012,
|
|
"grad_norm": 0.24593010544776917,
|
|
"learning_rate": 0.00027753666225044545,
|
|
"loss": 0.838,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 0.1202319638073828,
|
|
"grad_norm": 0.2422724962234497,
|
|
"learning_rate": 0.0002773082370140253,
|
|
"loss": 0.8398,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 0.12034380749464549,
|
|
"grad_norm": 0.24471783638000488,
|
|
"learning_rate": 0.0002770798117776052,
|
|
"loss": 0.8409,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 0.12045565118190817,
|
|
"grad_norm": 0.25523480772972107,
|
|
"learning_rate": 0.00027685138654118507,
|
|
"loss": 0.835,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 0.12056749486917084,
|
|
"grad_norm": 0.24846532940864563,
|
|
"learning_rate": 0.0002766229613047649,
|
|
"loss": 0.842,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 0.12067933855643354,
|
|
"grad_norm": 0.26955240964889526,
|
|
"learning_rate": 0.00027639453606834484,
|
|
"loss": 0.8525,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 0.12079118224369621,
|
|
"grad_norm": 0.2711884081363678,
|
|
"learning_rate": 0.00027616611083192475,
|
|
"loss": 0.8352,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.12090302593095889,
|
|
"grad_norm": 0.24954953789710999,
|
|
"learning_rate": 0.0002759376855955046,
|
|
"loss": 0.8257,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 0.12101486961822157,
|
|
"grad_norm": 0.27029111981391907,
|
|
"learning_rate": 0.0002757092603590845,
|
|
"loss": 0.8147,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 0.12112671330548426,
|
|
"grad_norm": 0.2440258413553238,
|
|
"learning_rate": 0.00027548083512266437,
|
|
"loss": 0.8239,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 0.12123855699274694,
|
|
"grad_norm": 0.27082934975624084,
|
|
"learning_rate": 0.0002752524098862442,
|
|
"loss": 0.8391,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 0.12135040068000962,
|
|
"grad_norm": 0.27641886472702026,
|
|
"learning_rate": 0.00027502398464982413,
|
|
"loss": 0.8276,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 0.1214622443672723,
|
|
"grad_norm": 0.24772177636623383,
|
|
"learning_rate": 0.000274795559413404,
|
|
"loss": 0.8226,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 0.12157408805453498,
|
|
"grad_norm": 0.2585364580154419,
|
|
"learning_rate": 0.00027456713417698384,
|
|
"loss": 0.8096,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 0.12168593174179766,
|
|
"grad_norm": 0.2730146050453186,
|
|
"learning_rate": 0.0002743387089405638,
|
|
"loss": 0.8156,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 0.12179777542906034,
|
|
"grad_norm": 0.2693599760532379,
|
|
"learning_rate": 0.00027411028370414366,
|
|
"loss": 0.8125,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 0.12190961911632303,
|
|
"grad_norm": 0.26071295142173767,
|
|
"learning_rate": 0.0002738818584677235,
|
|
"loss": 0.8106,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.12202146280358571,
|
|
"grad_norm": 0.2560258209705353,
|
|
"learning_rate": 0.0002736534332313034,
|
|
"loss": 0.8195,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 0.12213330649084839,
|
|
"grad_norm": 0.27529552578926086,
|
|
"learning_rate": 0.0002734250079948833,
|
|
"loss": 0.8104,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 0.12224515017811108,
|
|
"grad_norm": 0.2782133221626282,
|
|
"learning_rate": 0.00027319658275846314,
|
|
"loss": 0.8105,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 0.12235699386537376,
|
|
"grad_norm": 0.27981024980545044,
|
|
"learning_rate": 0.000272968157522043,
|
|
"loss": 0.8085,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 0.12246883755263643,
|
|
"grad_norm": 0.2741667926311493,
|
|
"learning_rate": 0.0002727397322856229,
|
|
"loss": 0.8042,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 0.12258068123989911,
|
|
"grad_norm": 0.2468159943819046,
|
|
"learning_rate": 0.0002725113070492028,
|
|
"loss": 0.8198,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 0.1226925249271618,
|
|
"grad_norm": 0.26167941093444824,
|
|
"learning_rate": 0.00027228288181278267,
|
|
"loss": 0.8176,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 0.12280436861442448,
|
|
"grad_norm": 0.26660802960395813,
|
|
"learning_rate": 0.0002720544565763626,
|
|
"loss": 0.8036,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 0.12291621230168716,
|
|
"grad_norm": 0.301575243473053,
|
|
"learning_rate": 0.00027182603133994244,
|
|
"loss": 0.8049,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 0.12302805598894985,
|
|
"grad_norm": 0.2759682834148407,
|
|
"learning_rate": 0.0002715976061035223,
|
|
"loss": 0.8024,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.12313989967621253,
|
|
"grad_norm": 0.25659626722335815,
|
|
"learning_rate": 0.0002713691808671022,
|
|
"loss": 0.8229,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 0.1232517433634752,
|
|
"grad_norm": 0.2672923505306244,
|
|
"learning_rate": 0.00027114075563068206,
|
|
"loss": 0.8018,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 0.12336358705073788,
|
|
"grad_norm": 0.25423988699913025,
|
|
"learning_rate": 0.0002709123303942619,
|
|
"loss": 0.836,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 0.12347543073800057,
|
|
"grad_norm": 0.28428804874420166,
|
|
"learning_rate": 0.0002706839051578419,
|
|
"loss": 0.8299,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 0.12358727442526325,
|
|
"grad_norm": 0.2924467921257019,
|
|
"learning_rate": 0.00027045547992142173,
|
|
"loss": 0.8236,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 0.12369911811252593,
|
|
"grad_norm": 0.25230658054351807,
|
|
"learning_rate": 0.0002702270546850016,
|
|
"loss": 0.8274,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 0.12381096179978862,
|
|
"grad_norm": 0.27876734733581543,
|
|
"learning_rate": 0.0002699986294485815,
|
|
"loss": 0.8244,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 0.1239228054870513,
|
|
"grad_norm": 0.29841694235801697,
|
|
"learning_rate": 0.00026977020421216136,
|
|
"loss": 0.8327,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 0.12403464917431398,
|
|
"grad_norm": 0.3055926263332367,
|
|
"learning_rate": 0.0002695417789757412,
|
|
"loss": 0.8247,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 0.12414649286157665,
|
|
"grad_norm": 0.275919109582901,
|
|
"learning_rate": 0.0002693133537393211,
|
|
"loss": 0.8263,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.12425833654883935,
|
|
"grad_norm": 0.3069559931755066,
|
|
"learning_rate": 0.00026908492850290103,
|
|
"loss": 0.8242,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 0.12437018023610202,
|
|
"grad_norm": 0.2574029564857483,
|
|
"learning_rate": 0.0002688565032664809,
|
|
"loss": 0.819,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 0.1244820239233647,
|
|
"grad_norm": 0.25053170323371887,
|
|
"learning_rate": 0.0002686280780300608,
|
|
"loss": 0.8022,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 0.12459386761062739,
|
|
"grad_norm": 0.27337634563446045,
|
|
"learning_rate": 0.00026839965279364065,
|
|
"loss": 0.8127,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 0.12470571129789007,
|
|
"grad_norm": 0.2531510889530182,
|
|
"learning_rate": 0.0002681712275572205,
|
|
"loss": 0.8138,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 0.12481755498515275,
|
|
"grad_norm": 0.27455076575279236,
|
|
"learning_rate": 0.0002679428023208004,
|
|
"loss": 0.7974,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 0.12492939867241543,
|
|
"grad_norm": 0.2515604496002197,
|
|
"learning_rate": 0.0002677143770843803,
|
|
"loss": 0.8077,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 0.12504124235967812,
|
|
"grad_norm": 0.27941974997520447,
|
|
"learning_rate": 0.00026748595184796013,
|
|
"loss": 0.8099,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 0.1251530860469408,
|
|
"grad_norm": 0.2508449852466583,
|
|
"learning_rate": 0.0002672575266115401,
|
|
"loss": 0.8077,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 0.12526492973420347,
|
|
"grad_norm": 0.24805410206317902,
|
|
"learning_rate": 0.00026702910137511995,
|
|
"loss": 0.8029,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.12537677342146616,
|
|
"grad_norm": 0.2730201184749603,
|
|
"learning_rate": 0.0002668006761386998,
|
|
"loss": 0.8383,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 0.12548861710872883,
|
|
"grad_norm": 0.24301932752132416,
|
|
"learning_rate": 0.0002665722509022797,
|
|
"loss": 0.8245,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 0.12560046079599152,
|
|
"grad_norm": 0.270059734582901,
|
|
"learning_rate": 0.00026634382566585957,
|
|
"loss": 0.8287,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 0.1257123044832542,
|
|
"grad_norm": 0.24491746723651886,
|
|
"learning_rate": 0.0002661154004294394,
|
|
"loss": 0.8283,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 0.12582414817051688,
|
|
"grad_norm": 0.2461182177066803,
|
|
"learning_rate": 0.00026588697519301934,
|
|
"loss": 0.8285,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 0.12593599185777957,
|
|
"grad_norm": 0.26306700706481934,
|
|
"learning_rate": 0.0002656585499565992,
|
|
"loss": 0.8366,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 0.12604783554504226,
|
|
"grad_norm": 0.2317613661289215,
|
|
"learning_rate": 0.0002654301247201791,
|
|
"loss": 0.8373,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 0.12615967923230492,
|
|
"grad_norm": 0.25218284130096436,
|
|
"learning_rate": 0.000265201699483759,
|
|
"loss": 0.8163,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 0.1262715229195676,
|
|
"grad_norm": 0.2527898848056793,
|
|
"learning_rate": 0.00026497327424733887,
|
|
"loss": 0.819,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 0.1263833666068303,
|
|
"grad_norm": 0.2344309389591217,
|
|
"learning_rate": 0.0002647448490109187,
|
|
"loss": 0.8335,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.12649521029409297,
|
|
"grad_norm": 0.23913320899009705,
|
|
"learning_rate": 0.00026451642377449863,
|
|
"loss": 0.8289,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 0.12660705398135566,
|
|
"grad_norm": 0.24901095032691956,
|
|
"learning_rate": 0.0002642879985380785,
|
|
"loss": 0.8159,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 0.12671889766861835,
|
|
"grad_norm": 0.2503173351287842,
|
|
"learning_rate": 0.00026405957330165834,
|
|
"loss": 0.8372,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 0.12683074135588102,
|
|
"grad_norm": 0.2341470569372177,
|
|
"learning_rate": 0.00026383114806523825,
|
|
"loss": 0.8264,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 0.1269425850431437,
|
|
"grad_norm": 0.23143555223941803,
|
|
"learning_rate": 0.00026360272282881816,
|
|
"loss": 0.824,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 0.12705442873040637,
|
|
"grad_norm": 0.24911652505397797,
|
|
"learning_rate": 0.000263374297592398,
|
|
"loss": 0.82,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 0.12716627241766906,
|
|
"grad_norm": 0.21931353211402893,
|
|
"learning_rate": 0.0002631458723559779,
|
|
"loss": 0.8194,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 0.12727811610493175,
|
|
"grad_norm": 0.2432345151901245,
|
|
"learning_rate": 0.0002629174471195578,
|
|
"loss": 0.8371,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 0.12738995979219442,
|
|
"grad_norm": 0.24188277125358582,
|
|
"learning_rate": 0.00026268902188313764,
|
|
"loss": 0.8096,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 0.1275018034794571,
|
|
"grad_norm": 0.2522214651107788,
|
|
"learning_rate": 0.0002624605966467175,
|
|
"loss": 0.8187,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.1276136471667198,
|
|
"grad_norm": 0.2596495449542999,
|
|
"learning_rate": 0.0002622321714102974,
|
|
"loss": 0.8138,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 0.12772549085398247,
|
|
"grad_norm": 0.2708049714565277,
|
|
"learning_rate": 0.00026200374617387726,
|
|
"loss": 0.8066,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 0.12783733454124516,
|
|
"grad_norm": 0.27820831537246704,
|
|
"learning_rate": 0.00026177532093745717,
|
|
"loss": 0.8112,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 0.12794917822850785,
|
|
"grad_norm": 0.23918400704860687,
|
|
"learning_rate": 0.0002615468957010371,
|
|
"loss": 0.8148,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 0.1280610219157705,
|
|
"grad_norm": 0.22054031491279602,
|
|
"learning_rate": 0.00026131847046461694,
|
|
"loss": 0.8183,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 0.1281728656030332,
|
|
"grad_norm": 0.25998455286026,
|
|
"learning_rate": 0.0002610900452281968,
|
|
"loss": 0.8242,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 0.1282847092902959,
|
|
"grad_norm": 0.26852914690971375,
|
|
"learning_rate": 0.0002608616199917767,
|
|
"loss": 0.8161,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 0.12839655297755856,
|
|
"grad_norm": 0.24028563499450684,
|
|
"learning_rate": 0.00026063319475535656,
|
|
"loss": 0.8083,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 0.12850839666482125,
|
|
"grad_norm": 0.24944745004177094,
|
|
"learning_rate": 0.0002604047695189364,
|
|
"loss": 0.8168,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 0.12862024035208391,
|
|
"grad_norm": 0.26595303416252136,
|
|
"learning_rate": 0.0002601763442825164,
|
|
"loss": 0.8178,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.1287320840393466,
|
|
"grad_norm": 0.24556541442871094,
|
|
"learning_rate": 0.00025994791904609623,
|
|
"loss": 0.8229,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 0.1288439277266093,
|
|
"grad_norm": 0.24716900289058685,
|
|
"learning_rate": 0.0002597194938096761,
|
|
"loss": 0.809,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 0.12895577141387196,
|
|
"grad_norm": 0.24745820462703705,
|
|
"learning_rate": 0.000259491068573256,
|
|
"loss": 0.8293,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 0.12906761510113465,
|
|
"grad_norm": 0.2732492983341217,
|
|
"learning_rate": 0.00025926264333683586,
|
|
"loss": 0.8,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 0.12917945878839734,
|
|
"grad_norm": 0.23239663243293762,
|
|
"learning_rate": 0.0002590342181004157,
|
|
"loss": 0.8175,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 0.12929130247566,
|
|
"grad_norm": 0.24953389167785645,
|
|
"learning_rate": 0.0002588057928639956,
|
|
"loss": 0.8152,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 0.1294031461629227,
|
|
"grad_norm": 0.25258156657218933,
|
|
"learning_rate": 0.0002585773676275755,
|
|
"loss": 0.8301,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 0.1295149898501854,
|
|
"grad_norm": 0.2609168291091919,
|
|
"learning_rate": 0.0002583489423911554,
|
|
"loss": 0.8197,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 0.12962683353744806,
|
|
"grad_norm": 0.2484872192144394,
|
|
"learning_rate": 0.0002581205171547353,
|
|
"loss": 0.8362,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 0.12973867722471075,
|
|
"grad_norm": 0.2833307385444641,
|
|
"learning_rate": 0.00025789209191831515,
|
|
"loss": 0.8338,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 0.12985052091197344,
|
|
"grad_norm": 0.24657459557056427,
|
|
"learning_rate": 0.000257663666681895,
|
|
"loss": 0.8205,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 0.1299623645992361,
|
|
"grad_norm": 0.2499598115682602,
|
|
"learning_rate": 0.0002574352414454749,
|
|
"loss": 0.8406,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 0.1300742082864988,
|
|
"grad_norm": 0.2757512629032135,
|
|
"learning_rate": 0.0002572068162090548,
|
|
"loss": 0.8247,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 0.13018605197376146,
|
|
"grad_norm": 0.25661805272102356,
|
|
"learning_rate": 0.00025697839097263463,
|
|
"loss": 0.8384,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 0.13029789566102415,
|
|
"grad_norm": 0.27651283144950867,
|
|
"learning_rate": 0.00025674996573621454,
|
|
"loss": 0.818,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 0.13040973934828684,
|
|
"grad_norm": 0.247050940990448,
|
|
"learning_rate": 0.00025652154049979445,
|
|
"loss": 0.8261,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 0.1305215830355495,
|
|
"grad_norm": 0.23124581575393677,
|
|
"learning_rate": 0.0002562931152633743,
|
|
"loss": 0.8259,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 0.1306334267228122,
|
|
"grad_norm": 0.2694045603275299,
|
|
"learning_rate": 0.0002560646900269542,
|
|
"loss": 0.8304,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 0.1307452704100749,
|
|
"grad_norm": 0.26821568608283997,
|
|
"learning_rate": 0.00025583626479053407,
|
|
"loss": 0.8441,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 0.13085711409733755,
|
|
"grad_norm": 0.2747989892959595,
|
|
"learning_rate": 0.0002556078395541139,
|
|
"loss": 0.841,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 0.13096895778460024,
|
|
"grad_norm": 0.28248855471611023,
|
|
"learning_rate": 0.00025537941431769384,
|
|
"loss": 0.857,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 0.13108080147186293,
|
|
"grad_norm": 0.25378182530403137,
|
|
"learning_rate": 0.0002551509890812737,
|
|
"loss": 0.8437,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 0.1311926451591256,
|
|
"grad_norm": 0.25950944423675537,
|
|
"learning_rate": 0.00025492256384485355,
|
|
"loss": 0.8497,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 0.1313044888463883,
|
|
"grad_norm": 0.26261699199676514,
|
|
"learning_rate": 0.0002546941386084335,
|
|
"loss": 0.8477,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 0.13141633253365098,
|
|
"grad_norm": 0.30151599645614624,
|
|
"learning_rate": 0.00025446571337201337,
|
|
"loss": 0.8405,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 0.13152817622091365,
|
|
"grad_norm": 0.2556060254573822,
|
|
"learning_rate": 0.0002542372881355932,
|
|
"loss": 0.831,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 0.13164001990817634,
|
|
"grad_norm": 0.26560309529304504,
|
|
"learning_rate": 0.00025400886289917313,
|
|
"loss": 0.8445,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 0.13175186359543903,
|
|
"grad_norm": 0.28504636883735657,
|
|
"learning_rate": 0.000253780437662753,
|
|
"loss": 0.8432,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 0.1318637072827017,
|
|
"grad_norm": 0.2985188663005829,
|
|
"learning_rate": 0.00025355201242633285,
|
|
"loss": 0.8584,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 0.13197555096996438,
|
|
"grad_norm": 0.28022414445877075,
|
|
"learning_rate": 0.00025332358718991276,
|
|
"loss": 0.8393,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 0.13208739465722705,
|
|
"grad_norm": 0.28535568714141846,
|
|
"learning_rate": 0.0002530951619534926,
|
|
"loss": 0.8369,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 0.13219923834448974,
|
|
"grad_norm": 0.27764952182769775,
|
|
"learning_rate": 0.0002528667367170725,
|
|
"loss": 0.8435,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 0.13231108203175243,
|
|
"grad_norm": 0.28943151235580444,
|
|
"learning_rate": 0.0002526383114806524,
|
|
"loss": 0.8334,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 0.1324229257190151,
|
|
"grad_norm": 0.28240668773651123,
|
|
"learning_rate": 0.0002524098862442323,
|
|
"loss": 0.8338,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 0.13253476940627779,
|
|
"grad_norm": 0.27650541067123413,
|
|
"learning_rate": 0.00025218146100781214,
|
|
"loss": 0.8275,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 0.13264661309354048,
|
|
"grad_norm": 0.27569788694381714,
|
|
"learning_rate": 0.000251953035771392,
|
|
"loss": 0.8323,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 0.13275845678080314,
|
|
"grad_norm": 0.29103782773017883,
|
|
"learning_rate": 0.0002517246105349719,
|
|
"loss": 0.8401,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 0.13287030046806583,
|
|
"grad_norm": 0.28769806027412415,
|
|
"learning_rate": 0.00025149618529855176,
|
|
"loss": 0.8369,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 0.13298214415532852,
|
|
"grad_norm": 0.2803378701210022,
|
|
"learning_rate": 0.0002512677600621316,
|
|
"loss": 0.8308,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 0.1330939878425912,
|
|
"grad_norm": 0.29264572262763977,
|
|
"learning_rate": 0.0002510393348257116,
|
|
"loss": 0.8314,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 0.13320583152985388,
|
|
"grad_norm": 0.27434802055358887,
|
|
"learning_rate": 0.00025081090958929144,
|
|
"loss": 0.8337,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 0.13331767521711657,
|
|
"grad_norm": 0.270589143037796,
|
|
"learning_rate": 0.0002505824843528713,
|
|
"loss": 0.8503,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 0.13342951890437924,
|
|
"grad_norm": 0.27260124683380127,
|
|
"learning_rate": 0.0002503540591164512,
|
|
"loss": 0.8293,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 0.13354136259164193,
|
|
"grad_norm": 0.2684808075428009,
|
|
"learning_rate": 0.00025012563388003106,
|
|
"loss": 0.8339,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 0.1336532062789046,
|
|
"grad_norm": 0.2510156035423279,
|
|
"learning_rate": 0.00024989720864361097,
|
|
"loss": 0.8464,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 0.13376504996616728,
|
|
"grad_norm": 0.24331960082054138,
|
|
"learning_rate": 0.0002496687834071908,
|
|
"loss": 0.8443,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 0.13387689365342997,
|
|
"grad_norm": 0.2688249349594116,
|
|
"learning_rate": 0.00024944035817077074,
|
|
"loss": 0.8483,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 0.13398873734069264,
|
|
"grad_norm": 0.2608729898929596,
|
|
"learning_rate": 0.0002492119329343506,
|
|
"loss": 0.852,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 0.13410058102795533,
|
|
"grad_norm": 0.28415507078170776,
|
|
"learning_rate": 0.00024898350769793045,
|
|
"loss": 0.8449,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 0.13421242471521802,
|
|
"grad_norm": 0.2920886278152466,
|
|
"learning_rate": 0.00024875508246151036,
|
|
"loss": 0.8281,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.13432426840248068,
|
|
"grad_norm": 0.2763430178165436,
|
|
"learning_rate": 0.00024852665722509027,
|
|
"loss": 0.8492,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 0.13443611208974338,
|
|
"grad_norm": 0.26460400223731995,
|
|
"learning_rate": 0.0002482982319886701,
|
|
"loss": 0.8409,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 0.13454795577700607,
|
|
"grad_norm": 0.2698183059692383,
|
|
"learning_rate": 0.00024806980675225,
|
|
"loss": 0.8295,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 0.13465979946426873,
|
|
"grad_norm": 0.2728478014469147,
|
|
"learning_rate": 0.0002478413815158299,
|
|
"loss": 0.837,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 0.13477164315153142,
|
|
"grad_norm": 0.282924085855484,
|
|
"learning_rate": 0.00024761295627940974,
|
|
"loss": 0.8482,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 0.13488348683879411,
|
|
"grad_norm": 0.264614999294281,
|
|
"learning_rate": 0.00024738453104298965,
|
|
"loss": 0.8432,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 0.13499533052605678,
|
|
"grad_norm": 0.2475707232952118,
|
|
"learning_rate": 0.0002471561058065695,
|
|
"loss": 0.8387,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 0.13510717421331947,
|
|
"grad_norm": 0.2620779573917389,
|
|
"learning_rate": 0.00024692768057014937,
|
|
"loss": 0.8559,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 0.13521901790058213,
|
|
"grad_norm": 0.2645311951637268,
|
|
"learning_rate": 0.0002466992553337293,
|
|
"loss": 0.8363,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 0.13533086158784483,
|
|
"grad_norm": 0.27586236596107483,
|
|
"learning_rate": 0.0002464708300973092,
|
|
"loss": 0.8365,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 0.13544270527510752,
|
|
"grad_norm": 0.2695125341415405,
|
|
"learning_rate": 0.00024624240486088904,
|
|
"loss": 0.8412,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 0.13555454896237018,
|
|
"grad_norm": 0.2473846971988678,
|
|
"learning_rate": 0.0002460139796244689,
|
|
"loss": 0.8362,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 0.13566639264963287,
|
|
"grad_norm": 0.28001588582992554,
|
|
"learning_rate": 0.0002457855543880488,
|
|
"loss": 0.8462,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 0.13577823633689556,
|
|
"grad_norm": 0.29486599564552307,
|
|
"learning_rate": 0.00024555712915162866,
|
|
"loss": 0.8607,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 0.13589008002415823,
|
|
"grad_norm": 0.2761843204498291,
|
|
"learning_rate": 0.00024532870391520857,
|
|
"loss": 0.8668,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 0.13600192371142092,
|
|
"grad_norm": 0.25779953598976135,
|
|
"learning_rate": 0.00024510027867878843,
|
|
"loss": 0.853,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 0.1361137673986836,
|
|
"grad_norm": 0.27593857049942017,
|
|
"learning_rate": 0.00024487185344236834,
|
|
"loss": 0.8506,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 0.13622561108594627,
|
|
"grad_norm": 0.24426791071891785,
|
|
"learning_rate": 0.0002446434282059482,
|
|
"loss": 0.8623,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 0.13633745477320897,
|
|
"grad_norm": 0.25555628538131714,
|
|
"learning_rate": 0.00024441500296952805,
|
|
"loss": 0.8493,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 0.13644929846047166,
|
|
"grad_norm": 0.2234913557767868,
|
|
"learning_rate": 0.00024418657773310796,
|
|
"loss": 0.8644,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 0.13656114214773432,
|
|
"grad_norm": 0.27130651473999023,
|
|
"learning_rate": 0.00024395815249668784,
|
|
"loss": 0.8791,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 0.136672985834997,
|
|
"grad_norm": 0.24734824895858765,
|
|
"learning_rate": 0.0002437297272602677,
|
|
"loss": 0.8719,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 0.13678482952225968,
|
|
"grad_norm": 0.24316945672035217,
|
|
"learning_rate": 0.0002435013020238476,
|
|
"loss": 0.8546,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 0.13689667320952237,
|
|
"grad_norm": 0.2349976748228073,
|
|
"learning_rate": 0.0002432728767874275,
|
|
"loss": 0.8458,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 0.13700851689678506,
|
|
"grad_norm": 0.26791033148765564,
|
|
"learning_rate": 0.00024304445155100735,
|
|
"loss": 0.8485,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 0.13712036058404772,
|
|
"grad_norm": 0.23598451912403107,
|
|
"learning_rate": 0.00024281602631458723,
|
|
"loss": 0.8451,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 0.13723220427131042,
|
|
"grad_norm": 0.23012129962444305,
|
|
"learning_rate": 0.00024258760107816714,
|
|
"loss": 0.8332,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 0.1373440479585731,
|
|
"grad_norm": 0.22834524512290955,
|
|
"learning_rate": 0.000242359175841747,
|
|
"loss": 0.8203,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 0.13745589164583577,
|
|
"grad_norm": 0.2247861921787262,
|
|
"learning_rate": 0.00024213075060532688,
|
|
"loss": 0.8303,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 0.13756773533309846,
|
|
"grad_norm": 0.2438284307718277,
|
|
"learning_rate": 0.00024190232536890676,
|
|
"loss": 0.8216,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 0.13767957902036115,
|
|
"grad_norm": 0.24075888097286224,
|
|
"learning_rate": 0.00024167390013248664,
|
|
"loss": 0.7964,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 0.13779142270762382,
|
|
"grad_norm": 0.24668976664543152,
|
|
"learning_rate": 0.00024144547489606653,
|
|
"loss": 0.8028,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 0.1379032663948865,
|
|
"grad_norm": 0.26727405190467834,
|
|
"learning_rate": 0.0002412170496596464,
|
|
"loss": 0.8081,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 0.1380151100821492,
|
|
"grad_norm": 0.2645564377307892,
|
|
"learning_rate": 0.00024098862442322626,
|
|
"loss": 0.8116,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 0.13812695376941186,
|
|
"grad_norm": 0.25368645787239075,
|
|
"learning_rate": 0.00024076019918680617,
|
|
"loss": 0.8105,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 0.13823879745667456,
|
|
"grad_norm": 0.26823967695236206,
|
|
"learning_rate": 0.00024053177395038606,
|
|
"loss": 0.8249,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 0.13835064114393722,
|
|
"grad_norm": 0.2827225625514984,
|
|
"learning_rate": 0.0002403033487139659,
|
|
"loss": 0.8191,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 0.1384624848311999,
|
|
"grad_norm": 0.23261433839797974,
|
|
"learning_rate": 0.00024007492347754582,
|
|
"loss": 0.8215,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 0.1385743285184626,
|
|
"grad_norm": 0.27331966161727905,
|
|
"learning_rate": 0.00023984649824112568,
|
|
"loss": 0.8232,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 0.13868617220572527,
|
|
"grad_norm": 0.2801966369152069,
|
|
"learning_rate": 0.00023961807300470556,
|
|
"loss": 0.8074,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 0.13879801589298796,
|
|
"grad_norm": 0.2379591315984726,
|
|
"learning_rate": 0.00023938964776828544,
|
|
"loss": 0.8209,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 0.13890985958025065,
|
|
"grad_norm": 0.27151694893836975,
|
|
"learning_rate": 0.00023916122253186533,
|
|
"loss": 0.8258,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 0.1390217032675133,
|
|
"grad_norm": 0.21429865062236786,
|
|
"learning_rate": 0.0002389327972954452,
|
|
"loss": 0.8178,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 0.139133546954776,
|
|
"grad_norm": 0.2777722477912903,
|
|
"learning_rate": 0.0002387043720590251,
|
|
"loss": 0.826,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 0.1392453906420387,
|
|
"grad_norm": 0.2514742910861969,
|
|
"learning_rate": 0.00023847594682260495,
|
|
"loss": 0.8362,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 0.13935723432930136,
|
|
"grad_norm": 0.23247656226158142,
|
|
"learning_rate": 0.00023824752158618486,
|
|
"loss": 0.8049,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 0.13946907801656405,
|
|
"grad_norm": 0.2391313910484314,
|
|
"learning_rate": 0.00023801909634976474,
|
|
"loss": 0.8082,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 0.13958092170382674,
|
|
"grad_norm": 0.2366340011358261,
|
|
"learning_rate": 0.0002377906711133446,
|
|
"loss": 0.8214,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 0.1396927653910894,
|
|
"grad_norm": 0.2570713758468628,
|
|
"learning_rate": 0.00023756224587692448,
|
|
"loss": 0.827,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 0.1398046090783521,
|
|
"grad_norm": 0.22823789715766907,
|
|
"learning_rate": 0.0002373338206405044,
|
|
"loss": 0.8314,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 0.1399164527656148,
|
|
"grad_norm": 0.24660278856754303,
|
|
"learning_rate": 0.00023710539540408424,
|
|
"loss": 0.838,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 0.14002829645287745,
|
|
"grad_norm": 0.25041723251342773,
|
|
"learning_rate": 0.00023687697016766413,
|
|
"loss": 0.8371,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 0.14014014014014015,
|
|
"grad_norm": 0.23942531645298004,
|
|
"learning_rate": 0.000236648544931244,
|
|
"loss": 0.8282,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 0.1402519838274028,
|
|
"grad_norm": 0.2445865273475647,
|
|
"learning_rate": 0.0002364201196948239,
|
|
"loss": 0.8307,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 0.1403638275146655,
|
|
"grad_norm": 0.25278452038764954,
|
|
"learning_rate": 0.00023619169445840378,
|
|
"loss": 0.8483,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 0.1404756712019282,
|
|
"grad_norm": 0.22890037298202515,
|
|
"learning_rate": 0.00023596326922198366,
|
|
"loss": 0.8328,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 0.14058751488919086,
|
|
"grad_norm": 0.2360977679491043,
|
|
"learning_rate": 0.00023573484398556351,
|
|
"loss": 0.8373,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 0.14069935857645355,
|
|
"grad_norm": 0.22873692214488983,
|
|
"learning_rate": 0.00023550641874914342,
|
|
"loss": 0.8399,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 0.14081120226371624,
|
|
"grad_norm": 0.228402242064476,
|
|
"learning_rate": 0.0002352779935127233,
|
|
"loss": 0.8272,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 0.1409230459509789,
|
|
"grad_norm": 0.2625369131565094,
|
|
"learning_rate": 0.00023504956827630316,
|
|
"loss": 0.8413,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 0.1410348896382416,
|
|
"grad_norm": 0.2744843363761902,
|
|
"learning_rate": 0.00023482114303988305,
|
|
"loss": 0.823,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 0.1411467333255043,
|
|
"grad_norm": 0.24845914542675018,
|
|
"learning_rate": 0.00023459271780346293,
|
|
"loss": 0.8089,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 0.14125857701276695,
|
|
"grad_norm": 0.2431713193655014,
|
|
"learning_rate": 0.0002343642925670428,
|
|
"loss": 0.8204,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 0.14137042070002964,
|
|
"grad_norm": 0.2636731266975403,
|
|
"learning_rate": 0.0002341358673306227,
|
|
"loss": 0.8241,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 0.14148226438729233,
|
|
"grad_norm": 0.24605631828308105,
|
|
"learning_rate": 0.00023390744209420255,
|
|
"loss": 0.837,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 0.141594108074555,
|
|
"grad_norm": 0.25722581148147583,
|
|
"learning_rate": 0.00023367901685778246,
|
|
"loss": 0.8338,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 0.1417059517618177,
|
|
"grad_norm": 0.2628157138824463,
|
|
"learning_rate": 0.00023345059162136234,
|
|
"loss": 0.8271,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 0.14181779544908035,
|
|
"grad_norm": 0.24534687399864197,
|
|
"learning_rate": 0.0002332221663849422,
|
|
"loss": 0.8281,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 0.14192963913634304,
|
|
"grad_norm": 0.24370639026165009,
|
|
"learning_rate": 0.00023299374114852208,
|
|
"loss": 0.8243,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 0.14204148282360574,
|
|
"grad_norm": 0.2993674576282501,
|
|
"learning_rate": 0.000232765315912102,
|
|
"loss": 0.8191,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 0.1421533265108684,
|
|
"grad_norm": 0.2372383326292038,
|
|
"learning_rate": 0.00023253689067568185,
|
|
"loss": 0.8115,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 0.1422651701981311,
|
|
"grad_norm": 0.2405237853527069,
|
|
"learning_rate": 0.00023230846543926173,
|
|
"loss": 0.8012,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 0.14237701388539378,
|
|
"grad_norm": 0.23501497507095337,
|
|
"learning_rate": 0.0002320800402028416,
|
|
"loss": 0.8272,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 0.14248885757265645,
|
|
"grad_norm": 0.2573966085910797,
|
|
"learning_rate": 0.0002318516149664215,
|
|
"loss": 0.8231,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 0.14260070125991914,
|
|
"grad_norm": 0.25884565711021423,
|
|
"learning_rate": 0.00023162318973000138,
|
|
"loss": 0.8293,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 0.14271254494718183,
|
|
"grad_norm": 0.24788953363895416,
|
|
"learning_rate": 0.00023139476449358126,
|
|
"loss": 0.8338,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 0.1428243886344445,
|
|
"grad_norm": 0.23874413967132568,
|
|
"learning_rate": 0.00023116633925716112,
|
|
"loss": 0.8184,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 0.14293623232170719,
|
|
"grad_norm": 0.2358027547597885,
|
|
"learning_rate": 0.00023093791402074103,
|
|
"loss": 0.8143,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 0.14304807600896988,
|
|
"grad_norm": 0.22447925806045532,
|
|
"learning_rate": 0.0002307094887843209,
|
|
"loss": 0.8093,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 0.14315991969623254,
|
|
"grad_norm": 0.25550246238708496,
|
|
"learning_rate": 0.00023048106354790077,
|
|
"loss": 0.8178,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.14327176338349523,
|
|
"grad_norm": 0.2370327264070511,
|
|
"learning_rate": 0.00023025263831148065,
|
|
"loss": 0.8035,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 0.1433836070707579,
|
|
"grad_norm": 0.24910229444503784,
|
|
"learning_rate": 0.00023002421307506056,
|
|
"loss": 0.7965,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 0.1434954507580206,
|
|
"grad_norm": 0.23592302203178406,
|
|
"learning_rate": 0.0002297957878386404,
|
|
"loss": 0.808,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 0.14360729444528328,
|
|
"grad_norm": 0.24010522663593292,
|
|
"learning_rate": 0.0002295673626022203,
|
|
"loss": 0.8047,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 0.14371913813254594,
|
|
"grad_norm": 0.26334619522094727,
|
|
"learning_rate": 0.00022933893736580015,
|
|
"loss": 0.8011,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 0.14383098181980863,
|
|
"grad_norm": 0.23162928223609924,
|
|
"learning_rate": 0.00022911051212938006,
|
|
"loss": 0.811,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 0.14394282550707133,
|
|
"grad_norm": 0.24273565411567688,
|
|
"learning_rate": 0.00022888208689295994,
|
|
"loss": 0.8249,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 0.144054669194334,
|
|
"grad_norm": 0.239716574549675,
|
|
"learning_rate": 0.0002286536616565398,
|
|
"loss": 0.8146,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 0.14416651288159668,
|
|
"grad_norm": 0.22947145998477936,
|
|
"learning_rate": 0.0002284252364201197,
|
|
"loss": 0.8037,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 0.14427835656885937,
|
|
"grad_norm": 0.2369975745677948,
|
|
"learning_rate": 0.0002281968111836996,
|
|
"loss": 0.7938,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 0.14439020025612204,
|
|
"grad_norm": 0.23150302469730377,
|
|
"learning_rate": 0.00022796838594727945,
|
|
"loss": 0.7971,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 0.14450204394338473,
|
|
"grad_norm": 0.25659120082855225,
|
|
"learning_rate": 0.00022773996071085933,
|
|
"loss": 0.7897,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 0.14461388763064742,
|
|
"grad_norm": 0.26838308572769165,
|
|
"learning_rate": 0.00022751153547443924,
|
|
"loss": 0.8025,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 0.14472573131791008,
|
|
"grad_norm": 0.2421617954969406,
|
|
"learning_rate": 0.0002272831102380191,
|
|
"loss": 0.7937,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 0.14483757500517278,
|
|
"grad_norm": 0.22780479490756989,
|
|
"learning_rate": 0.00022705468500159898,
|
|
"loss": 0.7861,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 0.14494941869243544,
|
|
"grad_norm": 0.2561044692993164,
|
|
"learning_rate": 0.00022682625976517886,
|
|
"loss": 0.7817,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 0.14506126237969813,
|
|
"grad_norm": 0.24073092639446259,
|
|
"learning_rate": 0.00022659783452875875,
|
|
"loss": 0.8024,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 0.14517310606696082,
|
|
"grad_norm": 0.24959658086299896,
|
|
"learning_rate": 0.00022636940929233863,
|
|
"loss": 0.7994,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 0.14528494975422349,
|
|
"grad_norm": 0.2711149752140045,
|
|
"learning_rate": 0.0002261409840559185,
|
|
"loss": 0.8011,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 0.14539679344148618,
|
|
"grad_norm": 0.2447725236415863,
|
|
"learning_rate": 0.00022591255881949837,
|
|
"loss": 0.7957,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 0.14550863712874887,
|
|
"grad_norm": 0.26505330204963684,
|
|
"learning_rate": 0.00022568413358307828,
|
|
"loss": 0.7932,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 0.14562048081601153,
|
|
"grad_norm": 0.256712943315506,
|
|
"learning_rate": 0.00022545570834665816,
|
|
"loss": 0.7919,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 0.14573232450327422,
|
|
"grad_norm": 0.23816627264022827,
|
|
"learning_rate": 0.00022522728311023802,
|
|
"loss": 0.7942,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 0.14584416819053692,
|
|
"grad_norm": 0.25607794523239136,
|
|
"learning_rate": 0.0002249988578738179,
|
|
"loss": 0.8058,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 0.14595601187779958,
|
|
"grad_norm": 0.2644692361354828,
|
|
"learning_rate": 0.0002247704326373978,
|
|
"loss": 0.8026,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 0.14606785556506227,
|
|
"grad_norm": 0.24160505831241608,
|
|
"learning_rate": 0.00022454200740097766,
|
|
"loss": 0.8013,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 0.14617969925232496,
|
|
"grad_norm": 0.25321200489997864,
|
|
"learning_rate": 0.00022431358216455755,
|
|
"loss": 0.802,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 0.14629154293958763,
|
|
"grad_norm": 0.38834208250045776,
|
|
"learning_rate": 0.0002240851569281374,
|
|
"loss": 0.8053,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 0.14640338662685032,
|
|
"grad_norm": 0.2638767957687378,
|
|
"learning_rate": 0.0002238567316917173,
|
|
"loss": 0.803,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 0.14651523031411298,
|
|
"grad_norm": 0.33412685990333557,
|
|
"learning_rate": 0.0002236283064552972,
|
|
"loss": 0.8091,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 0.14662707400137567,
|
|
"grad_norm": 0.27539852261543274,
|
|
"learning_rate": 0.00022339988121887705,
|
|
"loss": 0.8019,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 0.14673891768863837,
|
|
"grad_norm": 0.25128626823425293,
|
|
"learning_rate": 0.00022317145598245693,
|
|
"loss": 0.7961,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 0.14685076137590103,
|
|
"grad_norm": 0.27428579330444336,
|
|
"learning_rate": 0.00022294303074603684,
|
|
"loss": 0.792,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 0.14696260506316372,
|
|
"grad_norm": 0.25421425700187683,
|
|
"learning_rate": 0.0002227146055096167,
|
|
"loss": 0.8139,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 0.1470744487504264,
|
|
"grad_norm": 0.23709440231323242,
|
|
"learning_rate": 0.00022248618027319658,
|
|
"loss": 0.8147,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 0.14718629243768908,
|
|
"grad_norm": 0.2693617641925812,
|
|
"learning_rate": 0.00022225775503677646,
|
|
"loss": 0.8174,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 0.14729813612495177,
|
|
"grad_norm": 0.26674261689186096,
|
|
"learning_rate": 0.00022202932980035635,
|
|
"loss": 0.8105,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 0.14740997981221446,
|
|
"grad_norm": 0.2656268775463104,
|
|
"learning_rate": 0.00022180090456393623,
|
|
"loss": 0.8355,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 0.14752182349947712,
|
|
"grad_norm": 0.2587822377681732,
|
|
"learning_rate": 0.0002215724793275161,
|
|
"loss": 0.8311,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 0.14763366718673981,
|
|
"grad_norm": 0.29723209142684937,
|
|
"learning_rate": 0.00022134405409109597,
|
|
"loss": 0.8664,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 0.1477455108740025,
|
|
"grad_norm": 0.2579325735569,
|
|
"learning_rate": 0.00022111562885467588,
|
|
"loss": 0.8515,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 0.14785735456126517,
|
|
"grad_norm": 0.28357258439064026,
|
|
"learning_rate": 0.00022088720361825576,
|
|
"loss": 0.8562,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 0.14796919824852786,
|
|
"grad_norm": 0.26742318272590637,
|
|
"learning_rate": 0.00022065877838183562,
|
|
"loss": 0.8571,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 0.14808104193579055,
|
|
"grad_norm": 0.2750874161720276,
|
|
"learning_rate": 0.0002204303531454155,
|
|
"loss": 0.8449,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 0.14819288562305322,
|
|
"grad_norm": 0.3043031692504883,
|
|
"learning_rate": 0.0002202019279089954,
|
|
"loss": 0.8472,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 0.1483047293103159,
|
|
"grad_norm": 0.27216988801956177,
|
|
"learning_rate": 0.00021997350267257527,
|
|
"loss": 0.8732,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 0.14841657299757857,
|
|
"grad_norm": 0.2818603515625,
|
|
"learning_rate": 0.00021974507743615515,
|
|
"loss": 0.8333,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 0.14852841668484126,
|
|
"grad_norm": 0.2604407072067261,
|
|
"learning_rate": 0.000219516652199735,
|
|
"loss": 0.8467,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 0.14864026037210396,
|
|
"grad_norm": 0.28342294692993164,
|
|
"learning_rate": 0.00021928822696331491,
|
|
"loss": 0.8292,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 0.14875210405936662,
|
|
"grad_norm": 0.2564396262168884,
|
|
"learning_rate": 0.0002190598017268948,
|
|
"loss": 0.8355,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 0.1488639477466293,
|
|
"grad_norm": 0.2528108060359955,
|
|
"learning_rate": 0.00021883137649047465,
|
|
"loss": 0.8269,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 0.148975791433892,
|
|
"grad_norm": 0.26454785466194153,
|
|
"learning_rate": 0.00021860295125405456,
|
|
"loss": 0.8425,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 0.14908763512115467,
|
|
"grad_norm": 0.25204601883888245,
|
|
"learning_rate": 0.00021837452601763445,
|
|
"loss": 0.8251,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 0.14919947880841736,
|
|
"grad_norm": 0.24680152535438538,
|
|
"learning_rate": 0.0002181461007812143,
|
|
"loss": 0.8247,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 0.14931132249568005,
|
|
"grad_norm": 0.27356913685798645,
|
|
"learning_rate": 0.00021791767554479418,
|
|
"loss": 0.811,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 0.1494231661829427,
|
|
"grad_norm": 0.24703428149223328,
|
|
"learning_rate": 0.0002176892503083741,
|
|
"loss": 0.8145,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 0.1495350098702054,
|
|
"grad_norm": 0.27793166041374207,
|
|
"learning_rate": 0.00021746082507195395,
|
|
"loss": 0.8162,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 0.1496468535574681,
|
|
"grad_norm": 0.28826582431793213,
|
|
"learning_rate": 0.00021723239983553383,
|
|
"loss": 0.8258,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 0.14975869724473076,
|
|
"grad_norm": 0.24826544523239136,
|
|
"learning_rate": 0.00021700397459911372,
|
|
"loss": 0.8131,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 0.14987054093199345,
|
|
"grad_norm": 0.29015326499938965,
|
|
"learning_rate": 0.0002167755493626936,
|
|
"loss": 0.8241,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 0.14998238461925611,
|
|
"grad_norm": 0.2692265510559082,
|
|
"learning_rate": 0.00021654712412627348,
|
|
"loss": 0.8046,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 0.1500942283065188,
|
|
"grad_norm": 0.28277263045310974,
|
|
"learning_rate": 0.00021631869888985336,
|
|
"loss": 0.8075,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 0.1502060719937815,
|
|
"grad_norm": 0.25920721888542175,
|
|
"learning_rate": 0.00021609027365343322,
|
|
"loss": 0.8146,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 0.15031791568104416,
|
|
"grad_norm": 0.2548248767852783,
|
|
"learning_rate": 0.00021586184841701313,
|
|
"loss": 0.82,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 0.15042975936830685,
|
|
"grad_norm": 0.3121783435344696,
|
|
"learning_rate": 0.000215633423180593,
|
|
"loss": 0.796,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 0.15054160305556955,
|
|
"grad_norm": 0.2799825370311737,
|
|
"learning_rate": 0.00021540499794417287,
|
|
"loss": 0.8073,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 0.1506534467428322,
|
|
"grad_norm": 0.24525675177574158,
|
|
"learning_rate": 0.00021517657270775275,
|
|
"loss": 0.804,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 0.1507652904300949,
|
|
"grad_norm": 0.26799294352531433,
|
|
"learning_rate": 0.00021494814747133266,
|
|
"loss": 0.8086,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 0.1508771341173576,
|
|
"grad_norm": 0.24744056165218353,
|
|
"learning_rate": 0.00021471972223491252,
|
|
"loss": 0.7972,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 0.15098897780462026,
|
|
"grad_norm": 0.27284878492355347,
|
|
"learning_rate": 0.0002144912969984924,
|
|
"loss": 0.8048,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 0.15110082149188295,
|
|
"grad_norm": 0.2427281141281128,
|
|
"learning_rate": 0.00021426287176207225,
|
|
"loss": 0.8043,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 0.15121266517914564,
|
|
"grad_norm": 0.27432921528816223,
|
|
"learning_rate": 0.00021403444652565216,
|
|
"loss": 0.8198,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 0.1513245088664083,
|
|
"grad_norm": 0.26843661069869995,
|
|
"learning_rate": 0.00021380602128923205,
|
|
"loss": 0.8156,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 0.151436352553671,
|
|
"grad_norm": 0.2460176795721054,
|
|
"learning_rate": 0.0002135775960528119,
|
|
"loss": 0.806,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 0.15154819624093366,
|
|
"grad_norm": 0.24147658050060272,
|
|
"learning_rate": 0.00021334917081639179,
|
|
"loss": 0.8146,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 0.15166003992819635,
|
|
"grad_norm": 0.2715270221233368,
|
|
"learning_rate": 0.0002131207455799717,
|
|
"loss": 0.8065,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 0.15177188361545904,
|
|
"grad_norm": 0.2851991653442383,
|
|
"learning_rate": 0.00021289232034355155,
|
|
"loss": 0.8042,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 0.1518837273027217,
|
|
"grad_norm": 0.2779170870780945,
|
|
"learning_rate": 0.00021266389510713143,
|
|
"loss": 0.8163,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 0.1519955709899844,
|
|
"grad_norm": 0.2853197455406189,
|
|
"learning_rate": 0.00021243546987071132,
|
|
"loss": 0.8025,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 0.1521074146772471,
|
|
"grad_norm": 0.2753603160381317,
|
|
"learning_rate": 0.0002122070446342912,
|
|
"loss": 0.8187,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.15221925836450975,
|
|
"grad_norm": 0.29546552896499634,
|
|
"learning_rate": 0.00021197861939787108,
|
|
"loss": 0.8189,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 0.15233110205177244,
|
|
"grad_norm": 0.2799798250198364,
|
|
"learning_rate": 0.00021175019416145097,
|
|
"loss": 0.8098,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 0.15244294573903514,
|
|
"grad_norm": 0.23527085781097412,
|
|
"learning_rate": 0.00021152176892503082,
|
|
"loss": 0.8212,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 0.1525547894262978,
|
|
"grad_norm": 0.27207401394844055,
|
|
"learning_rate": 0.00021129334368861073,
|
|
"loss": 0.808,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 0.1526666331135605,
|
|
"grad_norm": 0.26520609855651855,
|
|
"learning_rate": 0.00021106491845219061,
|
|
"loss": 0.8133,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 0.15277847680082318,
|
|
"grad_norm": 0.2750151455402374,
|
|
"learning_rate": 0.00021083649321577047,
|
|
"loss": 0.8248,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 0.15289032048808585,
|
|
"grad_norm": 0.28339120745658875,
|
|
"learning_rate": 0.00021060806797935035,
|
|
"loss": 0.8175,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 0.15300216417534854,
|
|
"grad_norm": 0.27611440420150757,
|
|
"learning_rate": 0.00021037964274293026,
|
|
"loss": 0.8232,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 0.1531140078626112,
|
|
"grad_norm": 0.264113187789917,
|
|
"learning_rate": 0.00021015121750651012,
|
|
"loss": 0.8217,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 0.1532258515498739,
|
|
"grad_norm": 0.27031853795051575,
|
|
"learning_rate": 0.00020992279227009,
|
|
"loss": 0.8242,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 0.15333769523713658,
|
|
"grad_norm": 0.2753359079360962,
|
|
"learning_rate": 0.00020969436703366988,
|
|
"loss": 0.8311,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 0.15344953892439925,
|
|
"grad_norm": 0.24859648942947388,
|
|
"learning_rate": 0.00020946594179724977,
|
|
"loss": 0.8285,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 0.15356138261166194,
|
|
"grad_norm": 0.2773294448852539,
|
|
"learning_rate": 0.00020923751656082965,
|
|
"loss": 0.8201,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 0.15367322629892463,
|
|
"grad_norm": 0.23855488002300262,
|
|
"learning_rate": 0.0002090090913244095,
|
|
"loss": 0.8145,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 0.1537850699861873,
|
|
"grad_norm": 0.27641457319259644,
|
|
"learning_rate": 0.0002087806660879894,
|
|
"loss": 0.8233,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 0.15389691367345,
|
|
"grad_norm": 0.26556023955345154,
|
|
"learning_rate": 0.0002085522408515693,
|
|
"loss": 0.8309,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 0.15400875736071268,
|
|
"grad_norm": 0.2980164885520935,
|
|
"learning_rate": 0.00020832381561514915,
|
|
"loss": 0.8585,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 0.15412060104797534,
|
|
"grad_norm": 0.21802592277526855,
|
|
"learning_rate": 0.00020809539037872904,
|
|
"loss": 0.8385,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 0.15423244473523803,
|
|
"grad_norm": 0.3153620958328247,
|
|
"learning_rate": 0.00020786696514230895,
|
|
"loss": 0.8423,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 0.15434428842250072,
|
|
"grad_norm": 0.2928372621536255,
|
|
"learning_rate": 0.0002076385399058888,
|
|
"loss": 0.8399,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 0.1544561321097634,
|
|
"grad_norm": 0.3015557527542114,
|
|
"learning_rate": 0.00020741011466946868,
|
|
"loss": 0.843,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 0.15456797579702608,
|
|
"grad_norm": 0.2243575006723404,
|
|
"learning_rate": 0.00020718168943304857,
|
|
"loss": 0.8302,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 0.15467981948428874,
|
|
"grad_norm": 0.23281534016132355,
|
|
"learning_rate": 0.00020695326419662845,
|
|
"loss": 0.8268,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 0.15479166317155144,
|
|
"grad_norm": 0.2412877380847931,
|
|
"learning_rate": 0.00020672483896020833,
|
|
"loss": 0.849,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 0.15490350685881413,
|
|
"grad_norm": 0.2762492001056671,
|
|
"learning_rate": 0.00020649641372378822,
|
|
"loss": 0.8324,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 0.1550153505460768,
|
|
"grad_norm": 0.27976560592651367,
|
|
"learning_rate": 0.00020626798848736807,
|
|
"loss": 0.843,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 0.15512719423333948,
|
|
"grad_norm": 0.29076194763183594,
|
|
"learning_rate": 0.00020603956325094798,
|
|
"loss": 0.8575,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 0.15523903792060217,
|
|
"grad_norm": 0.2367868423461914,
|
|
"learning_rate": 0.00020581113801452786,
|
|
"loss": 0.8465,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 0.15535088160786484,
|
|
"grad_norm": 0.26191186904907227,
|
|
"learning_rate": 0.00020558271277810772,
|
|
"loss": 0.8291,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 0.15546272529512753,
|
|
"grad_norm": 0.27254414558410645,
|
|
"learning_rate": 0.0002053542875416876,
|
|
"loss": 0.8347,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 0.15557456898239022,
|
|
"grad_norm": 0.2718988060951233,
|
|
"learning_rate": 0.0002051258623052675,
|
|
"loss": 0.8319,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 0.15568641266965288,
|
|
"grad_norm": 0.24478264153003693,
|
|
"learning_rate": 0.00020489743706884737,
|
|
"loss": 0.8369,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 0.15579825635691558,
|
|
"grad_norm": 0.27791038155555725,
|
|
"learning_rate": 0.00020466901183242725,
|
|
"loss": 0.8486,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 0.15591010004417827,
|
|
"grad_norm": 0.27220630645751953,
|
|
"learning_rate": 0.00020444058659600713,
|
|
"loss": 0.8335,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 0.15602194373144093,
|
|
"grad_norm": 0.2945479154586792,
|
|
"learning_rate": 0.00020421216135958702,
|
|
"loss": 0.8234,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 0.15613378741870362,
|
|
"grad_norm": 0.2911258041858673,
|
|
"learning_rate": 0.0002039837361231669,
|
|
"loss": 0.8279,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 0.15624563110596631,
|
|
"grad_norm": 0.3039700984954834,
|
|
"learning_rate": 0.00020375531088674676,
|
|
"loss": 0.8409,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 0.15635747479322898,
|
|
"grad_norm": 0.27290788292884827,
|
|
"learning_rate": 0.00020352688565032664,
|
|
"loss": 0.8394,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 0.15646931848049167,
|
|
"grad_norm": 0.28534916043281555,
|
|
"learning_rate": 0.00020329846041390655,
|
|
"loss": 0.8431,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 0.15658116216775433,
|
|
"grad_norm": 0.304221510887146,
|
|
"learning_rate": 0.0002030700351774864,
|
|
"loss": 0.8476,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 0.15669300585501703,
|
|
"grad_norm": 0.3151461184024811,
|
|
"learning_rate": 0.0002028416099410663,
|
|
"loss": 0.852,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 0.15680484954227972,
|
|
"grad_norm": 0.2947019040584564,
|
|
"learning_rate": 0.00020261318470464617,
|
|
"loss": 0.8396,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 0.15691669322954238,
|
|
"grad_norm": 0.2737627625465393,
|
|
"learning_rate": 0.00020238475946822605,
|
|
"loss": 0.8337,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 0.15702853691680507,
|
|
"grad_norm": 0.28257089853286743,
|
|
"learning_rate": 0.00020215633423180594,
|
|
"loss": 0.8475,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 0.15714038060406776,
|
|
"grad_norm": 0.3102625608444214,
|
|
"learning_rate": 0.00020192790899538582,
|
|
"loss": 0.8451,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 0.15725222429133043,
|
|
"grad_norm": 0.2839931845664978,
|
|
"learning_rate": 0.00020169948375896567,
|
|
"loss": 0.8365,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 0.15736406797859312,
|
|
"grad_norm": 0.25566980242729187,
|
|
"learning_rate": 0.00020147105852254558,
|
|
"loss": 0.8287,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 0.1574759116658558,
|
|
"grad_norm": 0.267791211605072,
|
|
"learning_rate": 0.00020124263328612547,
|
|
"loss": 0.8289,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 0.15758775535311847,
|
|
"grad_norm": 0.267635703086853,
|
|
"learning_rate": 0.00020101420804970532,
|
|
"loss": 0.8357,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 0.15769959904038117,
|
|
"grad_norm": 0.28065699338912964,
|
|
"learning_rate": 0.0002007857828132852,
|
|
"loss": 0.8363,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 0.15781144272764386,
|
|
"grad_norm": 0.26585736870765686,
|
|
"learning_rate": 0.00020055735757686512,
|
|
"loss": 0.8409,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 0.15792328641490652,
|
|
"grad_norm": 0.2562732398509979,
|
|
"learning_rate": 0.00020032893234044497,
|
|
"loss": 0.8374,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 0.1580351301021692,
|
|
"grad_norm": 0.2572222650051117,
|
|
"learning_rate": 0.00020010050710402485,
|
|
"loss": 0.8405,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 0.15814697378943188,
|
|
"grad_norm": 0.3075050413608551,
|
|
"learning_rate": 0.00019987208186760474,
|
|
"loss": 0.825,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 0.15825881747669457,
|
|
"grad_norm": 0.2630293071269989,
|
|
"learning_rate": 0.00019964365663118462,
|
|
"loss": 0.8326,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 0.15837066116395726,
|
|
"grad_norm": 0.255015105009079,
|
|
"learning_rate": 0.0001994152313947645,
|
|
"loss": 0.8181,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 0.15848250485121992,
|
|
"grad_norm": 0.25929179787635803,
|
|
"learning_rate": 0.00019918680615834438,
|
|
"loss": 0.8067,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 0.15859434853848262,
|
|
"grad_norm": 0.27078965306282043,
|
|
"learning_rate": 0.00019895838092192424,
|
|
"loss": 0.8043,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 0.1587061922257453,
|
|
"grad_norm": 0.2618376612663269,
|
|
"learning_rate": 0.00019872995568550415,
|
|
"loss": 0.8191,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 0.15881803591300797,
|
|
"grad_norm": 0.246153324842453,
|
|
"learning_rate": 0.000198501530449084,
|
|
"loss": 0.8251,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 0.15892987960027066,
|
|
"grad_norm": 0.25498026609420776,
|
|
"learning_rate": 0.0001982731052126639,
|
|
"loss": 0.8319,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 0.15904172328753335,
|
|
"grad_norm": 0.2517942190170288,
|
|
"learning_rate": 0.0001980446799762438,
|
|
"loss": 0.8106,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 0.15915356697479602,
|
|
"grad_norm": 0.2659161388874054,
|
|
"learning_rate": 0.00019781625473982365,
|
|
"loss": 0.8163,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 0.1592654106620587,
|
|
"grad_norm": 0.24527288973331451,
|
|
"learning_rate": 0.00019758782950340354,
|
|
"loss": 0.8359,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 0.1593772543493214,
|
|
"grad_norm": 0.23943792283535004,
|
|
"learning_rate": 0.00019735940426698342,
|
|
"loss": 0.8253,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 0.15948909803658406,
|
|
"grad_norm": 0.30401650071144104,
|
|
"learning_rate": 0.0001971309790305633,
|
|
"loss": 0.8369,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 0.15960094172384676,
|
|
"grad_norm": 0.25001001358032227,
|
|
"learning_rate": 0.00019690255379414319,
|
|
"loss": 0.8354,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 0.15971278541110942,
|
|
"grad_norm": 0.2378586083650589,
|
|
"learning_rate": 0.00019667412855772307,
|
|
"loss": 0.8324,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 0.1598246290983721,
|
|
"grad_norm": 0.26216059923171997,
|
|
"learning_rate": 0.00019644570332130292,
|
|
"loss": 0.8227,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 0.1599364727856348,
|
|
"grad_norm": 0.24156969785690308,
|
|
"learning_rate": 0.00019621727808488283,
|
|
"loss": 0.8362,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 0.16004831647289747,
|
|
"grad_norm": 0.24192091822624207,
|
|
"learning_rate": 0.00019598885284846272,
|
|
"loss": 0.835,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 0.16016016016016016,
|
|
"grad_norm": 0.24861887097358704,
|
|
"learning_rate": 0.00019576042761204257,
|
|
"loss": 0.8232,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 0.16027200384742285,
|
|
"grad_norm": 0.27175864577293396,
|
|
"learning_rate": 0.00019553200237562246,
|
|
"loss": 0.8303,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 0.16038384753468551,
|
|
"grad_norm": 0.272334486246109,
|
|
"learning_rate": 0.00019530357713920237,
|
|
"loss": 0.8217,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 0.1604956912219482,
|
|
"grad_norm": 0.28357213735580444,
|
|
"learning_rate": 0.00019507515190278222,
|
|
"loss": 0.8343,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 0.1606075349092109,
|
|
"grad_norm": 0.272276371717453,
|
|
"learning_rate": 0.0001948467266663621,
|
|
"loss": 0.8235,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 0.16071937859647356,
|
|
"grad_norm": 0.26771044731140137,
|
|
"learning_rate": 0.000194618301429942,
|
|
"loss": 0.8292,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 0.16083122228373625,
|
|
"grad_norm": 0.27449774742126465,
|
|
"learning_rate": 0.00019438987619352187,
|
|
"loss": 0.8485,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 0.16094306597099894,
|
|
"grad_norm": 0.26026156544685364,
|
|
"learning_rate": 0.00019416145095710175,
|
|
"loss": 0.8458,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 0.1610549096582616,
|
|
"grad_norm": 0.2667345404624939,
|
|
"learning_rate": 0.00019393302572068164,
|
|
"loss": 0.8519,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.1611667533455243,
|
|
"grad_norm": 0.26302048563957214,
|
|
"learning_rate": 0.0001937046004842615,
|
|
"loss": 0.8353,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 0.16127859703278696,
|
|
"grad_norm": 0.24420003592967987,
|
|
"learning_rate": 0.0001934761752478414,
|
|
"loss": 0.8464,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 0.16139044072004965,
|
|
"grad_norm": 0.2739315629005432,
|
|
"learning_rate": 0.00019324775001142126,
|
|
"loss": 0.8257,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 0.16150228440731235,
|
|
"grad_norm": 0.2370629757642746,
|
|
"learning_rate": 0.00019301932477500114,
|
|
"loss": 0.8324,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 0.161614128094575,
|
|
"grad_norm": 0.2616153955459595,
|
|
"learning_rate": 0.00019279089953858102,
|
|
"loss": 0.8513,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 0.1617259717818377,
|
|
"grad_norm": 0.2527558207511902,
|
|
"learning_rate": 0.0001925624743021609,
|
|
"loss": 0.8435,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 0.1618378154691004,
|
|
"grad_norm": 0.28255122900009155,
|
|
"learning_rate": 0.0001923340490657408,
|
|
"loss": 0.8497,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 0.16194965915636306,
|
|
"grad_norm": 0.23198026418685913,
|
|
"learning_rate": 0.00019210562382932067,
|
|
"loss": 0.8357,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 0.16206150284362575,
|
|
"grad_norm": 0.2534460127353668,
|
|
"learning_rate": 0.00019187719859290053,
|
|
"loss": 0.8396,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 0.16217334653088844,
|
|
"grad_norm": 0.2693686783313751,
|
|
"learning_rate": 0.00019164877335648044,
|
|
"loss": 0.8438,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 0.1622851902181511,
|
|
"grad_norm": 0.26181599497795105,
|
|
"learning_rate": 0.00019142034812006032,
|
|
"loss": 0.8452,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 0.1623970339054138,
|
|
"grad_norm": 0.2268761545419693,
|
|
"learning_rate": 0.00019119192288364017,
|
|
"loss": 0.8496,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 0.1625088775926765,
|
|
"grad_norm": 0.27698907256126404,
|
|
"learning_rate": 0.00019096349764722006,
|
|
"loss": 0.8265,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 0.16262072127993915,
|
|
"grad_norm": 0.30570700764656067,
|
|
"learning_rate": 0.00019073507241079997,
|
|
"loss": 0.8399,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 0.16273256496720184,
|
|
"grad_norm": 0.2894477844238281,
|
|
"learning_rate": 0.00019050664717437982,
|
|
"loss": 0.8488,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 0.16284440865446453,
|
|
"grad_norm": 0.3094457685947418,
|
|
"learning_rate": 0.0001902782219379597,
|
|
"loss": 0.8243,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 0.1629562523417272,
|
|
"grad_norm": 0.2908037602901459,
|
|
"learning_rate": 0.0001900497967015396,
|
|
"loss": 0.835,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 0.1630680960289899,
|
|
"grad_norm": 0.27222102880477905,
|
|
"learning_rate": 0.00018982137146511947,
|
|
"loss": 0.8306,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 0.16317993971625255,
|
|
"grad_norm": 0.2542339563369751,
|
|
"learning_rate": 0.00018959294622869935,
|
|
"loss": 0.8259,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 0.16329178340351524,
|
|
"grad_norm": 0.28288012742996216,
|
|
"learning_rate": 0.00018936452099227924,
|
|
"loss": 0.8243,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 0.16340362709077794,
|
|
"grad_norm": 0.2584143877029419,
|
|
"learning_rate": 0.0001891360957558591,
|
|
"loss": 0.8224,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 0.1635154707780406,
|
|
"grad_norm": 0.26679450273513794,
|
|
"learning_rate": 0.000188907670519439,
|
|
"loss": 0.8142,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 0.1636273144653033,
|
|
"grad_norm": 0.24589306116104126,
|
|
"learning_rate": 0.00018867924528301889,
|
|
"loss": 0.81,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 0.16373915815256598,
|
|
"grad_norm": 0.28474611043930054,
|
|
"learning_rate": 0.00018845082004659874,
|
|
"loss": 0.7989,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 0.16385100183982865,
|
|
"grad_norm": 0.27567991614341736,
|
|
"learning_rate": 0.00018822239481017862,
|
|
"loss": 0.8049,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 0.16396284552709134,
|
|
"grad_norm": 0.2509905695915222,
|
|
"learning_rate": 0.0001879939695737585,
|
|
"loss": 0.8168,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 0.16407468921435403,
|
|
"grad_norm": 0.30284953117370605,
|
|
"learning_rate": 0.0001877655443373384,
|
|
"loss": 0.8055,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 0.1641865329016167,
|
|
"grad_norm": 0.27638325095176697,
|
|
"learning_rate": 0.00018753711910091827,
|
|
"loss": 0.8368,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 0.16429837658887939,
|
|
"grad_norm": 0.29546642303466797,
|
|
"learning_rate": 0.00018730869386449816,
|
|
"loss": 0.8161,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 0.16441022027614208,
|
|
"grad_norm": 0.2483370304107666,
|
|
"learning_rate": 0.00018708026862807804,
|
|
"loss": 0.8136,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 0.16452206396340474,
|
|
"grad_norm": 0.2862898111343384,
|
|
"learning_rate": 0.00018685184339165792,
|
|
"loss": 0.836,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 0.16463390765066743,
|
|
"grad_norm": 0.2730434238910675,
|
|
"learning_rate": 0.00018662341815523778,
|
|
"loss": 0.8279,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 0.1647457513379301,
|
|
"grad_norm": 0.2846275269985199,
|
|
"learning_rate": 0.0001863949929188177,
|
|
"loss": 0.7991,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 0.1648575950251928,
|
|
"grad_norm": 0.2455524355173111,
|
|
"learning_rate": 0.00018616656768239757,
|
|
"loss": 0.7931,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 0.16496943871245548,
|
|
"grad_norm": 0.25060829520225525,
|
|
"learning_rate": 0.00018593814244597743,
|
|
"loss": 0.8009,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 0.16508128239971814,
|
|
"grad_norm": 0.2687000334262848,
|
|
"learning_rate": 0.0001857097172095573,
|
|
"loss": 0.7968,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 0.16519312608698083,
|
|
"grad_norm": 0.28619691729545593,
|
|
"learning_rate": 0.00018548129197313722,
|
|
"loss": 0.7818,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 0.16530496977424353,
|
|
"grad_norm": 0.2549494206905365,
|
|
"learning_rate": 0.00018525286673671707,
|
|
"loss": 0.7877,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 0.1654168134615062,
|
|
"grad_norm": 0.2419700175523758,
|
|
"learning_rate": 0.00018502444150029696,
|
|
"loss": 0.7899,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 0.16552865714876888,
|
|
"grad_norm": 0.2636066675186157,
|
|
"learning_rate": 0.00018479601626387684,
|
|
"loss": 0.7893,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 0.16564050083603157,
|
|
"grad_norm": 0.264072984457016,
|
|
"learning_rate": 0.00018456759102745672,
|
|
"loss": 0.7984,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 0.16575234452329424,
|
|
"grad_norm": 0.2661677598953247,
|
|
"learning_rate": 0.0001843391657910366,
|
|
"loss": 0.8085,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 0.16586418821055693,
|
|
"grad_norm": 0.28324052691459656,
|
|
"learning_rate": 0.0001841107405546165,
|
|
"loss": 0.8066,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 0.16597603189781962,
|
|
"grad_norm": 0.277761310338974,
|
|
"learning_rate": 0.00018388231531819634,
|
|
"loss": 0.8008,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 0.16608787558508228,
|
|
"grad_norm": 0.2669602036476135,
|
|
"learning_rate": 0.00018365389008177625,
|
|
"loss": 0.8285,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 0.16619971927234498,
|
|
"grad_norm": 0.28757140040397644,
|
|
"learning_rate": 0.00018342546484535614,
|
|
"loss": 0.8121,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 0.16631156295960764,
|
|
"grad_norm": 0.2616439163684845,
|
|
"learning_rate": 0.000183197039608936,
|
|
"loss": 0.8185,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 0.16642340664687033,
|
|
"grad_norm": 0.28334370255470276,
|
|
"learning_rate": 0.00018296861437251587,
|
|
"loss": 0.8229,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 0.16653525033413302,
|
|
"grad_norm": 0.2659022808074951,
|
|
"learning_rate": 0.00018274018913609576,
|
|
"loss": 0.82,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 0.1666470940213957,
|
|
"grad_norm": 0.2544262111186981,
|
|
"learning_rate": 0.00018251176389967564,
|
|
"loss": 0.84,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 0.16675893770865838,
|
|
"grad_norm": 0.27492937445640564,
|
|
"learning_rate": 0.00018228333866325552,
|
|
"loss": 0.8411,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 0.16687078139592107,
|
|
"grad_norm": 0.2961216866970062,
|
|
"learning_rate": 0.00018205491342683538,
|
|
"loss": 0.8178,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 0.16698262508318373,
|
|
"grad_norm": 0.2704416811466217,
|
|
"learning_rate": 0.0001818264881904153,
|
|
"loss": 0.8264,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 0.16709446877044642,
|
|
"grad_norm": 0.261704683303833,
|
|
"learning_rate": 0.00018159806295399517,
|
|
"loss": 0.8307,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 0.16720631245770912,
|
|
"grad_norm": 0.26157405972480774,
|
|
"learning_rate": 0.00018136963771757503,
|
|
"loss": 0.8064,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 0.16731815614497178,
|
|
"grad_norm": 0.2589896023273468,
|
|
"learning_rate": 0.0001811412124811549,
|
|
"loss": 0.8195,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 0.16742999983223447,
|
|
"grad_norm": 0.24691319465637207,
|
|
"learning_rate": 0.00018091278724473482,
|
|
"loss": 0.8283,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 0.16754184351949716,
|
|
"grad_norm": 0.2527819871902466,
|
|
"learning_rate": 0.00018068436200831468,
|
|
"loss": 0.8229,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 0.16765368720675983,
|
|
"grad_norm": 0.2639094293117523,
|
|
"learning_rate": 0.00018045593677189456,
|
|
"loss": 0.8393,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 0.16776553089402252,
|
|
"grad_norm": 0.24417634308338165,
|
|
"learning_rate": 0.00018022751153547444,
|
|
"loss": 0.8204,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 0.16787737458128518,
|
|
"grad_norm": 0.25673115253448486,
|
|
"learning_rate": 0.00017999908629905432,
|
|
"loss": 0.8184,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 0.16798921826854787,
|
|
"grad_norm": 0.254077285528183,
|
|
"learning_rate": 0.0001797706610626342,
|
|
"loss": 0.8195,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 0.16810106195581057,
|
|
"grad_norm": 0.2455417662858963,
|
|
"learning_rate": 0.0001795422358262141,
|
|
"loss": 0.8255,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 0.16821290564307323,
|
|
"grad_norm": 0.27918189764022827,
|
|
"learning_rate": 0.00017931381058979395,
|
|
"loss": 0.8345,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 0.16832474933033592,
|
|
"grad_norm": 0.2272186279296875,
|
|
"learning_rate": 0.00017908538535337386,
|
|
"loss": 0.8178,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 0.1684365930175986,
|
|
"grad_norm": 0.269189715385437,
|
|
"learning_rate": 0.00017885696011695374,
|
|
"loss": 0.8343,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 0.16854843670486128,
|
|
"grad_norm": 0.2805529832839966,
|
|
"learning_rate": 0.0001786285348805336,
|
|
"loss": 0.8126,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 0.16866028039212397,
|
|
"grad_norm": 0.28788769245147705,
|
|
"learning_rate": 0.00017840010964411348,
|
|
"loss": 0.8278,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 0.16877212407938666,
|
|
"grad_norm": 0.2439277619123459,
|
|
"learning_rate": 0.00017817168440769336,
|
|
"loss": 0.8272,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 0.16888396776664932,
|
|
"grad_norm": 0.3151440918445587,
|
|
"learning_rate": 0.00017794325917127324,
|
|
"loss": 0.8201,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 0.16899581145391201,
|
|
"grad_norm": 0.2562885880470276,
|
|
"learning_rate": 0.00017771483393485313,
|
|
"loss": 0.8275,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 0.1691076551411747,
|
|
"grad_norm": 0.2718476355075836,
|
|
"learning_rate": 0.00017748640869843298,
|
|
"loss": 0.821,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 0.16921949882843737,
|
|
"grad_norm": 0.2699459493160248,
|
|
"learning_rate": 0.0001772579834620129,
|
|
"loss": 0.8352,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 0.16933134251570006,
|
|
"grad_norm": 0.29737600684165955,
|
|
"learning_rate": 0.00017702955822559277,
|
|
"loss": 0.8279,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 0.16944318620296273,
|
|
"grad_norm": 0.3075369894504547,
|
|
"learning_rate": 0.00017680113298917263,
|
|
"loss": 0.8037,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 0.16955502989022542,
|
|
"grad_norm": 0.27061593532562256,
|
|
"learning_rate": 0.00017657270775275254,
|
|
"loss": 0.8149,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 0.1696668735774881,
|
|
"grad_norm": 0.26719844341278076,
|
|
"learning_rate": 0.00017634428251633242,
|
|
"loss": 0.7896,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 0.16977871726475077,
|
|
"grad_norm": 0.2871409058570862,
|
|
"learning_rate": 0.00017611585727991228,
|
|
"loss": 0.7863,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 0.16989056095201346,
|
|
"grad_norm": 0.2502906620502472,
|
|
"learning_rate": 0.00017588743204349216,
|
|
"loss": 0.7817,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 0.17000240463927616,
|
|
"grad_norm": 0.2579248547554016,
|
|
"learning_rate": 0.00017565900680707207,
|
|
"loss": 0.796,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.17011424832653882,
|
|
"grad_norm": 0.2537415325641632,
|
|
"learning_rate": 0.00017543058157065193,
|
|
"loss": 0.78,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 0.1702260920138015,
|
|
"grad_norm": 0.2420157790184021,
|
|
"learning_rate": 0.0001752021563342318,
|
|
"loss": 0.7946,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 0.1703379357010642,
|
|
"grad_norm": 0.2423790544271469,
|
|
"learning_rate": 0.0001749737310978117,
|
|
"loss": 0.797,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 0.17044977938832687,
|
|
"grad_norm": 0.2521071434020996,
|
|
"learning_rate": 0.00017474530586139157,
|
|
"loss": 0.8073,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 0.17056162307558956,
|
|
"grad_norm": 0.22921273112297058,
|
|
"learning_rate": 0.00017451688062497146,
|
|
"loss": 0.7916,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 0.17067346676285225,
|
|
"grad_norm": 0.35150206089019775,
|
|
"learning_rate": 0.00017428845538855134,
|
|
"loss": 0.8001,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 0.1707853104501149,
|
|
"grad_norm": 0.27637869119644165,
|
|
"learning_rate": 0.0001740600301521312,
|
|
"loss": 0.7948,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 0.1708971541373776,
|
|
"grad_norm": 0.22480230033397675,
|
|
"learning_rate": 0.0001738316049157111,
|
|
"loss": 0.7932,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 0.1710089978246403,
|
|
"grad_norm": 0.27264508605003357,
|
|
"learning_rate": 0.000173603179679291,
|
|
"loss": 0.8083,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 0.17112084151190296,
|
|
"grad_norm": 0.2647417485713959,
|
|
"learning_rate": 0.00017337475444287084,
|
|
"loss": 0.8177,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 0.17123268519916565,
|
|
"grad_norm": 0.23619987070560455,
|
|
"learning_rate": 0.00017314632920645073,
|
|
"loss": 0.8068,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 0.17134452888642832,
|
|
"grad_norm": 0.22450131177902222,
|
|
"learning_rate": 0.0001729179039700306,
|
|
"loss": 0.8004,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 0.171456372573691,
|
|
"grad_norm": 0.2784859240055084,
|
|
"learning_rate": 0.0001726894787336105,
|
|
"loss": 0.7938,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 0.1715682162609537,
|
|
"grad_norm": 0.25513574481010437,
|
|
"learning_rate": 0.00017246105349719038,
|
|
"loss": 0.7844,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 0.17168005994821636,
|
|
"grad_norm": 0.27425146102905273,
|
|
"learning_rate": 0.00017223262826077023,
|
|
"loss": 0.7906,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 0.17179190363547905,
|
|
"grad_norm": 0.2500791847705841,
|
|
"learning_rate": 0.00017200420302435014,
|
|
"loss": 0.7834,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 0.17190374732274175,
|
|
"grad_norm": 0.2550630271434784,
|
|
"learning_rate": 0.00017177577778793002,
|
|
"loss": 0.7736,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 0.1720155910100044,
|
|
"grad_norm": 0.25209444761276245,
|
|
"learning_rate": 0.00017154735255150988,
|
|
"loss": 0.773,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 0.1721274346972671,
|
|
"grad_norm": 0.2347812056541443,
|
|
"learning_rate": 0.00017131892731508976,
|
|
"loss": 0.7745,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 0.1722392783845298,
|
|
"grad_norm": 0.2858305871486664,
|
|
"learning_rate": 0.00017109050207866967,
|
|
"loss": 0.7776,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 0.17235112207179246,
|
|
"grad_norm": 0.30414941906929016,
|
|
"learning_rate": 0.00017086207684224953,
|
|
"loss": 0.7701,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 0.17246296575905515,
|
|
"grad_norm": 0.2645011842250824,
|
|
"learning_rate": 0.0001706336516058294,
|
|
"loss": 0.7746,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 0.17257480944631784,
|
|
"grad_norm": 0.2984048128128052,
|
|
"learning_rate": 0.0001704052263694093,
|
|
"loss": 0.771,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 0.1726866531335805,
|
|
"grad_norm": 0.2734147906303406,
|
|
"learning_rate": 0.00017017680113298918,
|
|
"loss": 0.7769,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 0.1727984968208432,
|
|
"grad_norm": 0.2632124125957489,
|
|
"learning_rate": 0.00016994837589656906,
|
|
"loss": 0.7754,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 0.17291034050810586,
|
|
"grad_norm": 0.29384443163871765,
|
|
"learning_rate": 0.00016971995066014894,
|
|
"loss": 0.7833,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 0.17302218419536855,
|
|
"grad_norm": 0.3194182813167572,
|
|
"learning_rate": 0.0001694915254237288,
|
|
"loss": 0.7813,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 0.17313402788263124,
|
|
"grad_norm": 0.25995251536369324,
|
|
"learning_rate": 0.0001692631001873087,
|
|
"loss": 0.7796,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 0.1732458715698939,
|
|
"grad_norm": 0.272419810295105,
|
|
"learning_rate": 0.0001690346749508886,
|
|
"loss": 0.7839,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 0.1733577152571566,
|
|
"grad_norm": 0.26239413022994995,
|
|
"learning_rate": 0.00016880624971446845,
|
|
"loss": 0.7807,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 0.1734695589444193,
|
|
"grad_norm": 0.29991698265075684,
|
|
"learning_rate": 0.00016857782447804833,
|
|
"loss": 0.7941,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 0.17358140263168195,
|
|
"grad_norm": 0.2812528908252716,
|
|
"learning_rate": 0.00016834939924162824,
|
|
"loss": 0.7863,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 0.17369324631894464,
|
|
"grad_norm": 0.2557685077190399,
|
|
"learning_rate": 0.0001681209740052081,
|
|
"loss": 0.7953,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 0.17380509000620734,
|
|
"grad_norm": 0.28565913438796997,
|
|
"learning_rate": 0.00016789254876878798,
|
|
"loss": 0.7934,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 0.17391693369347,
|
|
"grad_norm": 0.25316086411476135,
|
|
"learning_rate": 0.00016766412353236783,
|
|
"loss": 0.7969,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 0.1740287773807327,
|
|
"grad_norm": 0.2636478543281555,
|
|
"learning_rate": 0.00016743569829594774,
|
|
"loss": 0.8021,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 0.17414062106799538,
|
|
"grad_norm": 0.28839442133903503,
|
|
"learning_rate": 0.00016720727305952763,
|
|
"loss": 0.8108,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 0.17425246475525805,
|
|
"grad_norm": 0.2453639954328537,
|
|
"learning_rate": 0.00016697884782310748,
|
|
"loss": 0.8034,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 0.17436430844252074,
|
|
"grad_norm": 0.2550848424434662,
|
|
"learning_rate": 0.0001667504225866874,
|
|
"loss": 0.8169,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 0.1744761521297834,
|
|
"grad_norm": 0.24949923157691956,
|
|
"learning_rate": 0.00016652199735026727,
|
|
"loss": 0.8167,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 0.1745879958170461,
|
|
"grad_norm": 0.24357125163078308,
|
|
"learning_rate": 0.00016629357211384713,
|
|
"loss": 0.821,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 0.17469983950430878,
|
|
"grad_norm": 0.2246461659669876,
|
|
"learning_rate": 0.000166065146877427,
|
|
"loss": 0.82,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 0.17481168319157145,
|
|
"grad_norm": 0.26160740852355957,
|
|
"learning_rate": 0.00016583672164100692,
|
|
"loss": 0.8167,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 0.17492352687883414,
|
|
"grad_norm": 0.25773337483406067,
|
|
"learning_rate": 0.00016560829640458678,
|
|
"loss": 0.8305,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 0.17503537056609683,
|
|
"grad_norm": 0.24051527678966522,
|
|
"learning_rate": 0.00016537987116816666,
|
|
"loss": 0.8201,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 0.1751472142533595,
|
|
"grad_norm": 0.2507860064506531,
|
|
"learning_rate": 0.00016515144593174654,
|
|
"loss": 0.8444,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 0.1752590579406222,
|
|
"grad_norm": 0.24071821570396423,
|
|
"learning_rate": 0.00016492302069532643,
|
|
"loss": 0.8071,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 0.17537090162788488,
|
|
"grad_norm": 0.2533905506134033,
|
|
"learning_rate": 0.0001646945954589063,
|
|
"loss": 0.8164,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 0.17548274531514754,
|
|
"grad_norm": 0.2546316683292389,
|
|
"learning_rate": 0.0001644661702224862,
|
|
"loss": 0.8237,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 0.17559458900241023,
|
|
"grad_norm": 0.25692155957221985,
|
|
"learning_rate": 0.00016423774498606605,
|
|
"loss": 0.8198,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 0.17570643268967293,
|
|
"grad_norm": 0.254535436630249,
|
|
"learning_rate": 0.00016400931974964596,
|
|
"loss": 0.8061,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 0.1758182763769356,
|
|
"grad_norm": 0.2557326555252075,
|
|
"learning_rate": 0.00016378089451322584,
|
|
"loss": 0.8194,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 0.17593012006419828,
|
|
"grad_norm": 0.24234241247177124,
|
|
"learning_rate": 0.0001635524692768057,
|
|
"loss": 0.8183,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 0.17604196375146094,
|
|
"grad_norm": 0.2597709596157074,
|
|
"learning_rate": 0.00016332404404038558,
|
|
"loss": 0.7957,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 0.17615380743872364,
|
|
"grad_norm": 0.2896418273448944,
|
|
"learning_rate": 0.0001630956188039655,
|
|
"loss": 0.8146,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 0.17626565112598633,
|
|
"grad_norm": 0.2686966061592102,
|
|
"learning_rate": 0.00016286719356754535,
|
|
"loss": 0.7988,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 0.176377494813249,
|
|
"grad_norm": 0.26220840215682983,
|
|
"learning_rate": 0.00016263876833112523,
|
|
"loss": 0.7936,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 0.17648933850051168,
|
|
"grad_norm": 0.260547012090683,
|
|
"learning_rate": 0.00016241034309470508,
|
|
"loss": 0.8002,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 0.17660118218777437,
|
|
"grad_norm": 0.22341471910476685,
|
|
"learning_rate": 0.000162181917858285,
|
|
"loss": 0.7935,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 0.17671302587503704,
|
|
"grad_norm": 0.24994009733200073,
|
|
"learning_rate": 0.00016195349262186488,
|
|
"loss": 0.7971,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 0.17682486956229973,
|
|
"grad_norm": 0.24070651829242706,
|
|
"learning_rate": 0.00016172506738544473,
|
|
"loss": 0.7844,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 0.17693671324956242,
|
|
"grad_norm": 0.23858696222305298,
|
|
"learning_rate": 0.00016149664214902461,
|
|
"loss": 0.7687,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 0.17704855693682509,
|
|
"grad_norm": 0.24684946238994598,
|
|
"learning_rate": 0.00016126821691260452,
|
|
"loss": 0.7848,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 0.17716040062408778,
|
|
"grad_norm": 0.2525545656681061,
|
|
"learning_rate": 0.00016103979167618438,
|
|
"loss": 0.773,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 0.17727224431135047,
|
|
"grad_norm": 0.2485392689704895,
|
|
"learning_rate": 0.00016081136643976426,
|
|
"loss": 0.7787,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 0.17738408799861313,
|
|
"grad_norm": 0.2384241223335266,
|
|
"learning_rate": 0.00016058294120334415,
|
|
"loss": 0.7732,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 0.17749593168587582,
|
|
"grad_norm": 0.25029659271240234,
|
|
"learning_rate": 0.00016035451596692403,
|
|
"loss": 0.7819,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 0.1776077753731385,
|
|
"grad_norm": 0.2988499701023102,
|
|
"learning_rate": 0.0001601260907305039,
|
|
"loss": 0.7815,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 0.17771961906040118,
|
|
"grad_norm": 0.25840380787849426,
|
|
"learning_rate": 0.0001598976654940838,
|
|
"loss": 0.7899,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 0.17783146274766387,
|
|
"grad_norm": 0.2870889902114868,
|
|
"learning_rate": 0.00015966924025766365,
|
|
"loss": 0.7964,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 0.17794330643492653,
|
|
"grad_norm": 0.270702987909317,
|
|
"learning_rate": 0.00015944081502124356,
|
|
"loss": 0.7907,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 0.17805515012218923,
|
|
"grad_norm": 0.24939289689064026,
|
|
"learning_rate": 0.00015921238978482344,
|
|
"loss": 0.7909,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 0.17816699380945192,
|
|
"grad_norm": 0.25692620873451233,
|
|
"learning_rate": 0.0001589839645484033,
|
|
"loss": 0.7864,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 0.17827883749671458,
|
|
"grad_norm": 0.25667235255241394,
|
|
"learning_rate": 0.00015875553931198318,
|
|
"loss": 0.7792,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 0.17839068118397727,
|
|
"grad_norm": 0.27988189458847046,
|
|
"learning_rate": 0.0001585271140755631,
|
|
"loss": 0.78,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 0.17850252487123996,
|
|
"grad_norm": 0.26706936955451965,
|
|
"learning_rate": 0.00015829868883914295,
|
|
"loss": 0.7764,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 0.17861436855850263,
|
|
"grad_norm": 0.25825801491737366,
|
|
"learning_rate": 0.00015807026360272283,
|
|
"loss": 0.7798,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 0.17872621224576532,
|
|
"grad_norm": 0.26630404591560364,
|
|
"learning_rate": 0.0001578418383663027,
|
|
"loss": 0.7877,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 0.178838055933028,
|
|
"grad_norm": 0.24562442302703857,
|
|
"learning_rate": 0.0001576134131298826,
|
|
"loss": 0.7761,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 0.17894989962029068,
|
|
"grad_norm": 0.2607520818710327,
|
|
"learning_rate": 0.00015738498789346248,
|
|
"loss": 0.7844,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.17906174330755337,
|
|
"grad_norm": 0.25256794691085815,
|
|
"learning_rate": 0.00015715656265704233,
|
|
"loss": 0.7712,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 0.17917358699481606,
|
|
"grad_norm": 0.24657808244228363,
|
|
"learning_rate": 0.00015692813742062222,
|
|
"loss": 0.7766,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 0.17928543068207872,
|
|
"grad_norm": 0.2546744644641876,
|
|
"learning_rate": 0.00015669971218420213,
|
|
"loss": 0.781,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 0.17939727436934141,
|
|
"grad_norm": 0.24849241971969604,
|
|
"learning_rate": 0.00015647128694778198,
|
|
"loss": 0.786,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 0.17950911805660408,
|
|
"grad_norm": 0.2447352409362793,
|
|
"learning_rate": 0.00015624286171136187,
|
|
"loss": 0.7805,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 0.17962096174386677,
|
|
"grad_norm": 0.3004114031791687,
|
|
"learning_rate": 0.00015601443647494178,
|
|
"loss": 0.7748,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 0.17973280543112946,
|
|
"grad_norm": 0.24974007904529572,
|
|
"learning_rate": 0.00015578601123852163,
|
|
"loss": 0.7823,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 0.17984464911839212,
|
|
"grad_norm": 0.2995624542236328,
|
|
"learning_rate": 0.00015555758600210151,
|
|
"loss": 0.7894,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 0.17995649280565482,
|
|
"grad_norm": 0.2560220956802368,
|
|
"learning_rate": 0.0001553291607656814,
|
|
"loss": 0.7849,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 0.1800683364929175,
|
|
"grad_norm": 0.24940122663974762,
|
|
"learning_rate": 0.00015510073552926128,
|
|
"loss": 0.7903,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 0.18018018018018017,
|
|
"grad_norm": 0.22082312405109406,
|
|
"learning_rate": 0.00015487231029284116,
|
|
"loss": 0.783,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 0.18029202386744286,
|
|
"grad_norm": 0.2670224606990814,
|
|
"learning_rate": 0.00015464388505642104,
|
|
"loss": 0.7919,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 0.18040386755470555,
|
|
"grad_norm": 0.2533135414123535,
|
|
"learning_rate": 0.0001544154598200009,
|
|
"loss": 0.8007,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 0.18051571124196822,
|
|
"grad_norm": 0.2660861909389496,
|
|
"learning_rate": 0.0001541870345835808,
|
|
"loss": 0.7913,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 0.1806275549292309,
|
|
"grad_norm": 0.2556677460670471,
|
|
"learning_rate": 0.0001539586093471607,
|
|
"loss": 0.7826,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 0.1807393986164936,
|
|
"grad_norm": 0.275900661945343,
|
|
"learning_rate": 0.00015373018411074055,
|
|
"loss": 0.8048,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 0.18085124230375627,
|
|
"grad_norm": 0.29176998138427734,
|
|
"learning_rate": 0.00015350175887432043,
|
|
"loss": 0.8241,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 0.18096308599101896,
|
|
"grad_norm": 0.2635776996612549,
|
|
"learning_rate": 0.00015327333363790034,
|
|
"loss": 0.8211,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 0.18107492967828162,
|
|
"grad_norm": 0.27744734287261963,
|
|
"learning_rate": 0.0001530449084014802,
|
|
"loss": 0.8254,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 0.1811867733655443,
|
|
"grad_norm": 0.28162074089050293,
|
|
"learning_rate": 0.00015281648316506008,
|
|
"loss": 0.8182,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 0.181298617052807,
|
|
"grad_norm": 0.29347339272499084,
|
|
"learning_rate": 0.00015258805792863996,
|
|
"loss": 0.812,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 0.18141046074006967,
|
|
"grad_norm": 0.26170992851257324,
|
|
"learning_rate": 0.00015235963269221985,
|
|
"loss": 0.8221,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 0.18152230442733236,
|
|
"grad_norm": 0.27848196029663086,
|
|
"learning_rate": 0.00015213120745579973,
|
|
"loss": 0.825,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 0.18163414811459505,
|
|
"grad_norm": 0.2994973659515381,
|
|
"learning_rate": 0.00015190278221937958,
|
|
"loss": 0.8158,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 0.18174599180185771,
|
|
"grad_norm": 0.27873843908309937,
|
|
"learning_rate": 0.00015167435698295947,
|
|
"loss": 0.816,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 0.1818578354891204,
|
|
"grad_norm": 0.3014775812625885,
|
|
"learning_rate": 0.00015144593174653938,
|
|
"loss": 0.8174,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 0.1819696791763831,
|
|
"grad_norm": 0.29963594675064087,
|
|
"learning_rate": 0.00015121750651011923,
|
|
"loss": 0.8104,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 0.18208152286364576,
|
|
"grad_norm": 0.3388141393661499,
|
|
"learning_rate": 0.00015098908127369912,
|
|
"loss": 0.826,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 0.18219336655090845,
|
|
"grad_norm": 0.29143062233924866,
|
|
"learning_rate": 0.000150760656037279,
|
|
"loss": 0.8222,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 0.18230521023817114,
|
|
"grad_norm": 0.327824205160141,
|
|
"learning_rate": 0.00015053223080085888,
|
|
"loss": 0.8186,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 0.1824170539254338,
|
|
"grad_norm": 0.3053797483444214,
|
|
"learning_rate": 0.00015030380556443876,
|
|
"loss": 0.8214,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 0.1825288976126965,
|
|
"grad_norm": 0.3030015230178833,
|
|
"learning_rate": 0.00015007538032801865,
|
|
"loss": 0.8198,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 0.18264074129995916,
|
|
"grad_norm": 0.3147192597389221,
|
|
"learning_rate": 0.0001498469550915985,
|
|
"loss": 0.8224,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 0.18275258498722186,
|
|
"grad_norm": 0.2838999927043915,
|
|
"learning_rate": 0.0001496185298551784,
|
|
"loss": 0.8142,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 0.18286442867448455,
|
|
"grad_norm": 0.27273476123809814,
|
|
"learning_rate": 0.0001493901046187583,
|
|
"loss": 0.8054,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 0.1829762723617472,
|
|
"grad_norm": 0.2754770517349243,
|
|
"learning_rate": 0.00014916167938233815,
|
|
"loss": 0.8131,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 0.1830881160490099,
|
|
"grad_norm": 0.29061514139175415,
|
|
"learning_rate": 0.00014893325414591803,
|
|
"loss": 0.7988,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 0.1831999597362726,
|
|
"grad_norm": 0.2525017559528351,
|
|
"learning_rate": 0.00014870482890949794,
|
|
"loss": 0.8023,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 0.18331180342353526,
|
|
"grad_norm": 0.3019058108329773,
|
|
"learning_rate": 0.0001484764036730778,
|
|
"loss": 0.8077,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 0.18342364711079795,
|
|
"grad_norm": 0.302090048789978,
|
|
"learning_rate": 0.00014824797843665768,
|
|
"loss": 0.812,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 0.18353549079806064,
|
|
"grad_norm": 0.29742154479026794,
|
|
"learning_rate": 0.00014801955320023757,
|
|
"loss": 0.7911,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 0.1836473344853233,
|
|
"grad_norm": 0.31950804591178894,
|
|
"learning_rate": 0.00014779112796381745,
|
|
"loss": 0.7875,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 0.183759178172586,
|
|
"grad_norm": 0.32971978187561035,
|
|
"learning_rate": 0.00014756270272739733,
|
|
"loss": 0.7788,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 0.1838710218598487,
|
|
"grad_norm": 0.2941220700740814,
|
|
"learning_rate": 0.00014733427749097721,
|
|
"loss": 0.7772,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 0.18398286554711135,
|
|
"grad_norm": 0.2639923393726349,
|
|
"learning_rate": 0.00014710585225455707,
|
|
"loss": 0.7708,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 0.18409470923437404,
|
|
"grad_norm": 0.2483467161655426,
|
|
"learning_rate": 0.00014687742701813698,
|
|
"loss": 0.7846,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 0.1842065529216367,
|
|
"grad_norm": 0.31150713562965393,
|
|
"learning_rate": 0.00014664900178171683,
|
|
"loss": 0.7853,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 0.1843183966088994,
|
|
"grad_norm": 0.30439406633377075,
|
|
"learning_rate": 0.00014642057654529672,
|
|
"loss": 0.7779,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 0.1844302402961621,
|
|
"grad_norm": 0.29318898916244507,
|
|
"learning_rate": 0.00014619215130887663,
|
|
"loss": 0.7911,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 0.18454208398342475,
|
|
"grad_norm": 0.2726874053478241,
|
|
"learning_rate": 0.00014596372607245648,
|
|
"loss": 0.7869,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 0.18465392767068745,
|
|
"grad_norm": 0.2978016436100006,
|
|
"learning_rate": 0.00014573530083603637,
|
|
"loss": 0.783,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 0.18476577135795014,
|
|
"grad_norm": 0.3107501268386841,
|
|
"learning_rate": 0.00014550687559961625,
|
|
"loss": 0.801,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 0.1848776150452128,
|
|
"grad_norm": 0.2848517894744873,
|
|
"learning_rate": 0.00014527845036319613,
|
|
"loss": 0.8063,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 0.1849894587324755,
|
|
"grad_norm": 0.2625429332256317,
|
|
"learning_rate": 0.00014505002512677601,
|
|
"loss": 0.8074,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 0.18510130241973818,
|
|
"grad_norm": 0.2805044949054718,
|
|
"learning_rate": 0.0001448215998903559,
|
|
"loss": 0.8013,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 0.18521314610700085,
|
|
"grad_norm": 0.27657589316368103,
|
|
"learning_rate": 0.00014459317465393575,
|
|
"loss": 0.8012,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 0.18532498979426354,
|
|
"grad_norm": 0.2780141532421112,
|
|
"learning_rate": 0.00014436474941751566,
|
|
"loss": 0.8161,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 0.18543683348152623,
|
|
"grad_norm": 0.2871207892894745,
|
|
"learning_rate": 0.00014413632418109555,
|
|
"loss": 0.7899,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 0.1855486771687889,
|
|
"grad_norm": 0.2656658887863159,
|
|
"learning_rate": 0.0001439078989446754,
|
|
"loss": 0.7985,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 0.1856605208560516,
|
|
"grad_norm": 0.2766350209712982,
|
|
"learning_rate": 0.00014367947370825528,
|
|
"loss": 0.7999,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 0.18577236454331428,
|
|
"grad_norm": 0.2616749107837677,
|
|
"learning_rate": 0.0001434510484718352,
|
|
"loss": 0.8002,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 0.18588420823057694,
|
|
"grad_norm": 0.25887414813041687,
|
|
"learning_rate": 0.00014322262323541505,
|
|
"loss": 0.8112,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 0.18599605191783963,
|
|
"grad_norm": 0.2594297528266907,
|
|
"learning_rate": 0.00014299419799899493,
|
|
"loss": 0.802,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 0.1861078956051023,
|
|
"grad_norm": 0.2535499036312103,
|
|
"learning_rate": 0.00014276577276257482,
|
|
"loss": 0.7867,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 0.186219739292365,
|
|
"grad_norm": 0.25161436200141907,
|
|
"learning_rate": 0.0001425373475261547,
|
|
"loss": 0.8059,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 0.18633158297962768,
|
|
"grad_norm": 0.22897444665431976,
|
|
"learning_rate": 0.00014230892228973458,
|
|
"loss": 0.7864,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 0.18644342666689034,
|
|
"grad_norm": 0.27164047956466675,
|
|
"learning_rate": 0.00014208049705331446,
|
|
"loss": 0.796,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 0.18655527035415304,
|
|
"grad_norm": 0.2717941701412201,
|
|
"learning_rate": 0.00014185207181689432,
|
|
"loss": 0.7801,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 0.18666711404141573,
|
|
"grad_norm": 0.27144837379455566,
|
|
"learning_rate": 0.00014162364658047423,
|
|
"loss": 0.7758,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 0.1867789577286784,
|
|
"grad_norm": 0.2357831746339798,
|
|
"learning_rate": 0.00014139522134405409,
|
|
"loss": 0.7674,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 0.18689080141594108,
|
|
"grad_norm": 0.23233544826507568,
|
|
"learning_rate": 0.00014116679610763397,
|
|
"loss": 0.7827,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 0.18700264510320377,
|
|
"grad_norm": 0.2399321347475052,
|
|
"learning_rate": 0.00014093837087121385,
|
|
"loss": 0.7811,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 0.18711448879046644,
|
|
"grad_norm": 0.2493642419576645,
|
|
"learning_rate": 0.00014070994563479373,
|
|
"loss": 0.7762,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 0.18722633247772913,
|
|
"grad_norm": 0.23383350670337677,
|
|
"learning_rate": 0.00014048152039837362,
|
|
"loss": 0.7754,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 0.18733817616499182,
|
|
"grad_norm": 0.2624364197254181,
|
|
"learning_rate": 0.0001402530951619535,
|
|
"loss": 0.7766,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 0.18745001985225448,
|
|
"grad_norm": 0.24138151109218597,
|
|
"learning_rate": 0.00014002466992553336,
|
|
"loss": 0.7869,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 0.18756186353951718,
|
|
"grad_norm": 0.2397204041481018,
|
|
"learning_rate": 0.00013979624468911326,
|
|
"loss": 0.7974,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 0.18767370722677984,
|
|
"grad_norm": 0.27491655945777893,
|
|
"learning_rate": 0.00013956781945269315,
|
|
"loss": 0.8011,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 0.18778555091404253,
|
|
"grad_norm": 0.2321402132511139,
|
|
"learning_rate": 0.000139339394216273,
|
|
"loss": 0.803,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 0.18789739460130522,
|
|
"grad_norm": 0.24487042427062988,
|
|
"learning_rate": 0.00013911096897985289,
|
|
"loss": 0.7975,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 0.1880092382885679,
|
|
"grad_norm": 0.23328396677970886,
|
|
"learning_rate": 0.0001388825437434328,
|
|
"loss": 0.795,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 0.18812108197583058,
|
|
"grad_norm": 0.22705566883087158,
|
|
"learning_rate": 0.00013865411850701265,
|
|
"loss": 0.7895,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 0.18823292566309327,
|
|
"grad_norm": 0.24339929223060608,
|
|
"learning_rate": 0.00013842569327059253,
|
|
"loss": 0.7931,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 0.18834476935035593,
|
|
"grad_norm": 0.2613057494163513,
|
|
"learning_rate": 0.00013819726803417242,
|
|
"loss": 0.7785,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 0.18845661303761863,
|
|
"grad_norm": 0.27011603116989136,
|
|
"learning_rate": 0.0001379688427977523,
|
|
"loss": 0.7853,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 0.18856845672488132,
|
|
"grad_norm": 0.26589342951774597,
|
|
"learning_rate": 0.00013774041756133218,
|
|
"loss": 0.7893,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 0.18868030041214398,
|
|
"grad_norm": 0.26286208629608154,
|
|
"learning_rate": 0.00013751199232491207,
|
|
"loss": 0.7707,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 0.18879214409940667,
|
|
"grad_norm": 0.3021993637084961,
|
|
"learning_rate": 0.00013728356708849192,
|
|
"loss": 0.7896,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 0.18890398778666936,
|
|
"grad_norm": 0.30742523074150085,
|
|
"learning_rate": 0.00013705514185207183,
|
|
"loss": 0.7895,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 0.18901583147393203,
|
|
"grad_norm": 0.3027999699115753,
|
|
"learning_rate": 0.0001368267166156517,
|
|
"loss": 0.7839,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 0.18912767516119472,
|
|
"grad_norm": 0.29199281334877014,
|
|
"learning_rate": 0.00013659829137923157,
|
|
"loss": 0.7771,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 0.18923951884845738,
|
|
"grad_norm": 0.2460477203130722,
|
|
"learning_rate": 0.00013636986614281145,
|
|
"loss": 0.7823,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 0.18935136253572007,
|
|
"grad_norm": 0.2608555853366852,
|
|
"learning_rate": 0.00013614144090639134,
|
|
"loss": 0.7664,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 0.18946320622298277,
|
|
"grad_norm": 0.2723162770271301,
|
|
"learning_rate": 0.00013591301566997122,
|
|
"loss": 0.7768,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 0.18957504991024543,
|
|
"grad_norm": 0.2690962255001068,
|
|
"learning_rate": 0.0001356845904335511,
|
|
"loss": 0.7697,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 0.18968689359750812,
|
|
"grad_norm": 0.2892717719078064,
|
|
"learning_rate": 0.00013545616519713096,
|
|
"loss": 0.769,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 0.1897987372847708,
|
|
"grad_norm": 0.2581406533718109,
|
|
"learning_rate": 0.00013522773996071087,
|
|
"loss": 0.7766,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 0.18991058097203348,
|
|
"grad_norm": 0.2944723963737488,
|
|
"learning_rate": 0.00013499931472429075,
|
|
"loss": 0.7638,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 0.19002242465929617,
|
|
"grad_norm": 0.2776504158973694,
|
|
"learning_rate": 0.0001347708894878706,
|
|
"loss": 0.7731,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 0.19013426834655886,
|
|
"grad_norm": 0.267098993062973,
|
|
"learning_rate": 0.00013454246425145052,
|
|
"loss": 0.7772,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 0.19024611203382152,
|
|
"grad_norm": 0.2806127071380615,
|
|
"learning_rate": 0.0001343140390150304,
|
|
"loss": 0.772,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 0.19035795572108422,
|
|
"grad_norm": 0.2872319519519806,
|
|
"learning_rate": 0.00013408561377861025,
|
|
"loss": 0.7695,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 0.1904697994083469,
|
|
"grad_norm": 0.24477818608283997,
|
|
"learning_rate": 0.00013385718854219014,
|
|
"loss": 0.7764,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 0.19058164309560957,
|
|
"grad_norm": 0.2637476623058319,
|
|
"learning_rate": 0.00013362876330577005,
|
|
"loss": 0.7712,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 0.19069348678287226,
|
|
"grad_norm": 0.2676442861557007,
|
|
"learning_rate": 0.0001334003380693499,
|
|
"loss": 0.7707,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 0.19080533047013493,
|
|
"grad_norm": 0.2592306435108185,
|
|
"learning_rate": 0.00013317191283292979,
|
|
"loss": 0.7808,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 0.19091717415739762,
|
|
"grad_norm": 0.3543199896812439,
|
|
"learning_rate": 0.00013294348759650967,
|
|
"loss": 0.7928,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 0.1910290178446603,
|
|
"grad_norm": 0.26262548565864563,
|
|
"learning_rate": 0.00013271506236008955,
|
|
"loss": 0.7677,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 0.19114086153192297,
|
|
"grad_norm": 0.2845424711704254,
|
|
"learning_rate": 0.00013248663712366943,
|
|
"loss": 0.7758,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 0.19125270521918566,
|
|
"grad_norm": 0.2694297730922699,
|
|
"learning_rate": 0.00013225821188724932,
|
|
"loss": 0.7857,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 0.19136454890644836,
|
|
"grad_norm": 0.2682325839996338,
|
|
"learning_rate": 0.00013202978665082917,
|
|
"loss": 0.782,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 0.19147639259371102,
|
|
"grad_norm": 0.26535049080848694,
|
|
"learning_rate": 0.00013180136141440908,
|
|
"loss": 0.7796,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 0.1915882362809737,
|
|
"grad_norm": 0.2759861946105957,
|
|
"learning_rate": 0.00013157293617798894,
|
|
"loss": 0.7732,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 0.1917000799682364,
|
|
"grad_norm": 0.24873244762420654,
|
|
"learning_rate": 0.00013134451094156882,
|
|
"loss": 0.7763,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 0.19181192365549907,
|
|
"grad_norm": 0.2826152443885803,
|
|
"learning_rate": 0.0001311160857051487,
|
|
"loss": 0.7748,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 0.19192376734276176,
|
|
"grad_norm": 0.2823798358440399,
|
|
"learning_rate": 0.00013088766046872859,
|
|
"loss": 0.768,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 0.19203561103002445,
|
|
"grad_norm": 0.2591745853424072,
|
|
"learning_rate": 0.00013065923523230847,
|
|
"loss": 0.7831,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 0.19214745471728711,
|
|
"grad_norm": 0.24773742258548737,
|
|
"learning_rate": 0.00013043080999588835,
|
|
"loss": 0.7799,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 0.1922592984045498,
|
|
"grad_norm": 0.28184765577316284,
|
|
"learning_rate": 0.0001302023847594682,
|
|
"loss": 0.787,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 0.19237114209181247,
|
|
"grad_norm": 0.24396668374538422,
|
|
"learning_rate": 0.00012997395952304812,
|
|
"loss": 0.7777,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 0.19248298577907516,
|
|
"grad_norm": 0.25493332743644714,
|
|
"learning_rate": 0.000129745534286628,
|
|
"loss": 0.7842,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 0.19259482946633785,
|
|
"grad_norm": 0.2615022361278534,
|
|
"learning_rate": 0.00012951710905020786,
|
|
"loss": 0.788,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 0.19270667315360052,
|
|
"grad_norm": 0.28270524740219116,
|
|
"learning_rate": 0.00012928868381378774,
|
|
"loss": 0.7788,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 0.1928185168408632,
|
|
"grad_norm": 0.24917210638523102,
|
|
"learning_rate": 0.00012906025857736765,
|
|
"loss": 0.7731,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 0.1929303605281259,
|
|
"grad_norm": 0.2589946985244751,
|
|
"learning_rate": 0.0001288318333409475,
|
|
"loss": 0.7781,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 0.19304220421538856,
|
|
"grad_norm": 0.23770585656166077,
|
|
"learning_rate": 0.0001286034081045274,
|
|
"loss": 0.7902,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 0.19315404790265125,
|
|
"grad_norm": 0.22782771289348602,
|
|
"learning_rate": 0.00012837498286810727,
|
|
"loss": 0.7875,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 0.19326589158991395,
|
|
"grad_norm": 0.2611001431941986,
|
|
"learning_rate": 0.00012814655763168715,
|
|
"loss": 0.794,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 0.1933777352771766,
|
|
"grad_norm": 0.2642746865749359,
|
|
"learning_rate": 0.00012791813239526704,
|
|
"loss": 0.8005,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 0.1934895789644393,
|
|
"grad_norm": 0.2470688372850418,
|
|
"learning_rate": 0.00012768970715884692,
|
|
"loss": 0.7854,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 0.193601422651702,
|
|
"grad_norm": 0.24735964834690094,
|
|
"learning_rate": 0.00012746128192242677,
|
|
"loss": 0.7918,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 0.19371326633896466,
|
|
"grad_norm": 0.2734208405017853,
|
|
"learning_rate": 0.00012723285668600668,
|
|
"loss": 0.7719,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 0.19382511002622735,
|
|
"grad_norm": 0.28373652696609497,
|
|
"learning_rate": 0.00012700443144958657,
|
|
"loss": 0.7743,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 0.19393695371349004,
|
|
"grad_norm": 0.25755295157432556,
|
|
"learning_rate": 0.00012677600621316642,
|
|
"loss": 0.7761,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 0.1940487974007527,
|
|
"grad_norm": 0.2918241322040558,
|
|
"learning_rate": 0.0001265475809767463,
|
|
"loss": 0.7885,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 0.1941606410880154,
|
|
"grad_norm": 0.2589518427848816,
|
|
"learning_rate": 0.0001263191557403262,
|
|
"loss": 0.7781,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 0.19427248477527806,
|
|
"grad_norm": 0.2941739857196808,
|
|
"learning_rate": 0.00012609073050390607,
|
|
"loss": 0.7896,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 0.19438432846254075,
|
|
"grad_norm": 0.2625831663608551,
|
|
"learning_rate": 0.00012586230526748595,
|
|
"loss": 0.7797,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 0.19449617214980344,
|
|
"grad_norm": 0.2731517255306244,
|
|
"learning_rate": 0.0001256338800310658,
|
|
"loss": 0.7861,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 0.1946080158370661,
|
|
"grad_norm": 0.2802453637123108,
|
|
"learning_rate": 0.00012540545479464572,
|
|
"loss": 0.8066,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 0.1947198595243288,
|
|
"grad_norm": 0.24151596426963806,
|
|
"learning_rate": 0.0001251770295582256,
|
|
"loss": 0.7746,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 0.1948317032115915,
|
|
"grad_norm": 0.27006617188453674,
|
|
"learning_rate": 0.00012494860432180549,
|
|
"loss": 0.7796,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 0.19494354689885415,
|
|
"grad_norm": 0.2574283480644226,
|
|
"learning_rate": 0.00012472017908538537,
|
|
"loss": 0.7809,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 0.19505539058611684,
|
|
"grad_norm": 0.25741514563560486,
|
|
"learning_rate": 0.00012449175384896522,
|
|
"loss": 0.7792,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 0.19516723427337954,
|
|
"grad_norm": 0.2619360685348511,
|
|
"learning_rate": 0.00012426332861254513,
|
|
"loss": 0.7768,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 0.1952790779606422,
|
|
"grad_norm": 0.28053224086761475,
|
|
"learning_rate": 0.000124034903376125,
|
|
"loss": 0.7841,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 0.1953909216479049,
|
|
"grad_norm": 0.24019859731197357,
|
|
"learning_rate": 0.00012380647813970487,
|
|
"loss": 0.783,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 0.19550276533516758,
|
|
"grad_norm": 0.2747540771961212,
|
|
"learning_rate": 0.00012357805290328475,
|
|
"loss": 0.7911,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 0.19561460902243025,
|
|
"grad_norm": 0.28044483065605164,
|
|
"learning_rate": 0.00012334962766686464,
|
|
"loss": 0.7986,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 0.19572645270969294,
|
|
"grad_norm": 0.24908137321472168,
|
|
"learning_rate": 0.00012312120243044452,
|
|
"loss": 0.8087,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 0.1958382963969556,
|
|
"grad_norm": 0.29041793942451477,
|
|
"learning_rate": 0.0001228927771940244,
|
|
"loss": 0.8063,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 0.1959501400842183,
|
|
"grad_norm": 0.3020537495613098,
|
|
"learning_rate": 0.00012266435195760429,
|
|
"loss": 0.8004,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 0.19606198377148099,
|
|
"grad_norm": 0.29414400458335876,
|
|
"learning_rate": 0.00012243592672118417,
|
|
"loss": 0.7846,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 0.19617382745874365,
|
|
"grad_norm": 0.2648397386074066,
|
|
"learning_rate": 0.00012220750148476402,
|
|
"loss": 0.7708,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 0.19628567114600634,
|
|
"grad_norm": 0.2834302484989166,
|
|
"learning_rate": 0.00012197907624834392,
|
|
"loss": 0.7818,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 0.19639751483326903,
|
|
"grad_norm": 0.2748505175113678,
|
|
"learning_rate": 0.0001217506510119238,
|
|
"loss": 0.7642,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 0.1965093585205317,
|
|
"grad_norm": 0.32425326108932495,
|
|
"learning_rate": 0.00012152222577550367,
|
|
"loss": 0.7765,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 0.1966212022077944,
|
|
"grad_norm": 0.27183324098587036,
|
|
"learning_rate": 0.00012129380053908357,
|
|
"loss": 0.7572,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 0.19673304589505708,
|
|
"grad_norm": 0.28190943598747253,
|
|
"learning_rate": 0.00012106537530266344,
|
|
"loss": 0.7571,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 0.19684488958231974,
|
|
"grad_norm": 0.5151196718215942,
|
|
"learning_rate": 0.00012083695006624332,
|
|
"loss": 0.7565,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 0.19695673326958243,
|
|
"grad_norm": 0.2523132264614105,
|
|
"learning_rate": 0.0001206085248298232,
|
|
"loss": 0.7597,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 0.19706857695684513,
|
|
"grad_norm": 0.27336063981056213,
|
|
"learning_rate": 0.00012038009959340309,
|
|
"loss": 0.7546,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 0.1971804206441078,
|
|
"grad_norm": 0.25119057297706604,
|
|
"learning_rate": 0.00012015167435698296,
|
|
"loss": 0.7519,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 0.19729226433137048,
|
|
"grad_norm": 0.281147301197052,
|
|
"learning_rate": 0.00011992324912056284,
|
|
"loss": 0.7623,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 0.19740410801863315,
|
|
"grad_norm": 0.2463361769914627,
|
|
"learning_rate": 0.00011969482388414272,
|
|
"loss": 0.754,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 0.19751595170589584,
|
|
"grad_norm": 0.2902059853076935,
|
|
"learning_rate": 0.0001194663986477226,
|
|
"loss": 0.7578,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 0.19762779539315853,
|
|
"grad_norm": 0.2590588629245758,
|
|
"learning_rate": 0.00011923797341130247,
|
|
"loss": 0.7427,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 0.1977396390804212,
|
|
"grad_norm": 0.24349506199359894,
|
|
"learning_rate": 0.00011900954817488237,
|
|
"loss": 0.7599,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 0.19785148276768388,
|
|
"grad_norm": 0.2568139135837555,
|
|
"learning_rate": 0.00011878112293846224,
|
|
"loss": 0.7673,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 0.19796332645494658,
|
|
"grad_norm": 0.2617419958114624,
|
|
"learning_rate": 0.00011855269770204212,
|
|
"loss": 0.7637,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 0.19807517014220924,
|
|
"grad_norm": 0.24309082329273224,
|
|
"learning_rate": 0.000118324272465622,
|
|
"loss": 0.7583,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 0.19818701382947193,
|
|
"grad_norm": 0.22027656435966492,
|
|
"learning_rate": 0.00011809584722920189,
|
|
"loss": 0.7479,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 0.19829885751673462,
|
|
"grad_norm": 0.27296265959739685,
|
|
"learning_rate": 0.00011786742199278176,
|
|
"loss": 0.765,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 0.1984107012039973,
|
|
"grad_norm": 0.2589128613471985,
|
|
"learning_rate": 0.00011763899675636165,
|
|
"loss": 0.777,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 0.19852254489125998,
|
|
"grad_norm": 0.27665242552757263,
|
|
"learning_rate": 0.00011741057151994152,
|
|
"loss": 0.7656,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 0.19863438857852267,
|
|
"grad_norm": 0.27103251218795776,
|
|
"learning_rate": 0.0001171821462835214,
|
|
"loss": 0.7716,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 0.19874623226578533,
|
|
"grad_norm": 0.2768172025680542,
|
|
"learning_rate": 0.00011695372104710127,
|
|
"loss": 0.7738,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 0.19885807595304802,
|
|
"grad_norm": 0.2424757182598114,
|
|
"learning_rate": 0.00011672529581068117,
|
|
"loss": 0.7793,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 0.1989699196403107,
|
|
"grad_norm": 0.2821860909461975,
|
|
"learning_rate": 0.00011649687057426104,
|
|
"loss": 0.7771,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 0.19908176332757338,
|
|
"grad_norm": 0.28263264894485474,
|
|
"learning_rate": 0.00011626844533784092,
|
|
"loss": 0.7812,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 0.19919360701483607,
|
|
"grad_norm": 0.24835869669914246,
|
|
"learning_rate": 0.0001160400201014208,
|
|
"loss": 0.7753,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 0.19930545070209874,
|
|
"grad_norm": 0.23325562477111816,
|
|
"learning_rate": 0.00011581159486500069,
|
|
"loss": 0.7763,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 0.19941729438936143,
|
|
"grad_norm": 0.2520182132720947,
|
|
"learning_rate": 0.00011558316962858056,
|
|
"loss": 0.791,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 0.19952913807662412,
|
|
"grad_norm": 0.2478768676519394,
|
|
"learning_rate": 0.00011535474439216045,
|
|
"loss": 0.7819,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 0.19964098176388678,
|
|
"grad_norm": 0.2749478220939636,
|
|
"learning_rate": 0.00011512631915574032,
|
|
"loss": 0.7805,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 0.19975282545114947,
|
|
"grad_norm": 0.2417723685503006,
|
|
"learning_rate": 0.0001148978939193202,
|
|
"loss": 0.766,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 0.19986466913841217,
|
|
"grad_norm": 0.25219354033470154,
|
|
"learning_rate": 0.00011466946868290008,
|
|
"loss": 0.758,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 0.19997651282567483,
|
|
"grad_norm": 0.24644000828266144,
|
|
"learning_rate": 0.00011444104344647997,
|
|
"loss": 0.7569,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 0.20008835651293752,
|
|
"grad_norm": 0.2683338224887848,
|
|
"learning_rate": 0.00011421261821005986,
|
|
"loss": 0.7509,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 0.2002002002002002,
|
|
"grad_norm": 0.29149681329727173,
|
|
"learning_rate": 0.00011398419297363972,
|
|
"loss": 0.7611,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 0.20031204388746288,
|
|
"grad_norm": 0.2651118338108063,
|
|
"learning_rate": 0.00011375576773721962,
|
|
"loss": 0.756,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 0.20042388757472557,
|
|
"grad_norm": 0.26990607380867004,
|
|
"learning_rate": 0.00011352734250079949,
|
|
"loss": 0.7726,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 0.20053573126198823,
|
|
"grad_norm": 0.23897935450077057,
|
|
"learning_rate": 0.00011329891726437937,
|
|
"loss": 0.7875,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 0.20064757494925092,
|
|
"grad_norm": 0.2300727218389511,
|
|
"learning_rate": 0.00011307049202795926,
|
|
"loss": 0.7697,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 0.20075941863651361,
|
|
"grad_norm": 0.2873596251010895,
|
|
"learning_rate": 0.00011284206679153914,
|
|
"loss": 0.7776,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 0.20087126232377628,
|
|
"grad_norm": 0.29036712646484375,
|
|
"learning_rate": 0.00011261364155511901,
|
|
"loss": 0.7794,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 0.20098310601103897,
|
|
"grad_norm": 0.2837420701980591,
|
|
"learning_rate": 0.0001123852163186989,
|
|
"loss": 0.7818,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 0.20109494969830166,
|
|
"grad_norm": 0.2920686602592468,
|
|
"learning_rate": 0.00011215679108227877,
|
|
"loss": 0.7851,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 0.20120679338556433,
|
|
"grad_norm": 0.27664583921432495,
|
|
"learning_rate": 0.00011192836584585866,
|
|
"loss": 0.7601,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 0.20131863707282702,
|
|
"grad_norm": 0.26870399713516235,
|
|
"learning_rate": 0.00011169994060943853,
|
|
"loss": 0.7961,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 0.2014304807600897,
|
|
"grad_norm": 0.2502228021621704,
|
|
"learning_rate": 0.00011147151537301842,
|
|
"loss": 0.7827,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 0.20154232444735237,
|
|
"grad_norm": 0.2473440319299698,
|
|
"learning_rate": 0.00011124309013659829,
|
|
"loss": 0.7815,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 0.20165416813461506,
|
|
"grad_norm": 0.2513076663017273,
|
|
"learning_rate": 0.00011101466490017817,
|
|
"loss": 0.7675,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 0.20176601182187776,
|
|
"grad_norm": 0.2829226851463318,
|
|
"learning_rate": 0.00011078623966375806,
|
|
"loss": 0.7669,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 0.20187785550914042,
|
|
"grad_norm": 0.25758418440818787,
|
|
"learning_rate": 0.00011055781442733794,
|
|
"loss": 0.7707,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 0.2019896991964031,
|
|
"grad_norm": 0.27185285091400146,
|
|
"learning_rate": 0.00011032938919091781,
|
|
"loss": 0.7742,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 0.2021015428836658,
|
|
"grad_norm": 0.2802230417728424,
|
|
"learning_rate": 0.0001101009639544977,
|
|
"loss": 0.7821,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 0.20221338657092847,
|
|
"grad_norm": 0.2882921099662781,
|
|
"learning_rate": 0.00010987253871807757,
|
|
"loss": 0.779,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 0.20232523025819116,
|
|
"grad_norm": 0.2569839358329773,
|
|
"learning_rate": 0.00010964411348165746,
|
|
"loss": 0.7694,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 0.20243707394545382,
|
|
"grad_norm": 0.2600938379764557,
|
|
"learning_rate": 0.00010941568824523733,
|
|
"loss": 0.7781,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 0.2025489176327165,
|
|
"grad_norm": 0.28083154559135437,
|
|
"learning_rate": 0.00010918726300881722,
|
|
"loss": 0.7799,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 0.2026607613199792,
|
|
"grad_norm": 0.22990182042121887,
|
|
"learning_rate": 0.00010895883777239709,
|
|
"loss": 0.7883,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 0.20277260500724187,
|
|
"grad_norm": 0.27432581782341003,
|
|
"learning_rate": 0.00010873041253597697,
|
|
"loss": 0.7942,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 0.20288444869450456,
|
|
"grad_norm": 0.2607738971710205,
|
|
"learning_rate": 0.00010850198729955686,
|
|
"loss": 0.7877,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 0.20299629238176725,
|
|
"grad_norm": 0.2818219065666199,
|
|
"learning_rate": 0.00010827356206313674,
|
|
"loss": 0.7948,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 0.20310813606902992,
|
|
"grad_norm": 0.2751563489437103,
|
|
"learning_rate": 0.00010804513682671661,
|
|
"loss": 0.7836,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 0.2032199797562926,
|
|
"grad_norm": 0.2746957242488861,
|
|
"learning_rate": 0.0001078167115902965,
|
|
"loss": 0.7693,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 0.2033318234435553,
|
|
"grad_norm": 0.24990054965019226,
|
|
"learning_rate": 0.00010758828635387638,
|
|
"loss": 0.7869,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 0.20344366713081796,
|
|
"grad_norm": 0.24581623077392578,
|
|
"learning_rate": 0.00010735986111745626,
|
|
"loss": 0.768,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 0.20355551081808065,
|
|
"grad_norm": 0.26637768745422363,
|
|
"learning_rate": 0.00010713143588103613,
|
|
"loss": 0.7711,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 0.20366735450534335,
|
|
"grad_norm": 0.2510250508785248,
|
|
"learning_rate": 0.00010690301064461602,
|
|
"loss": 0.7748,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 0.203779198192606,
|
|
"grad_norm": 0.2378496378660202,
|
|
"learning_rate": 0.00010667458540819589,
|
|
"loss": 0.7622,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 0.2038910418798687,
|
|
"grad_norm": 0.2507869601249695,
|
|
"learning_rate": 0.00010644616017177578,
|
|
"loss": 0.7739,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 0.20400288556713136,
|
|
"grad_norm": 0.24733096361160278,
|
|
"learning_rate": 0.00010621773493535566,
|
|
"loss": 0.7508,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 0.20411472925439406,
|
|
"grad_norm": 0.23383109271526337,
|
|
"learning_rate": 0.00010598930969893554,
|
|
"loss": 0.7507,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 0.20422657294165675,
|
|
"grad_norm": 0.2543237805366516,
|
|
"learning_rate": 0.00010576088446251541,
|
|
"loss": 0.7578,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 0.2043384166289194,
|
|
"grad_norm": 0.25807520747184753,
|
|
"learning_rate": 0.00010553245922609531,
|
|
"loss": 0.7513,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 0.2044502603161821,
|
|
"grad_norm": 0.23354406654834747,
|
|
"learning_rate": 0.00010530403398967518,
|
|
"loss": 0.7566,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 0.2045621040034448,
|
|
"grad_norm": 0.2685154676437378,
|
|
"learning_rate": 0.00010507560875325506,
|
|
"loss": 0.758,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 0.20467394769070746,
|
|
"grad_norm": 0.24349918961524963,
|
|
"learning_rate": 0.00010484718351683494,
|
|
"loss": 0.7686,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 0.20478579137797015,
|
|
"grad_norm": 0.24823498725891113,
|
|
"learning_rate": 0.00010461875828041482,
|
|
"loss": 0.7659,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 0.20489763506523284,
|
|
"grad_norm": 0.2511804401874542,
|
|
"learning_rate": 0.0001043903330439947,
|
|
"loss": 0.77,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 0.2050094787524955,
|
|
"grad_norm": 0.24065516889095306,
|
|
"learning_rate": 0.00010416190780757458,
|
|
"loss": 0.7677,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 0.2051213224397582,
|
|
"grad_norm": 0.2819323241710663,
|
|
"learning_rate": 0.00010393348257115447,
|
|
"loss": 0.753,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 0.2052331661270209,
|
|
"grad_norm": 0.26467952132225037,
|
|
"learning_rate": 0.00010370505733473434,
|
|
"loss": 0.7826,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 0.20534500981428355,
|
|
"grad_norm": 0.22962163388729095,
|
|
"learning_rate": 0.00010347663209831423,
|
|
"loss": 0.7683,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 0.20545685350154624,
|
|
"grad_norm": 0.2582736611366272,
|
|
"learning_rate": 0.00010324820686189411,
|
|
"loss": 0.7951,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 0.2055686971888089,
|
|
"grad_norm": 0.2352149486541748,
|
|
"learning_rate": 0.00010301978162547399,
|
|
"loss": 0.7577,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 0.2056805408760716,
|
|
"grad_norm": 0.25687554478645325,
|
|
"learning_rate": 0.00010279135638905386,
|
|
"loss": 0.7696,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 0.2057923845633343,
|
|
"grad_norm": 0.2579772472381592,
|
|
"learning_rate": 0.00010256293115263376,
|
|
"loss": 0.7837,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 0.20590422825059695,
|
|
"grad_norm": 0.24537009000778198,
|
|
"learning_rate": 0.00010233450591621363,
|
|
"loss": 0.7799,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 0.20601607193785965,
|
|
"grad_norm": 0.2636966109275818,
|
|
"learning_rate": 0.00010210608067979351,
|
|
"loss": 0.7588,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 0.20612791562512234,
|
|
"grad_norm": 0.30670562386512756,
|
|
"learning_rate": 0.00010187765544337338,
|
|
"loss": 0.771,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 0.206239759312385,
|
|
"grad_norm": 0.28400668501853943,
|
|
"learning_rate": 0.00010164923020695327,
|
|
"loss": 0.7686,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 0.2063516029996477,
|
|
"grad_norm": 0.27395951747894287,
|
|
"learning_rate": 0.00010142080497053314,
|
|
"loss": 0.776,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 0.20646344668691038,
|
|
"grad_norm": 0.284868061542511,
|
|
"learning_rate": 0.00010119237973411303,
|
|
"loss": 0.7864,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 0.20657529037417305,
|
|
"grad_norm": 0.2859087586402893,
|
|
"learning_rate": 0.00010096395449769291,
|
|
"loss": 0.7749,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 0.20668713406143574,
|
|
"grad_norm": 0.28758034110069275,
|
|
"learning_rate": 0.00010073552926127279,
|
|
"loss": 0.7919,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 0.20679897774869843,
|
|
"grad_norm": 0.2752404510974884,
|
|
"learning_rate": 0.00010050710402485266,
|
|
"loss": 0.7808,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 0.2069108214359611,
|
|
"grad_norm": 0.30756843090057373,
|
|
"learning_rate": 0.00010027867878843256,
|
|
"loss": 0.7734,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 0.2070226651232238,
|
|
"grad_norm": 0.2694368064403534,
|
|
"learning_rate": 0.00010005025355201243,
|
|
"loss": 0.7751,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 0.20713450881048645,
|
|
"grad_norm": 0.25838834047317505,
|
|
"learning_rate": 9.982182831559231e-05,
|
|
"loss": 0.7686,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 0.20724635249774914,
|
|
"grad_norm": 0.257729709148407,
|
|
"learning_rate": 9.959340307917219e-05,
|
|
"loss": 0.7827,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 0.20735819618501183,
|
|
"grad_norm": 0.2938844859600067,
|
|
"learning_rate": 9.936497784275208e-05,
|
|
"loss": 0.7685,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 0.2074700398722745,
|
|
"grad_norm": 0.25894027948379517,
|
|
"learning_rate": 9.913655260633194e-05,
|
|
"loss": 0.7738,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 0.2075818835595372,
|
|
"grad_norm": 0.2751148045063019,
|
|
"learning_rate": 9.890812736991183e-05,
|
|
"loss": 0.7594,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 0.20769372724679988,
|
|
"grad_norm": 0.28643253445625305,
|
|
"learning_rate": 9.867970213349171e-05,
|
|
"loss": 0.7737,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 0.20780557093406254,
|
|
"grad_norm": 0.2575749158859253,
|
|
"learning_rate": 9.845127689707159e-05,
|
|
"loss": 0.7778,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 0.20791741462132524,
|
|
"grad_norm": 0.27625295519828796,
|
|
"learning_rate": 9.822285166065146e-05,
|
|
"loss": 0.7716,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 0.20802925830858793,
|
|
"grad_norm": 0.2803322672843933,
|
|
"learning_rate": 9.799442642423136e-05,
|
|
"loss": 0.7805,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 0.2081411019958506,
|
|
"grad_norm": 0.2567484676837921,
|
|
"learning_rate": 9.776600118781123e-05,
|
|
"loss": 0.7633,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 0.20825294568311328,
|
|
"grad_norm": 0.28193768858909607,
|
|
"learning_rate": 9.753757595139111e-05,
|
|
"loss": 0.7895,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 0.20836478937037597,
|
|
"grad_norm": 0.28459542989730835,
|
|
"learning_rate": 9.7309150714971e-05,
|
|
"loss": 0.7741,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 0.20847663305763864,
|
|
"grad_norm": 0.28346261382102966,
|
|
"learning_rate": 9.708072547855088e-05,
|
|
"loss": 0.7813,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 0.20858847674490133,
|
|
"grad_norm": 0.2818828523159027,
|
|
"learning_rate": 9.685230024213075e-05,
|
|
"loss": 0.7755,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 0.208700320432164,
|
|
"grad_norm": 0.28914326429367065,
|
|
"learning_rate": 9.662387500571063e-05,
|
|
"loss": 0.7798,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 0.20881216411942669,
|
|
"grad_norm": 0.2600755989551544,
|
|
"learning_rate": 9.639544976929051e-05,
|
|
"loss": 0.7758,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 0.20892400780668938,
|
|
"grad_norm": 0.2726733088493347,
|
|
"learning_rate": 9.61670245328704e-05,
|
|
"loss": 0.7769,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 0.20903585149395204,
|
|
"grad_norm": 0.23421594500541687,
|
|
"learning_rate": 9.593859929645026e-05,
|
|
"loss": 0.758,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 0.20914769518121473,
|
|
"grad_norm": 0.29468339681625366,
|
|
"learning_rate": 9.571017406003016e-05,
|
|
"loss": 0.7746,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 0.20925953886847742,
|
|
"grad_norm": 0.29477235674858093,
|
|
"learning_rate": 9.548174882361003e-05,
|
|
"loss": 0.7633,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 0.2093713825557401,
|
|
"grad_norm": 0.2564197778701782,
|
|
"learning_rate": 9.525332358718991e-05,
|
|
"loss": 0.7541,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 0.20948322624300278,
|
|
"grad_norm": 0.2745250165462494,
|
|
"learning_rate": 9.50248983507698e-05,
|
|
"loss": 0.7887,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 0.20959506993026547,
|
|
"grad_norm": 0.2572060525417328,
|
|
"learning_rate": 9.479647311434968e-05,
|
|
"loss": 0.774,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 0.20970691361752813,
|
|
"grad_norm": 0.28513193130493164,
|
|
"learning_rate": 9.456804787792955e-05,
|
|
"loss": 0.7871,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 0.20981875730479083,
|
|
"grad_norm": 0.2643887400627136,
|
|
"learning_rate": 9.433962264150944e-05,
|
|
"loss": 0.77,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 0.20993060099205352,
|
|
"grad_norm": 0.27534207701683044,
|
|
"learning_rate": 9.411119740508931e-05,
|
|
"loss": 0.7775,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 0.21004244467931618,
|
|
"grad_norm": 0.2620585858821869,
|
|
"learning_rate": 9.38827721686692e-05,
|
|
"loss": 0.7808,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 0.21015428836657887,
|
|
"grad_norm": 0.2759549915790558,
|
|
"learning_rate": 9.365434693224908e-05,
|
|
"loss": 0.7642,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 0.21026613205384156,
|
|
"grad_norm": 0.2919774353504181,
|
|
"learning_rate": 9.342592169582896e-05,
|
|
"loss": 0.7828,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 0.21037797574110423,
|
|
"grad_norm": 0.2717173099517822,
|
|
"learning_rate": 9.319749645940884e-05,
|
|
"loss": 0.7513,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 0.21048981942836692,
|
|
"grad_norm": 0.2662122845649719,
|
|
"learning_rate": 9.296907122298871e-05,
|
|
"loss": 0.7668,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 0.21060166311562958,
|
|
"grad_norm": 0.26051005721092224,
|
|
"learning_rate": 9.274064598656861e-05,
|
|
"loss": 0.7676,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 0.21071350680289228,
|
|
"grad_norm": 0.27510005235671997,
|
|
"learning_rate": 9.251222075014848e-05,
|
|
"loss": 0.7507,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 0.21082535049015497,
|
|
"grad_norm": 0.23877868056297302,
|
|
"learning_rate": 9.228379551372836e-05,
|
|
"loss": 0.7535,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 0.21093719417741763,
|
|
"grad_norm": 0.256104439496994,
|
|
"learning_rate": 9.205537027730824e-05,
|
|
"loss": 0.7546,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 0.21104903786468032,
|
|
"grad_norm": 0.2829015552997589,
|
|
"learning_rate": 9.182694504088813e-05,
|
|
"loss": 0.7588,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 0.211160881551943,
|
|
"grad_norm": 0.22898368537425995,
|
|
"learning_rate": 9.1598519804468e-05,
|
|
"loss": 0.7551,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 0.21127272523920568,
|
|
"grad_norm": 0.23679418861865997,
|
|
"learning_rate": 9.137009456804788e-05,
|
|
"loss": 0.7718,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 0.21138456892646837,
|
|
"grad_norm": 0.2878457009792328,
|
|
"learning_rate": 9.114166933162776e-05,
|
|
"loss": 0.7593,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 0.21149641261373106,
|
|
"grad_norm": 0.2936013638973236,
|
|
"learning_rate": 9.091324409520764e-05,
|
|
"loss": 0.7713,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 0.21160825630099372,
|
|
"grad_norm": 0.26062774658203125,
|
|
"learning_rate": 9.068481885878751e-05,
|
|
"loss": 0.7763,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 0.21172009998825642,
|
|
"grad_norm": 0.3092271685600281,
|
|
"learning_rate": 9.045639362236741e-05,
|
|
"loss": 0.7807,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 0.2118319436755191,
|
|
"grad_norm": 0.23566113412380219,
|
|
"learning_rate": 9.022796838594728e-05,
|
|
"loss": 0.7779,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 0.21194378736278177,
|
|
"grad_norm": 0.27366477251052856,
|
|
"learning_rate": 8.999954314952716e-05,
|
|
"loss": 0.77,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 0.21205563105004446,
|
|
"grad_norm": 0.23270778357982635,
|
|
"learning_rate": 8.977111791310704e-05,
|
|
"loss": 0.7549,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 0.21216747473730713,
|
|
"grad_norm": 0.28785306215286255,
|
|
"learning_rate": 8.954269267668693e-05,
|
|
"loss": 0.7677,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 0.21227931842456982,
|
|
"grad_norm": 0.2588510811328888,
|
|
"learning_rate": 8.93142674402668e-05,
|
|
"loss": 0.7715,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 0.2123911621118325,
|
|
"grad_norm": 0.248029887676239,
|
|
"learning_rate": 8.908584220384668e-05,
|
|
"loss": 0.7749,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 0.21250300579909517,
|
|
"grad_norm": 0.2579936981201172,
|
|
"learning_rate": 8.885741696742656e-05,
|
|
"loss": 0.7552,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 0.21261484948635787,
|
|
"grad_norm": 0.26293206214904785,
|
|
"learning_rate": 8.862899173100645e-05,
|
|
"loss": 0.7657,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 0.21272669317362056,
|
|
"grad_norm": 0.24589793384075165,
|
|
"learning_rate": 8.840056649458631e-05,
|
|
"loss": 0.7598,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 0.21283853686088322,
|
|
"grad_norm": 0.2315252274274826,
|
|
"learning_rate": 8.817214125816621e-05,
|
|
"loss": 0.7637,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 0.2129503805481459,
|
|
"grad_norm": 0.2538358271121979,
|
|
"learning_rate": 8.794371602174608e-05,
|
|
"loss": 0.7587,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 0.2130622242354086,
|
|
"grad_norm": 0.2626616060733795,
|
|
"learning_rate": 8.771529078532596e-05,
|
|
"loss": 0.7597,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 0.21317406792267127,
|
|
"grad_norm": 0.2557279169559479,
|
|
"learning_rate": 8.748686554890585e-05,
|
|
"loss": 0.7499,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 0.21328591160993396,
|
|
"grad_norm": 0.25008153915405273,
|
|
"learning_rate": 8.725844031248573e-05,
|
|
"loss": 0.7466,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 0.21339775529719665,
|
|
"grad_norm": 0.2647120952606201,
|
|
"learning_rate": 8.70300150760656e-05,
|
|
"loss": 0.7574,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 0.21350959898445931,
|
|
"grad_norm": 0.2535738945007324,
|
|
"learning_rate": 8.68015898396455e-05,
|
|
"loss": 0.7672,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 0.213621442671722,
|
|
"grad_norm": 0.28925755620002747,
|
|
"learning_rate": 8.657316460322536e-05,
|
|
"loss": 0.7692,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 0.21373328635898467,
|
|
"grad_norm": 0.26770591735839844,
|
|
"learning_rate": 8.634473936680525e-05,
|
|
"loss": 0.7511,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 0.21384513004624736,
|
|
"grad_norm": 0.25162947177886963,
|
|
"learning_rate": 8.611631413038512e-05,
|
|
"loss": 0.7573,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 0.21395697373351005,
|
|
"grad_norm": 0.253324031829834,
|
|
"learning_rate": 8.588788889396501e-05,
|
|
"loss": 0.7516,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 0.21406881742077272,
|
|
"grad_norm": 0.2784843146800995,
|
|
"learning_rate": 8.565946365754488e-05,
|
|
"loss": 0.7522,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 0.2141806611080354,
|
|
"grad_norm": 0.2869722247123718,
|
|
"learning_rate": 8.543103842112476e-05,
|
|
"loss": 0.7525,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 0.2142925047952981,
|
|
"grad_norm": 0.2467101663351059,
|
|
"learning_rate": 8.520261318470465e-05,
|
|
"loss": 0.7336,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 0.21440434848256076,
|
|
"grad_norm": 0.26108691096305847,
|
|
"learning_rate": 8.497418794828453e-05,
|
|
"loss": 0.751,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 0.21451619216982346,
|
|
"grad_norm": 0.2992580533027649,
|
|
"learning_rate": 8.47457627118644e-05,
|
|
"loss": 0.7599,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 0.21462803585708615,
|
|
"grad_norm": 0.2573351562023163,
|
|
"learning_rate": 8.45173374754443e-05,
|
|
"loss": 0.752,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 0.2147398795443488,
|
|
"grad_norm": 0.30148234963417053,
|
|
"learning_rate": 8.428891223902416e-05,
|
|
"loss": 0.7536,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 0.2148517232316115,
|
|
"grad_norm": 0.2811321020126343,
|
|
"learning_rate": 8.406048700260405e-05,
|
|
"loss": 0.761,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 0.2149635669188742,
|
|
"grad_norm": 0.2792038321495056,
|
|
"learning_rate": 8.383206176618392e-05,
|
|
"loss": 0.7558,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 0.21507541060613686,
|
|
"grad_norm": 0.30432426929473877,
|
|
"learning_rate": 8.360363652976381e-05,
|
|
"loss": 0.7541,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 0.21518725429339955,
|
|
"grad_norm": 0.28335481882095337,
|
|
"learning_rate": 8.33752112933437e-05,
|
|
"loss": 0.7628,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 0.2152990979806622,
|
|
"grad_norm": 0.28402864933013916,
|
|
"learning_rate": 8.314678605692357e-05,
|
|
"loss": 0.7835,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 0.2154109416679249,
|
|
"grad_norm": 0.2914164662361145,
|
|
"learning_rate": 8.291836082050346e-05,
|
|
"loss": 0.7705,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 0.2155227853551876,
|
|
"grad_norm": 0.27296769618988037,
|
|
"learning_rate": 8.268993558408333e-05,
|
|
"loss": 0.7791,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 0.21563462904245026,
|
|
"grad_norm": 0.2987435460090637,
|
|
"learning_rate": 8.246151034766321e-05,
|
|
"loss": 0.7918,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 0.21574647272971295,
|
|
"grad_norm": 0.2743736207485199,
|
|
"learning_rate": 8.22330851112431e-05,
|
|
"loss": 0.7777,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 0.21585831641697564,
|
|
"grad_norm": 0.2775188982486725,
|
|
"learning_rate": 8.200465987482298e-05,
|
|
"loss": 0.7811,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 0.2159701601042383,
|
|
"grad_norm": 0.2942585349082947,
|
|
"learning_rate": 8.177623463840285e-05,
|
|
"loss": 0.7748,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 0.216082003791501,
|
|
"grad_norm": 0.2545025050640106,
|
|
"learning_rate": 8.154780940198274e-05,
|
|
"loss": 0.77,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 0.2161938474787637,
|
|
"grad_norm": 0.2571526765823364,
|
|
"learning_rate": 8.131938416556261e-05,
|
|
"loss": 0.7735,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 0.21630569116602635,
|
|
"grad_norm": 0.2687735855579376,
|
|
"learning_rate": 8.10909589291425e-05,
|
|
"loss": 0.7703,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 0.21641753485328905,
|
|
"grad_norm": 0.27332374453544617,
|
|
"learning_rate": 8.086253369272237e-05,
|
|
"loss": 0.7645,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 0.21652937854055174,
|
|
"grad_norm": 0.25585636496543884,
|
|
"learning_rate": 8.063410845630226e-05,
|
|
"loss": 0.7651,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 0.2166412222278144,
|
|
"grad_norm": 0.25861334800720215,
|
|
"learning_rate": 8.040568321988213e-05,
|
|
"loss": 0.7788,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 0.2167530659150771,
|
|
"grad_norm": 0.26126453280448914,
|
|
"learning_rate": 8.017725798346201e-05,
|
|
"loss": 0.7631,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 0.21686490960233978,
|
|
"grad_norm": 0.27623289823532104,
|
|
"learning_rate": 7.99488327470419e-05,
|
|
"loss": 0.7555,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 0.21697675328960245,
|
|
"grad_norm": 0.256489634513855,
|
|
"learning_rate": 7.972040751062178e-05,
|
|
"loss": 0.7565,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 0.21708859697686514,
|
|
"grad_norm": 0.26825475692749023,
|
|
"learning_rate": 7.949198227420165e-05,
|
|
"loss": 0.7619,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 0.2172004406641278,
|
|
"grad_norm": 0.2633214294910431,
|
|
"learning_rate": 7.926355703778155e-05,
|
|
"loss": 0.7576,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 0.2173122843513905,
|
|
"grad_norm": 0.24602185189723969,
|
|
"learning_rate": 7.903513180136141e-05,
|
|
"loss": 0.748,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 0.21742412803865319,
|
|
"grad_norm": 0.24769659340381622,
|
|
"learning_rate": 7.88067065649413e-05,
|
|
"loss": 0.749,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 0.21753597172591585,
|
|
"grad_norm": 0.22824670374393463,
|
|
"learning_rate": 7.857828132852117e-05,
|
|
"loss": 0.7439,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 0.21764781541317854,
|
|
"grad_norm": 0.24848710000514984,
|
|
"learning_rate": 7.834985609210106e-05,
|
|
"loss": 0.7422,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 0.21775965910044123,
|
|
"grad_norm": 0.25875037908554077,
|
|
"learning_rate": 7.812143085568093e-05,
|
|
"loss": 0.7411,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 0.2178715027877039,
|
|
"grad_norm": 0.24616488814353943,
|
|
"learning_rate": 7.789300561926082e-05,
|
|
"loss": 0.723,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 0.2179833464749666,
|
|
"grad_norm": 0.26018476486206055,
|
|
"learning_rate": 7.76645803828407e-05,
|
|
"loss": 0.7388,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 0.21809519016222928,
|
|
"grad_norm": 0.24355724453926086,
|
|
"learning_rate": 7.743615514642058e-05,
|
|
"loss": 0.7337,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 0.21820703384949194,
|
|
"grad_norm": 0.24908235669136047,
|
|
"learning_rate": 7.720772991000045e-05,
|
|
"loss": 0.7378,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 0.21831887753675464,
|
|
"grad_norm": 0.2710162401199341,
|
|
"learning_rate": 7.697930467358035e-05,
|
|
"loss": 0.7336,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 0.21843072122401733,
|
|
"grad_norm": 0.24222905933856964,
|
|
"learning_rate": 7.675087943716022e-05,
|
|
"loss": 0.7386,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 0.21854256491128,
|
|
"grad_norm": 0.23762881755828857,
|
|
"learning_rate": 7.65224542007401e-05,
|
|
"loss": 0.7354,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 0.21865440859854268,
|
|
"grad_norm": 0.25905948877334595,
|
|
"learning_rate": 7.629402896431998e-05,
|
|
"loss": 0.7453,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 0.21876625228580535,
|
|
"grad_norm": 0.24563716351985931,
|
|
"learning_rate": 7.606560372789986e-05,
|
|
"loss": 0.7422,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 0.21887809597306804,
|
|
"grad_norm": 0.2649664878845215,
|
|
"learning_rate": 7.583717849147973e-05,
|
|
"loss": 0.7301,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 0.21898993966033073,
|
|
"grad_norm": 0.24720273911952972,
|
|
"learning_rate": 7.560875325505962e-05,
|
|
"loss": 0.7321,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 0.2191017833475934,
|
|
"grad_norm": 0.23652884364128113,
|
|
"learning_rate": 7.53803280186395e-05,
|
|
"loss": 0.7296,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 0.21921362703485608,
|
|
"grad_norm": 0.23715312778949738,
|
|
"learning_rate": 7.515190278221938e-05,
|
|
"loss": 0.7237,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 0.21932547072211878,
|
|
"grad_norm": 0.2500048577785492,
|
|
"learning_rate": 7.492347754579925e-05,
|
|
"loss": 0.7372,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 0.21943731440938144,
|
|
"grad_norm": 0.2575337886810303,
|
|
"learning_rate": 7.469505230937915e-05,
|
|
"loss": 0.7393,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 0.21954915809664413,
|
|
"grad_norm": 0.255375474691391,
|
|
"learning_rate": 7.446662707295902e-05,
|
|
"loss": 0.75,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 0.21966100178390682,
|
|
"grad_norm": 0.2793714106082916,
|
|
"learning_rate": 7.42382018365389e-05,
|
|
"loss": 0.7585,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 0.2197728454711695,
|
|
"grad_norm": 0.2588786482810974,
|
|
"learning_rate": 7.400977660011878e-05,
|
|
"loss": 0.7661,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 0.21988468915843218,
|
|
"grad_norm": 0.27130866050720215,
|
|
"learning_rate": 7.378135136369867e-05,
|
|
"loss": 0.7579,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 0.21999653284569487,
|
|
"grad_norm": 0.2730309069156647,
|
|
"learning_rate": 7.355292612727853e-05,
|
|
"loss": 0.7463,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 0.22010837653295753,
|
|
"grad_norm": 0.24330918490886688,
|
|
"learning_rate": 7.332450089085842e-05,
|
|
"loss": 0.7388,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 0.22022022022022023,
|
|
"grad_norm": 0.30004703998565674,
|
|
"learning_rate": 7.309607565443831e-05,
|
|
"loss": 0.7633,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 0.2203320639074829,
|
|
"grad_norm": 0.2754705548286438,
|
|
"learning_rate": 7.286765041801818e-05,
|
|
"loss": 0.7587,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 0.22044390759474558,
|
|
"grad_norm": 0.27601394057273865,
|
|
"learning_rate": 7.263922518159807e-05,
|
|
"loss": 0.7468,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 0.22055575128200827,
|
|
"grad_norm": 0.2328653633594513,
|
|
"learning_rate": 7.241079994517795e-05,
|
|
"loss": 0.7432,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 0.22066759496927094,
|
|
"grad_norm": 0.23960436880588531,
|
|
"learning_rate": 7.218237470875783e-05,
|
|
"loss": 0.7384,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 0.22077943865653363,
|
|
"grad_norm": 0.2687484323978424,
|
|
"learning_rate": 7.19539494723377e-05,
|
|
"loss": 0.738,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 0.22089128234379632,
|
|
"grad_norm": 0.2243189811706543,
|
|
"learning_rate": 7.17255242359176e-05,
|
|
"loss": 0.7467,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 0.22100312603105898,
|
|
"grad_norm": 0.26094529032707214,
|
|
"learning_rate": 7.149709899949747e-05,
|
|
"loss": 0.7579,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 0.22111496971832167,
|
|
"grad_norm": 0.2761390507221222,
|
|
"learning_rate": 7.126867376307735e-05,
|
|
"loss": 0.7491,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 0.22122681340558437,
|
|
"grad_norm": 0.2523578405380249,
|
|
"learning_rate": 7.104024852665723e-05,
|
|
"loss": 0.7358,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 0.22133865709284703,
|
|
"grad_norm": 0.25612056255340576,
|
|
"learning_rate": 7.081182329023711e-05,
|
|
"loss": 0.7322,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 0.22145050078010972,
|
|
"grad_norm": 0.24379362165927887,
|
|
"learning_rate": 7.058339805381698e-05,
|
|
"loss": 0.7438,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 0.2215623444673724,
|
|
"grad_norm": 0.2315502017736435,
|
|
"learning_rate": 7.035497281739687e-05,
|
|
"loss": 0.7349,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 0.22167418815463508,
|
|
"grad_norm": 0.41941365599632263,
|
|
"learning_rate": 7.012654758097675e-05,
|
|
"loss": 0.743,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 0.22178603184189777,
|
|
"grad_norm": 0.23147599399089813,
|
|
"learning_rate": 6.989812234455663e-05,
|
|
"loss": 0.7381,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 0.22189787552916043,
|
|
"grad_norm": 0.25920864939689636,
|
|
"learning_rate": 6.96696971081365e-05,
|
|
"loss": 0.7469,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 0.22200971921642312,
|
|
"grad_norm": 0.23870904743671417,
|
|
"learning_rate": 6.94412718717164e-05,
|
|
"loss": 0.7476,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 0.22212156290368582,
|
|
"grad_norm": 0.2372673749923706,
|
|
"learning_rate": 6.921284663529627e-05,
|
|
"loss": 0.7468,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 0.22223340659094848,
|
|
"grad_norm": 0.2703365683555603,
|
|
"learning_rate": 6.898442139887615e-05,
|
|
"loss": 0.742,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 0.22234525027821117,
|
|
"grad_norm": 0.24437329173088074,
|
|
"learning_rate": 6.875599616245603e-05,
|
|
"loss": 0.7217,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 0.22245709396547386,
|
|
"grad_norm": 0.21680840849876404,
|
|
"learning_rate": 6.852757092603592e-05,
|
|
"loss": 0.7547,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 0.22256893765273653,
|
|
"grad_norm": 0.29101526737213135,
|
|
"learning_rate": 6.829914568961579e-05,
|
|
"loss": 0.7389,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 0.22268078133999922,
|
|
"grad_norm": 0.2821531891822815,
|
|
"learning_rate": 6.807072045319567e-05,
|
|
"loss": 0.731,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 0.2227926250272619,
|
|
"grad_norm": 0.2773050367832184,
|
|
"learning_rate": 6.784229521677555e-05,
|
|
"loss": 0.7369,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 0.22290446871452457,
|
|
"grad_norm": 0.2531367838382721,
|
|
"learning_rate": 6.761386998035543e-05,
|
|
"loss": 0.7399,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 0.22301631240178726,
|
|
"grad_norm": 0.28158465027809143,
|
|
"learning_rate": 6.73854447439353e-05,
|
|
"loss": 0.7523,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 0.22312815608904996,
|
|
"grad_norm": 0.25612935423851013,
|
|
"learning_rate": 6.71570195075152e-05,
|
|
"loss": 0.7725,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 0.22323999977631262,
|
|
"grad_norm": 0.26996153593063354,
|
|
"learning_rate": 6.692859427109507e-05,
|
|
"loss": 0.7823,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 0.2233518434635753,
|
|
"grad_norm": 0.28008782863616943,
|
|
"learning_rate": 6.670016903467495e-05,
|
|
"loss": 0.7679,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 0.22346368715083798,
|
|
"grad_norm": 0.27016493678092957,
|
|
"learning_rate": 6.647174379825483e-05,
|
|
"loss": 0.7617,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 0.22357553083810067,
|
|
"grad_norm": 0.2679850459098816,
|
|
"learning_rate": 6.624331856183472e-05,
|
|
"loss": 0.7737,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 0.22368737452536336,
|
|
"grad_norm": 0.2570480406284332,
|
|
"learning_rate": 6.601489332541459e-05,
|
|
"loss": 0.758,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.22379921821262602,
|
|
"grad_norm": 0.2503785490989685,
|
|
"learning_rate": 6.578646808899447e-05,
|
|
"loss": 0.761,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 0.2239110618998887,
|
|
"grad_norm": 0.2648092210292816,
|
|
"learning_rate": 6.555804285257435e-05,
|
|
"loss": 0.7532,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 0.2240229055871514,
|
|
"grad_norm": 0.26829221844673157,
|
|
"learning_rate": 6.532961761615423e-05,
|
|
"loss": 0.7542,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 0.22413474927441407,
|
|
"grad_norm": 0.27535539865493774,
|
|
"learning_rate": 6.51011923797341e-05,
|
|
"loss": 0.7578,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 0.22424659296167676,
|
|
"grad_norm": 0.28674209117889404,
|
|
"learning_rate": 6.4872767143314e-05,
|
|
"loss": 0.756,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 0.22435843664893945,
|
|
"grad_norm": 0.2523026466369629,
|
|
"learning_rate": 6.464434190689387e-05,
|
|
"loss": 0.7514,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 0.22447028033620212,
|
|
"grad_norm": 0.24213305115699768,
|
|
"learning_rate": 6.441591667047375e-05,
|
|
"loss": 0.7546,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 0.2245821240234648,
|
|
"grad_norm": 0.2779023349285126,
|
|
"learning_rate": 6.418749143405363e-05,
|
|
"loss": 0.7654,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 0.2246939677107275,
|
|
"grad_norm": 0.28806111216545105,
|
|
"learning_rate": 6.395906619763352e-05,
|
|
"loss": 0.7612,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 0.22480581139799016,
|
|
"grad_norm": 0.2637580931186676,
|
|
"learning_rate": 6.373064096121339e-05,
|
|
"loss": 0.7659,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 0.22491765508525285,
|
|
"grad_norm": 0.2683275043964386,
|
|
"learning_rate": 6.350221572479328e-05,
|
|
"loss": 0.753,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 0.22502949877251555,
|
|
"grad_norm": 0.2693597078323364,
|
|
"learning_rate": 6.327379048837315e-05,
|
|
"loss": 0.7697,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 0.2251413424597782,
|
|
"grad_norm": 0.26335635781288147,
|
|
"learning_rate": 6.304536525195304e-05,
|
|
"loss": 0.7644,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 0.2252531861470409,
|
|
"grad_norm": 0.29237446188926697,
|
|
"learning_rate": 6.28169400155329e-05,
|
|
"loss": 0.7721,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 0.22536502983430357,
|
|
"grad_norm": 0.3080182373523712,
|
|
"learning_rate": 6.25885147791128e-05,
|
|
"loss": 0.7666,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 0.22547687352156626,
|
|
"grad_norm": 0.2831542193889618,
|
|
"learning_rate": 6.236008954269268e-05,
|
|
"loss": 0.7805,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 0.22558871720882895,
|
|
"grad_norm": 0.2860835790634155,
|
|
"learning_rate": 6.213166430627257e-05,
|
|
"loss": 0.7816,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 0.2257005608960916,
|
|
"grad_norm": 0.28273066878318787,
|
|
"learning_rate": 6.190323906985244e-05,
|
|
"loss": 0.7812,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 0.2258124045833543,
|
|
"grad_norm": 0.29203614592552185,
|
|
"learning_rate": 6.167481383343232e-05,
|
|
"loss": 0.7699,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 0.225924248270617,
|
|
"grad_norm": 0.2811570167541504,
|
|
"learning_rate": 6.14463885970122e-05,
|
|
"loss": 0.7833,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 0.22603609195787966,
|
|
"grad_norm": 0.30047500133514404,
|
|
"learning_rate": 6.121796336059208e-05,
|
|
"loss": 0.7594,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 0.22614793564514235,
|
|
"grad_norm": 0.2838903069496155,
|
|
"learning_rate": 6.098953812417196e-05,
|
|
"loss": 0.7678,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 0.22625977933240504,
|
|
"grad_norm": 0.2840651273727417,
|
|
"learning_rate": 6.0761112887751836e-05,
|
|
"loss": 0.7546,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 0.2263716230196677,
|
|
"grad_norm": 0.31575652956962585,
|
|
"learning_rate": 6.053268765133172e-05,
|
|
"loss": 0.7533,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 0.2264834667069304,
|
|
"grad_norm": 0.2692145109176636,
|
|
"learning_rate": 6.03042624149116e-05,
|
|
"loss": 0.744,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 0.2265953103941931,
|
|
"grad_norm": 0.3094116449356079,
|
|
"learning_rate": 6.007583717849148e-05,
|
|
"loss": 0.7708,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 0.22670715408145575,
|
|
"grad_norm": 0.3123047947883606,
|
|
"learning_rate": 5.984741194207136e-05,
|
|
"loss": 0.7431,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 0.22681899776871844,
|
|
"grad_norm": 0.2733646631240845,
|
|
"learning_rate": 5.961898670565124e-05,
|
|
"loss": 0.762,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 0.2269308414559811,
|
|
"grad_norm": 0.23944342136383057,
|
|
"learning_rate": 5.939056146923112e-05,
|
|
"loss": 0.7488,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 0.2270426851432438,
|
|
"grad_norm": 0.2459600865840912,
|
|
"learning_rate": 5.9162136232811e-05,
|
|
"loss": 0.7443,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 0.2271545288305065,
|
|
"grad_norm": 0.2502724826335907,
|
|
"learning_rate": 5.893371099639088e-05,
|
|
"loss": 0.7417,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 0.22726637251776916,
|
|
"grad_norm": 0.23721522092819214,
|
|
"learning_rate": 5.870528575997076e-05,
|
|
"loss": 0.7393,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 0.22737821620503185,
|
|
"grad_norm": 0.2526785135269165,
|
|
"learning_rate": 5.847686052355064e-05,
|
|
"loss": 0.7346,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 0.22749005989229454,
|
|
"grad_norm": 0.2573647201061249,
|
|
"learning_rate": 5.824843528713052e-05,
|
|
"loss": 0.7192,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 0.2276019035795572,
|
|
"grad_norm": 0.2632768750190735,
|
|
"learning_rate": 5.80200100507104e-05,
|
|
"loss": 0.7234,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 0.2277137472668199,
|
|
"grad_norm": 0.2589345872402191,
|
|
"learning_rate": 5.779158481429028e-05,
|
|
"loss": 0.7165,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 0.22782559095408259,
|
|
"grad_norm": 0.2480648308992386,
|
|
"learning_rate": 5.756315957787016e-05,
|
|
"loss": 0.7099,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 0.22793743464134525,
|
|
"grad_norm": 0.24949654936790466,
|
|
"learning_rate": 5.733473434145004e-05,
|
|
"loss": 0.7187,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 0.22804927832860794,
|
|
"grad_norm": 0.25637611746788025,
|
|
"learning_rate": 5.710630910502993e-05,
|
|
"loss": 0.7098,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 0.22816112201587063,
|
|
"grad_norm": 0.28809231519699097,
|
|
"learning_rate": 5.687788386860981e-05,
|
|
"loss": 0.7315,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 0.2282729657031333,
|
|
"grad_norm": 0.25564566254615784,
|
|
"learning_rate": 5.6649458632189686e-05,
|
|
"loss": 0.7319,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 0.228384809390396,
|
|
"grad_norm": 0.2693794369697571,
|
|
"learning_rate": 5.642103339576957e-05,
|
|
"loss": 0.7173,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 0.22849665307765865,
|
|
"grad_norm": 0.24680989980697632,
|
|
"learning_rate": 5.619260815934945e-05,
|
|
"loss": 0.708,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 0.22860849676492134,
|
|
"grad_norm": 0.2790026068687439,
|
|
"learning_rate": 5.596418292292933e-05,
|
|
"loss": 0.7023,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 0.22872034045218403,
|
|
"grad_norm": 0.2656199038028717,
|
|
"learning_rate": 5.573575768650921e-05,
|
|
"loss": 0.7113,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 0.2288321841394467,
|
|
"grad_norm": 0.30832743644714355,
|
|
"learning_rate": 5.550733245008909e-05,
|
|
"loss": 0.7161,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 0.2289440278267094,
|
|
"grad_norm": 0.27060794830322266,
|
|
"learning_rate": 5.527890721366897e-05,
|
|
"loss": 0.7208,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 0.22905587151397208,
|
|
"grad_norm": 0.26036307215690613,
|
|
"learning_rate": 5.505048197724885e-05,
|
|
"loss": 0.7004,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 0.22916771520123475,
|
|
"grad_norm": 0.2758086919784546,
|
|
"learning_rate": 5.482205674082873e-05,
|
|
"loss": 0.7179,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 0.22927955888849744,
|
|
"grad_norm": 0.2821243107318878,
|
|
"learning_rate": 5.459363150440861e-05,
|
|
"loss": 0.7255,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 0.22939140257576013,
|
|
"grad_norm": 0.2782810628414154,
|
|
"learning_rate": 5.436520626798849e-05,
|
|
"loss": 0.7149,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 0.2295032462630228,
|
|
"grad_norm": 0.2755940854549408,
|
|
"learning_rate": 5.413678103156837e-05,
|
|
"loss": 0.7117,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 0.22961508995028548,
|
|
"grad_norm": 0.29176777601242065,
|
|
"learning_rate": 5.390835579514825e-05,
|
|
"loss": 0.7188,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 0.22972693363754818,
|
|
"grad_norm": 0.27739444375038147,
|
|
"learning_rate": 5.367993055872813e-05,
|
|
"loss": 0.7196,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 0.22983877732481084,
|
|
"grad_norm": 0.27187204360961914,
|
|
"learning_rate": 5.345150532230801e-05,
|
|
"loss": 0.722,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 0.22995062101207353,
|
|
"grad_norm": 0.2951996624469757,
|
|
"learning_rate": 5.322308008588789e-05,
|
|
"loss": 0.7325,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 0.2300624646993362,
|
|
"grad_norm": 0.2677932381629944,
|
|
"learning_rate": 5.299465484946777e-05,
|
|
"loss": 0.7263,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 0.23017430838659889,
|
|
"grad_norm": 0.29231807589530945,
|
|
"learning_rate": 5.2766229613047654e-05,
|
|
"loss": 0.7284,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 0.23028615207386158,
|
|
"grad_norm": 0.30211326479911804,
|
|
"learning_rate": 5.253780437662753e-05,
|
|
"loss": 0.7222,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 0.23039799576112424,
|
|
"grad_norm": 0.29821720719337463,
|
|
"learning_rate": 5.230937914020741e-05,
|
|
"loss": 0.7316,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 0.23050983944838693,
|
|
"grad_norm": 0.3019379675388336,
|
|
"learning_rate": 5.208095390378729e-05,
|
|
"loss": 0.7328,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 0.23062168313564962,
|
|
"grad_norm": 0.2569403052330017,
|
|
"learning_rate": 5.185252866736717e-05,
|
|
"loss": 0.7215,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 0.2307335268229123,
|
|
"grad_norm": 0.3151782155036926,
|
|
"learning_rate": 5.1624103430947054e-05,
|
|
"loss": 0.7326,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 0.23084537051017498,
|
|
"grad_norm": 0.2748591899871826,
|
|
"learning_rate": 5.139567819452693e-05,
|
|
"loss": 0.7359,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 0.23095721419743767,
|
|
"grad_norm": 0.27494433522224426,
|
|
"learning_rate": 5.116725295810681e-05,
|
|
"loss": 0.7351,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 0.23106905788470034,
|
|
"grad_norm": 0.29428452253341675,
|
|
"learning_rate": 5.093882772168669e-05,
|
|
"loss": 0.7361,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 0.23118090157196303,
|
|
"grad_norm": 0.2924981117248535,
|
|
"learning_rate": 5.071040248526657e-05,
|
|
"loss": 0.7539,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 0.23129274525922572,
|
|
"grad_norm": 0.28647035360336304,
|
|
"learning_rate": 5.0481977248846455e-05,
|
|
"loss": 0.7576,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 0.23140458894648838,
|
|
"grad_norm": 0.3107542097568512,
|
|
"learning_rate": 5.025355201242633e-05,
|
|
"loss": 0.7615,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 0.23151643263375107,
|
|
"grad_norm": 0.27186501026153564,
|
|
"learning_rate": 5.0025126776006213e-05,
|
|
"loss": 0.7641,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 0.23162827632101374,
|
|
"grad_norm": 0.2838156819343567,
|
|
"learning_rate": 4.9796701539586096e-05,
|
|
"loss": 0.7695,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 0.23174012000827643,
|
|
"grad_norm": 0.3377101421356201,
|
|
"learning_rate": 4.956827630316597e-05,
|
|
"loss": 0.7696,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 0.23185196369553912,
|
|
"grad_norm": 0.3177778422832489,
|
|
"learning_rate": 4.9339851066745855e-05,
|
|
"loss": 0.7677,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 0.23196380738280178,
|
|
"grad_norm": 0.3157583773136139,
|
|
"learning_rate": 4.911142583032573e-05,
|
|
"loss": 0.7653,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 0.23207565107006448,
|
|
"grad_norm": 0.3123907148838043,
|
|
"learning_rate": 4.8883000593905614e-05,
|
|
"loss": 0.7677,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 0.23218749475732717,
|
|
"grad_norm": 0.30460426211357117,
|
|
"learning_rate": 4.86545753574855e-05,
|
|
"loss": 0.7743,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 0.23229933844458983,
|
|
"grad_norm": 0.27507251501083374,
|
|
"learning_rate": 4.842615012106537e-05,
|
|
"loss": 0.767,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 0.23241118213185252,
|
|
"grad_norm": 0.3233499228954315,
|
|
"learning_rate": 4.8197724884645256e-05,
|
|
"loss": 0.7717,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 0.23252302581911521,
|
|
"grad_norm": 0.30144819617271423,
|
|
"learning_rate": 4.796929964822513e-05,
|
|
"loss": 0.7609,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 0.23263486950637788,
|
|
"grad_norm": 0.29588454961776733,
|
|
"learning_rate": 4.7740874411805014e-05,
|
|
"loss": 0.7682,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 0.23274671319364057,
|
|
"grad_norm": 0.3111203610897064,
|
|
"learning_rate": 4.75124491753849e-05,
|
|
"loss": 0.7652,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 0.23285855688090326,
|
|
"grad_norm": 0.28917646408081055,
|
|
"learning_rate": 4.728402393896477e-05,
|
|
"loss": 0.7584,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 0.23297040056816593,
|
|
"grad_norm": 0.3156343698501587,
|
|
"learning_rate": 4.7055598702544656e-05,
|
|
"loss": 0.7643,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 0.23308224425542862,
|
|
"grad_norm": 0.2909680902957916,
|
|
"learning_rate": 4.682717346612454e-05,
|
|
"loss": 0.7613,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 0.2331940879426913,
|
|
"grad_norm": 0.3006870746612549,
|
|
"learning_rate": 4.659874822970442e-05,
|
|
"loss": 0.7603,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 0.23330593162995397,
|
|
"grad_norm": 0.2844945192337036,
|
|
"learning_rate": 4.6370322993284304e-05,
|
|
"loss": 0.7589,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 0.23341777531721666,
|
|
"grad_norm": 0.26857924461364746,
|
|
"learning_rate": 4.614189775686418e-05,
|
|
"loss": 0.7401,
|
|
"step": 20870
|
|
},
|
|
{
|
|
"epoch": 0.23352961900447933,
|
|
"grad_norm": 0.31332314014434814,
|
|
"learning_rate": 4.591347252044406e-05,
|
|
"loss": 0.7468,
|
|
"step": 20880
|
|
},
|
|
{
|
|
"epoch": 0.23364146269174202,
|
|
"grad_norm": 0.28083765506744385,
|
|
"learning_rate": 4.568504728402394e-05,
|
|
"loss": 0.7451,
|
|
"step": 20890
|
|
},
|
|
{
|
|
"epoch": 0.2337533063790047,
|
|
"grad_norm": 0.29185009002685547,
|
|
"learning_rate": 4.545662204760382e-05,
|
|
"loss": 0.7478,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 0.23386515006626737,
|
|
"grad_norm": 0.30532801151275635,
|
|
"learning_rate": 4.5228196811183705e-05,
|
|
"loss": 0.7404,
|
|
"step": 20910
|
|
},
|
|
{
|
|
"epoch": 0.23397699375353007,
|
|
"grad_norm": 0.2724134922027588,
|
|
"learning_rate": 4.499977157476358e-05,
|
|
"loss": 0.732,
|
|
"step": 20920
|
|
},
|
|
{
|
|
"epoch": 0.23408883744079276,
|
|
"grad_norm": 0.29753822088241577,
|
|
"learning_rate": 4.4771346338343464e-05,
|
|
"loss": 0.7236,
|
|
"step": 20930
|
|
},
|
|
{
|
|
"epoch": 0.23420068112805542,
|
|
"grad_norm": 0.31980055570602417,
|
|
"learning_rate": 4.454292110192334e-05,
|
|
"loss": 0.7407,
|
|
"step": 20940
|
|
},
|
|
{
|
|
"epoch": 0.2343125248153181,
|
|
"grad_norm": 0.29578351974487305,
|
|
"learning_rate": 4.431449586550322e-05,
|
|
"loss": 0.7166,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 0.2344243685025808,
|
|
"grad_norm": 0.25261184573173523,
|
|
"learning_rate": 4.4086070629083105e-05,
|
|
"loss": 0.7195,
|
|
"step": 20960
|
|
},
|
|
{
|
|
"epoch": 0.23453621218984347,
|
|
"grad_norm": 0.2669534385204315,
|
|
"learning_rate": 4.385764539266298e-05,
|
|
"loss": 0.7224,
|
|
"step": 20970
|
|
},
|
|
{
|
|
"epoch": 0.23464805587710616,
|
|
"grad_norm": 0.2817215919494629,
|
|
"learning_rate": 4.3629220156242864e-05,
|
|
"loss": 0.7405,
|
|
"step": 20980
|
|
},
|
|
{
|
|
"epoch": 0.23475989956436885,
|
|
"grad_norm": 0.27033400535583496,
|
|
"learning_rate": 4.340079491982275e-05,
|
|
"loss": 0.7292,
|
|
"step": 20990
|
|
},
|
|
{
|
|
"epoch": 0.23487174325163152,
|
|
"grad_norm": 0.3083013594150543,
|
|
"learning_rate": 4.317236968340262e-05,
|
|
"loss": 0.7271,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 0.2349835869388942,
|
|
"grad_norm": 0.27074989676475525,
|
|
"learning_rate": 4.2943944446982506e-05,
|
|
"loss": 0.7346,
|
|
"step": 21010
|
|
},
|
|
{
|
|
"epoch": 0.23509543062615687,
|
|
"grad_norm": 0.31609755754470825,
|
|
"learning_rate": 4.271551921056238e-05,
|
|
"loss": 0.7285,
|
|
"step": 21020
|
|
},
|
|
{
|
|
"epoch": 0.23520727431341956,
|
|
"grad_norm": 0.27084672451019287,
|
|
"learning_rate": 4.2487093974142265e-05,
|
|
"loss": 0.7411,
|
|
"step": 21030
|
|
},
|
|
{
|
|
"epoch": 0.23531911800068225,
|
|
"grad_norm": 0.26669842004776,
|
|
"learning_rate": 4.225866873772215e-05,
|
|
"loss": 0.7423,
|
|
"step": 21040
|
|
},
|
|
{
|
|
"epoch": 0.23543096168794492,
|
|
"grad_norm": 0.2873358428478241,
|
|
"learning_rate": 4.2030243501302024e-05,
|
|
"loss": 0.7345,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 0.2355428053752076,
|
|
"grad_norm": 0.2831687033176422,
|
|
"learning_rate": 4.1801818264881906e-05,
|
|
"loss": 0.7537,
|
|
"step": 21060
|
|
},
|
|
{
|
|
"epoch": 0.2356546490624703,
|
|
"grad_norm": 0.2781788110733032,
|
|
"learning_rate": 4.157339302846178e-05,
|
|
"loss": 0.7494,
|
|
"step": 21070
|
|
},
|
|
{
|
|
"epoch": 0.23576649274973296,
|
|
"grad_norm": 0.27109071612358093,
|
|
"learning_rate": 4.1344967792041665e-05,
|
|
"loss": 0.7493,
|
|
"step": 21080
|
|
},
|
|
{
|
|
"epoch": 0.23587833643699566,
|
|
"grad_norm": 0.25398164987564087,
|
|
"learning_rate": 4.111654255562155e-05,
|
|
"loss": 0.7369,
|
|
"step": 21090
|
|
},
|
|
{
|
|
"epoch": 0.23599018012425835,
|
|
"grad_norm": 0.3150353729724884,
|
|
"learning_rate": 4.0888117319201424e-05,
|
|
"loss": 0.754,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 0.236102023811521,
|
|
"grad_norm": 0.27384257316589355,
|
|
"learning_rate": 4.065969208278131e-05,
|
|
"loss": 0.7439,
|
|
"step": 21110
|
|
},
|
|
{
|
|
"epoch": 0.2362138674987837,
|
|
"grad_norm": 0.2770559787750244,
|
|
"learning_rate": 4.043126684636118e-05,
|
|
"loss": 0.7391,
|
|
"step": 21120
|
|
},
|
|
{
|
|
"epoch": 0.2363257111860464,
|
|
"grad_norm": 0.29367002844810486,
|
|
"learning_rate": 4.0202841609941066e-05,
|
|
"loss": 0.746,
|
|
"step": 21130
|
|
},
|
|
{
|
|
"epoch": 0.23643755487330906,
|
|
"grad_norm": 0.2554051876068115,
|
|
"learning_rate": 3.997441637352095e-05,
|
|
"loss": 0.7386,
|
|
"step": 21140
|
|
},
|
|
{
|
|
"epoch": 0.23654939856057175,
|
|
"grad_norm": 0.2943428158760071,
|
|
"learning_rate": 3.9745991137100825e-05,
|
|
"loss": 0.7437,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 0.2366612422478344,
|
|
"grad_norm": 0.24465301632881165,
|
|
"learning_rate": 3.951756590068071e-05,
|
|
"loss": 0.7331,
|
|
"step": 21160
|
|
},
|
|
{
|
|
"epoch": 0.2367730859350971,
|
|
"grad_norm": 0.2545934021472931,
|
|
"learning_rate": 3.9289140664260584e-05,
|
|
"loss": 0.7361,
|
|
"step": 21170
|
|
},
|
|
{
|
|
"epoch": 0.2368849296223598,
|
|
"grad_norm": 0.2792121469974518,
|
|
"learning_rate": 3.9060715427840466e-05,
|
|
"loss": 0.7238,
|
|
"step": 21180
|
|
},
|
|
{
|
|
"epoch": 0.23699677330962246,
|
|
"grad_norm": 0.27943745255470276,
|
|
"learning_rate": 3.883229019142035e-05,
|
|
"loss": 0.726,
|
|
"step": 21190
|
|
},
|
|
{
|
|
"epoch": 0.23710861699688515,
|
|
"grad_norm": 0.2514471411705017,
|
|
"learning_rate": 3.8603864955000225e-05,
|
|
"loss": 0.7214,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 0.23722046068414784,
|
|
"grad_norm": 0.2698551416397095,
|
|
"learning_rate": 3.837543971858011e-05,
|
|
"loss": 0.7318,
|
|
"step": 21210
|
|
},
|
|
{
|
|
"epoch": 0.2373323043714105,
|
|
"grad_norm": 0.29603877663612366,
|
|
"learning_rate": 3.814701448215999e-05,
|
|
"loss": 0.742,
|
|
"step": 21220
|
|
},
|
|
{
|
|
"epoch": 0.2374441480586732,
|
|
"grad_norm": 0.26655495166778564,
|
|
"learning_rate": 3.791858924573987e-05,
|
|
"loss": 0.7331,
|
|
"step": 21230
|
|
},
|
|
{
|
|
"epoch": 0.2375559917459359,
|
|
"grad_norm": 0.29367104172706604,
|
|
"learning_rate": 3.769016400931975e-05,
|
|
"loss": 0.7233,
|
|
"step": 21240
|
|
},
|
|
{
|
|
"epoch": 0.23766783543319855,
|
|
"grad_norm": 0.2680334746837616,
|
|
"learning_rate": 3.7461738772899626e-05,
|
|
"loss": 0.732,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 0.23777967912046125,
|
|
"grad_norm": 0.2748298943042755,
|
|
"learning_rate": 3.723331353647951e-05,
|
|
"loss": 0.7453,
|
|
"step": 21260
|
|
},
|
|
{
|
|
"epoch": 0.23789152280772394,
|
|
"grad_norm": 0.28276947140693665,
|
|
"learning_rate": 3.700488830005939e-05,
|
|
"loss": 0.7524,
|
|
"step": 21270
|
|
},
|
|
{
|
|
"epoch": 0.2380033664949866,
|
|
"grad_norm": 0.2645372450351715,
|
|
"learning_rate": 3.677646306363927e-05,
|
|
"loss": 0.7542,
|
|
"step": 21280
|
|
},
|
|
{
|
|
"epoch": 0.2381152101822493,
|
|
"grad_norm": 0.2866505980491638,
|
|
"learning_rate": 3.654803782721916e-05,
|
|
"loss": 0.7447,
|
|
"step": 21290
|
|
},
|
|
{
|
|
"epoch": 0.23822705386951196,
|
|
"grad_norm": 0.29611489176750183,
|
|
"learning_rate": 3.631961259079903e-05,
|
|
"loss": 0.7662,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 0.23833889755677465,
|
|
"grad_norm": 0.29184749722480774,
|
|
"learning_rate": 3.6091187354378916e-05,
|
|
"loss": 0.7558,
|
|
"step": 21310
|
|
},
|
|
{
|
|
"epoch": 0.23845074124403734,
|
|
"grad_norm": 0.27304571866989136,
|
|
"learning_rate": 3.58627621179588e-05,
|
|
"loss": 0.7578,
|
|
"step": 21320
|
|
},
|
|
{
|
|
"epoch": 0.2385625849313,
|
|
"grad_norm": 0.2700962424278259,
|
|
"learning_rate": 3.5634336881538675e-05,
|
|
"loss": 0.7411,
|
|
"step": 21330
|
|
},
|
|
{
|
|
"epoch": 0.2386744286185627,
|
|
"grad_norm": 0.2845793664455414,
|
|
"learning_rate": 3.540591164511856e-05,
|
|
"loss": 0.7392,
|
|
"step": 21340
|
|
},
|
|
{
|
|
"epoch": 0.2387862723058254,
|
|
"grad_norm": 0.32136180996894836,
|
|
"learning_rate": 3.5177486408698433e-05,
|
|
"loss": 0.7431,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 0.23889811599308805,
|
|
"grad_norm": 0.26846998929977417,
|
|
"learning_rate": 3.4949061172278316e-05,
|
|
"loss": 0.737,
|
|
"step": 21360
|
|
},
|
|
{
|
|
"epoch": 0.23900995968035074,
|
|
"grad_norm": 0.26363828778266907,
|
|
"learning_rate": 3.47206359358582e-05,
|
|
"loss": 0.7416,
|
|
"step": 21370
|
|
},
|
|
{
|
|
"epoch": 0.23912180336761343,
|
|
"grad_norm": 0.2900106906890869,
|
|
"learning_rate": 3.4492210699438075e-05,
|
|
"loss": 0.7373,
|
|
"step": 21380
|
|
},
|
|
{
|
|
"epoch": 0.2392336470548761,
|
|
"grad_norm": 0.2762589156627655,
|
|
"learning_rate": 3.426378546301796e-05,
|
|
"loss": 0.7379,
|
|
"step": 21390
|
|
},
|
|
{
|
|
"epoch": 0.2393454907421388,
|
|
"grad_norm": 0.2697104513645172,
|
|
"learning_rate": 3.4035360226597834e-05,
|
|
"loss": 0.7448,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 0.23945733442940148,
|
|
"grad_norm": 0.2901761829853058,
|
|
"learning_rate": 3.380693499017772e-05,
|
|
"loss": 0.7394,
|
|
"step": 21410
|
|
},
|
|
{
|
|
"epoch": 0.23956917811666414,
|
|
"grad_norm": 0.245674267411232,
|
|
"learning_rate": 3.35785097537576e-05,
|
|
"loss": 0.7387,
|
|
"step": 21420
|
|
},
|
|
{
|
|
"epoch": 0.23968102180392684,
|
|
"grad_norm": 0.2713403105735779,
|
|
"learning_rate": 3.3350084517337476e-05,
|
|
"loss": 0.7604,
|
|
"step": 21430
|
|
},
|
|
{
|
|
"epoch": 0.2397928654911895,
|
|
"grad_norm": 0.27368244528770447,
|
|
"learning_rate": 3.312165928091736e-05,
|
|
"loss": 0.7489,
|
|
"step": 21440
|
|
},
|
|
{
|
|
"epoch": 0.2399047091784522,
|
|
"grad_norm": 0.3079991340637207,
|
|
"learning_rate": 3.2893234044497234e-05,
|
|
"loss": 0.7653,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 0.24001655286571488,
|
|
"grad_norm": 0.2920658588409424,
|
|
"learning_rate": 3.266480880807712e-05,
|
|
"loss": 0.7588,
|
|
"step": 21460
|
|
},
|
|
{
|
|
"epoch": 0.24012839655297755,
|
|
"grad_norm": 0.27589842677116394,
|
|
"learning_rate": 3.2436383571657e-05,
|
|
"loss": 0.7607,
|
|
"step": 21470
|
|
},
|
|
{
|
|
"epoch": 0.24024024024024024,
|
|
"grad_norm": 0.2592112720012665,
|
|
"learning_rate": 3.2207958335236876e-05,
|
|
"loss": 0.745,
|
|
"step": 21480
|
|
},
|
|
{
|
|
"epoch": 0.24035208392750293,
|
|
"grad_norm": 0.27625855803489685,
|
|
"learning_rate": 3.197953309881676e-05,
|
|
"loss": 0.7488,
|
|
"step": 21490
|
|
},
|
|
{
|
|
"epoch": 0.2404639276147656,
|
|
"grad_norm": 0.2769569456577301,
|
|
"learning_rate": 3.175110786239664e-05,
|
|
"loss": 0.7326,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 0.24057577130202829,
|
|
"grad_norm": 0.2705914080142975,
|
|
"learning_rate": 3.152268262597652e-05,
|
|
"loss": 0.7512,
|
|
"step": 21510
|
|
},
|
|
{
|
|
"epoch": 0.24068761498929098,
|
|
"grad_norm": 0.2655676603317261,
|
|
"learning_rate": 3.12942573895564e-05,
|
|
"loss": 0.7366,
|
|
"step": 21520
|
|
},
|
|
{
|
|
"epoch": 0.24079945867655364,
|
|
"grad_norm": 0.2606657147407532,
|
|
"learning_rate": 3.106583215313628e-05,
|
|
"loss": 0.7436,
|
|
"step": 21530
|
|
},
|
|
{
|
|
"epoch": 0.24091130236381633,
|
|
"grad_norm": 0.27843552827835083,
|
|
"learning_rate": 3.083740691671616e-05,
|
|
"loss": 0.7342,
|
|
"step": 21540
|
|
},
|
|
{
|
|
"epoch": 0.24102314605107902,
|
|
"grad_norm": 0.27866050601005554,
|
|
"learning_rate": 3.060898168029604e-05,
|
|
"loss": 0.7305,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 0.2411349897383417,
|
|
"grad_norm": 0.2803070545196533,
|
|
"learning_rate": 3.0380556443875918e-05,
|
|
"loss": 0.727,
|
|
"step": 21560
|
|
},
|
|
{
|
|
"epoch": 0.24124683342560438,
|
|
"grad_norm": 0.27220121026039124,
|
|
"learning_rate": 3.01521312074558e-05,
|
|
"loss": 0.7195,
|
|
"step": 21570
|
|
},
|
|
{
|
|
"epoch": 0.24135867711286707,
|
|
"grad_norm": 0.26060426235198975,
|
|
"learning_rate": 2.992370597103568e-05,
|
|
"loss": 0.7013,
|
|
"step": 21580
|
|
},
|
|
{
|
|
"epoch": 0.24147052080012973,
|
|
"grad_norm": 0.24253526329994202,
|
|
"learning_rate": 2.969528073461556e-05,
|
|
"loss": 0.6925,
|
|
"step": 21590
|
|
},
|
|
{
|
|
"epoch": 0.24158236448739243,
|
|
"grad_norm": 0.26293566823005676,
|
|
"learning_rate": 2.946685549819544e-05,
|
|
"loss": 0.7028,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 0.2416942081746551,
|
|
"grad_norm": 0.26427412033081055,
|
|
"learning_rate": 2.923843026177532e-05,
|
|
"loss": 0.6993,
|
|
"step": 21610
|
|
},
|
|
{
|
|
"epoch": 0.24180605186191778,
|
|
"grad_norm": 0.26823869347572327,
|
|
"learning_rate": 2.90100050253552e-05,
|
|
"loss": 0.6999,
|
|
"step": 21620
|
|
},
|
|
{
|
|
"epoch": 0.24191789554918047,
|
|
"grad_norm": 0.24203690886497498,
|
|
"learning_rate": 2.878157978893508e-05,
|
|
"loss": 0.6906,
|
|
"step": 21630
|
|
},
|
|
{
|
|
"epoch": 0.24202973923644314,
|
|
"grad_norm": 0.2612786889076233,
|
|
"learning_rate": 2.8553154552514964e-05,
|
|
"loss": 0.6952,
|
|
"step": 21640
|
|
},
|
|
{
|
|
"epoch": 0.24214158292370583,
|
|
"grad_norm": 0.27152737975120544,
|
|
"learning_rate": 2.8324729316094843e-05,
|
|
"loss": 0.692,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 0.24225342661096852,
|
|
"grad_norm": 0.2592925727367401,
|
|
"learning_rate": 2.8096304079674726e-05,
|
|
"loss": 0.6995,
|
|
"step": 21660
|
|
},
|
|
{
|
|
"epoch": 0.24236527029823118,
|
|
"grad_norm": 0.2419063299894333,
|
|
"learning_rate": 2.7867878843254605e-05,
|
|
"loss": 0.7067,
|
|
"step": 21670
|
|
},
|
|
{
|
|
"epoch": 0.24247711398549388,
|
|
"grad_norm": 0.24731135368347168,
|
|
"learning_rate": 2.7639453606834485e-05,
|
|
"loss": 0.734,
|
|
"step": 21680
|
|
},
|
|
{
|
|
"epoch": 0.24258895767275657,
|
|
"grad_norm": 0.25746017694473267,
|
|
"learning_rate": 2.7411028370414364e-05,
|
|
"loss": 0.7075,
|
|
"step": 21690
|
|
},
|
|
{
|
|
"epoch": 0.24270080136001923,
|
|
"grad_norm": 0.2521972060203552,
|
|
"learning_rate": 2.7182603133994244e-05,
|
|
"loss": 0.7137,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 0.24281264504728192,
|
|
"grad_norm": 0.26796218752861023,
|
|
"learning_rate": 2.6954177897574127e-05,
|
|
"loss": 0.7227,
|
|
"step": 21710
|
|
},
|
|
{
|
|
"epoch": 0.2429244887345446,
|
|
"grad_norm": 0.30404597520828247,
|
|
"learning_rate": 2.6725752661154006e-05,
|
|
"loss": 0.7243,
|
|
"step": 21720
|
|
},
|
|
{
|
|
"epoch": 0.24303633242180728,
|
|
"grad_norm": 0.29561156034469604,
|
|
"learning_rate": 2.6497327424733885e-05,
|
|
"loss": 0.7357,
|
|
"step": 21730
|
|
},
|
|
{
|
|
"epoch": 0.24314817610906997,
|
|
"grad_norm": 0.28066596388816833,
|
|
"learning_rate": 2.6268902188313765e-05,
|
|
"loss": 0.7224,
|
|
"step": 21740
|
|
},
|
|
{
|
|
"epoch": 0.24326001979633263,
|
|
"grad_norm": 0.29235216975212097,
|
|
"learning_rate": 2.6040476951893644e-05,
|
|
"loss": 0.7288,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 0.24337186348359532,
|
|
"grad_norm": 0.26750460267066956,
|
|
"learning_rate": 2.5812051715473527e-05,
|
|
"loss": 0.7414,
|
|
"step": 21760
|
|
},
|
|
{
|
|
"epoch": 0.24348370717085802,
|
|
"grad_norm": 0.2707473039627075,
|
|
"learning_rate": 2.5583626479053406e-05,
|
|
"loss": 0.7478,
|
|
"step": 21770
|
|
},
|
|
{
|
|
"epoch": 0.24359555085812068,
|
|
"grad_norm": 0.26526397466659546,
|
|
"learning_rate": 2.5355201242633286e-05,
|
|
"loss": 0.7513,
|
|
"step": 21780
|
|
},
|
|
{
|
|
"epoch": 0.24370739454538337,
|
|
"grad_norm": 0.2362915724515915,
|
|
"learning_rate": 2.5126776006213165e-05,
|
|
"loss": 0.7507,
|
|
"step": 21790
|
|
},
|
|
{
|
|
"epoch": 0.24381923823264606,
|
|
"grad_norm": 0.2512950599193573,
|
|
"learning_rate": 2.4898350769793048e-05,
|
|
"loss": 0.7417,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 0.24393108191990873,
|
|
"grad_norm": 0.2366458922624588,
|
|
"learning_rate": 2.4669925533372928e-05,
|
|
"loss": 0.7402,
|
|
"step": 21810
|
|
},
|
|
{
|
|
"epoch": 0.24404292560717142,
|
|
"grad_norm": 0.24888353049755096,
|
|
"learning_rate": 2.4441500296952807e-05,
|
|
"loss": 0.7456,
|
|
"step": 21820
|
|
},
|
|
{
|
|
"epoch": 0.2441547692944341,
|
|
"grad_norm": 0.24143491685390472,
|
|
"learning_rate": 2.4213075060532686e-05,
|
|
"loss": 0.7405,
|
|
"step": 21830
|
|
},
|
|
{
|
|
"epoch": 0.24426661298169677,
|
|
"grad_norm": 0.2669823169708252,
|
|
"learning_rate": 2.3984649824112566e-05,
|
|
"loss": 0.7544,
|
|
"step": 21840
|
|
},
|
|
{
|
|
"epoch": 0.24437845666895947,
|
|
"grad_norm": 0.24328452348709106,
|
|
"learning_rate": 2.375622458769245e-05,
|
|
"loss": 0.7347,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 0.24449030035622216,
|
|
"grad_norm": 0.26204219460487366,
|
|
"learning_rate": 2.3527799351272328e-05,
|
|
"loss": 0.7397,
|
|
"step": 21860
|
|
},
|
|
{
|
|
"epoch": 0.24460214404348482,
|
|
"grad_norm": 0.2631550431251526,
|
|
"learning_rate": 2.329937411485221e-05,
|
|
"loss": 0.7413,
|
|
"step": 21870
|
|
},
|
|
{
|
|
"epoch": 0.2447139877307475,
|
|
"grad_norm": 0.2729988694190979,
|
|
"learning_rate": 2.307094887843209e-05,
|
|
"loss": 0.7336,
|
|
"step": 21880
|
|
},
|
|
{
|
|
"epoch": 0.24482583141801018,
|
|
"grad_norm": 0.2702917754650116,
|
|
"learning_rate": 2.284252364201197e-05,
|
|
"loss": 0.7294,
|
|
"step": 21890
|
|
},
|
|
{
|
|
"epoch": 0.24493767510527287,
|
|
"grad_norm": 0.22882196307182312,
|
|
"learning_rate": 2.2614098405591852e-05,
|
|
"loss": 0.7164,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 0.24504951879253556,
|
|
"grad_norm": 0.2660382390022278,
|
|
"learning_rate": 2.2385673169171732e-05,
|
|
"loss": 0.7231,
|
|
"step": 21910
|
|
},
|
|
{
|
|
"epoch": 0.24516136247979822,
|
|
"grad_norm": 0.2580036222934723,
|
|
"learning_rate": 2.215724793275161e-05,
|
|
"loss": 0.7243,
|
|
"step": 21920
|
|
},
|
|
{
|
|
"epoch": 0.24527320616706091,
|
|
"grad_norm": 0.25490158796310425,
|
|
"learning_rate": 2.192882269633149e-05,
|
|
"loss": 0.7129,
|
|
"step": 21930
|
|
},
|
|
{
|
|
"epoch": 0.2453850498543236,
|
|
"grad_norm": 0.2626509368419647,
|
|
"learning_rate": 2.1700397459911374e-05,
|
|
"loss": 0.7177,
|
|
"step": 21940
|
|
},
|
|
{
|
|
"epoch": 0.24549689354158627,
|
|
"grad_norm": 0.2642146646976471,
|
|
"learning_rate": 2.1471972223491253e-05,
|
|
"loss": 0.7119,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 0.24560873722884896,
|
|
"grad_norm": 0.2683079242706299,
|
|
"learning_rate": 2.1243546987071132e-05,
|
|
"loss": 0.7226,
|
|
"step": 21960
|
|
},
|
|
{
|
|
"epoch": 0.24572058091611165,
|
|
"grad_norm": 0.26513761281967163,
|
|
"learning_rate": 2.1015121750651012e-05,
|
|
"loss": 0.7276,
|
|
"step": 21970
|
|
},
|
|
{
|
|
"epoch": 0.24583242460337432,
|
|
"grad_norm": 0.25856319069862366,
|
|
"learning_rate": 2.078669651423089e-05,
|
|
"loss": 0.7168,
|
|
"step": 21980
|
|
},
|
|
{
|
|
"epoch": 0.245944268290637,
|
|
"grad_norm": 0.29048866033554077,
|
|
"learning_rate": 2.0558271277810774e-05,
|
|
"loss": 0.7189,
|
|
"step": 21990
|
|
},
|
|
{
|
|
"epoch": 0.2460561119778997,
|
|
"grad_norm": 0.2775687575340271,
|
|
"learning_rate": 2.0329846041390653e-05,
|
|
"loss": 0.7276,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 0.24616795566516236,
|
|
"grad_norm": 0.30157843232154846,
|
|
"learning_rate": 2.0101420804970533e-05,
|
|
"loss": 0.7435,
|
|
"step": 22010
|
|
},
|
|
{
|
|
"epoch": 0.24627979935242506,
|
|
"grad_norm": 0.2602044939994812,
|
|
"learning_rate": 1.9872995568550412e-05,
|
|
"loss": 0.7365,
|
|
"step": 22020
|
|
},
|
|
{
|
|
"epoch": 0.24639164303968772,
|
|
"grad_norm": 0.29975757002830505,
|
|
"learning_rate": 1.9644570332130292e-05,
|
|
"loss": 0.7484,
|
|
"step": 22030
|
|
},
|
|
{
|
|
"epoch": 0.2465034867269504,
|
|
"grad_norm": 0.26586923003196716,
|
|
"learning_rate": 1.9416145095710175e-05,
|
|
"loss": 0.7499,
|
|
"step": 22040
|
|
},
|
|
{
|
|
"epoch": 0.2466153304142131,
|
|
"grad_norm": 0.25447341799736023,
|
|
"learning_rate": 1.9187719859290054e-05,
|
|
"loss": 0.7523,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 0.24672717410147577,
|
|
"grad_norm": 0.2876524031162262,
|
|
"learning_rate": 1.8959294622869933e-05,
|
|
"loss": 0.7532,
|
|
"step": 22060
|
|
},
|
|
{
|
|
"epoch": 0.24683901778873846,
|
|
"grad_norm": 0.29897189140319824,
|
|
"learning_rate": 1.8730869386449813e-05,
|
|
"loss": 0.7339,
|
|
"step": 22070
|
|
},
|
|
{
|
|
"epoch": 0.24695086147600115,
|
|
"grad_norm": 0.24629873037338257,
|
|
"learning_rate": 1.8502444150029696e-05,
|
|
"loss": 0.7253,
|
|
"step": 22080
|
|
},
|
|
{
|
|
"epoch": 0.2470627051632638,
|
|
"grad_norm": 0.2844459116458893,
|
|
"learning_rate": 1.827401891360958e-05,
|
|
"loss": 0.7247,
|
|
"step": 22090
|
|
},
|
|
{
|
|
"epoch": 0.2471745488505265,
|
|
"grad_norm": 0.2798469662666321,
|
|
"learning_rate": 1.8045593677189458e-05,
|
|
"loss": 0.7334,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 0.2472863925377892,
|
|
"grad_norm": 0.26282501220703125,
|
|
"learning_rate": 1.7817168440769337e-05,
|
|
"loss": 0.735,
|
|
"step": 22110
|
|
},
|
|
{
|
|
"epoch": 0.24739823622505186,
|
|
"grad_norm": 0.25192755460739136,
|
|
"learning_rate": 1.7588743204349217e-05,
|
|
"loss": 0.733,
|
|
"step": 22120
|
|
},
|
|
{
|
|
"epoch": 0.24751007991231455,
|
|
"grad_norm": 0.2808292508125305,
|
|
"learning_rate": 1.73603179679291e-05,
|
|
"loss": 0.7403,
|
|
"step": 22130
|
|
},
|
|
{
|
|
"epoch": 0.24762192359957724,
|
|
"grad_norm": 0.28252866864204407,
|
|
"learning_rate": 1.713189273150898e-05,
|
|
"loss": 0.7296,
|
|
"step": 22140
|
|
},
|
|
{
|
|
"epoch": 0.2477337672868399,
|
|
"grad_norm": 0.2730456590652466,
|
|
"learning_rate": 1.690346749508886e-05,
|
|
"loss": 0.7321,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 0.2478456109741026,
|
|
"grad_norm": 0.2562378942966461,
|
|
"learning_rate": 1.6675042258668738e-05,
|
|
"loss": 0.7195,
|
|
"step": 22160
|
|
},
|
|
{
|
|
"epoch": 0.2479574546613653,
|
|
"grad_norm": 0.2450082004070282,
|
|
"learning_rate": 1.6446617022248617e-05,
|
|
"loss": 0.7277,
|
|
"step": 22170
|
|
},
|
|
{
|
|
"epoch": 0.24806929834862795,
|
|
"grad_norm": 0.25871893763542175,
|
|
"learning_rate": 1.62181917858285e-05,
|
|
"loss": 0.7143,
|
|
"step": 22180
|
|
},
|
|
{
|
|
"epoch": 0.24818114203589065,
|
|
"grad_norm": 0.2587449848651886,
|
|
"learning_rate": 1.598976654940838e-05,
|
|
"loss": 0.708,
|
|
"step": 22190
|
|
},
|
|
{
|
|
"epoch": 0.2482929857231533,
|
|
"grad_norm": 0.25496092438697815,
|
|
"learning_rate": 1.576134131298826e-05,
|
|
"loss": 0.7123,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 0.248404829410416,
|
|
"grad_norm": 0.2394058257341385,
|
|
"learning_rate": 1.553291607656814e-05,
|
|
"loss": 0.714,
|
|
"step": 22210
|
|
},
|
|
{
|
|
"epoch": 0.2485166730976787,
|
|
"grad_norm": 0.2560165524482727,
|
|
"learning_rate": 1.530449084014802e-05,
|
|
"loss": 0.7162,
|
|
"step": 22220
|
|
},
|
|
{
|
|
"epoch": 0.24862851678494136,
|
|
"grad_norm": 0.24602052569389343,
|
|
"learning_rate": 1.50760656037279e-05,
|
|
"loss": 0.7408,
|
|
"step": 22230
|
|
},
|
|
{
|
|
"epoch": 0.24874036047220405,
|
|
"grad_norm": 0.27800559997558594,
|
|
"learning_rate": 1.484764036730778e-05,
|
|
"loss": 0.7247,
|
|
"step": 22240
|
|
},
|
|
{
|
|
"epoch": 0.24885220415946674,
|
|
"grad_norm": 0.24703536927700043,
|
|
"learning_rate": 1.461921513088766e-05,
|
|
"loss": 0.7352,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 0.2489640478467294,
|
|
"grad_norm": 0.27936097979545593,
|
|
"learning_rate": 1.439078989446754e-05,
|
|
"loss": 0.7421,
|
|
"step": 22260
|
|
},
|
|
{
|
|
"epoch": 0.2490758915339921,
|
|
"grad_norm": 0.265828400850296,
|
|
"learning_rate": 1.4162364658047422e-05,
|
|
"loss": 0.7234,
|
|
"step": 22270
|
|
},
|
|
{
|
|
"epoch": 0.24918773522125479,
|
|
"grad_norm": 0.26921194791793823,
|
|
"learning_rate": 1.3933939421627303e-05,
|
|
"loss": 0.7414,
|
|
"step": 22280
|
|
},
|
|
{
|
|
"epoch": 0.24929957890851745,
|
|
"grad_norm": 0.2829255163669586,
|
|
"learning_rate": 1.3705514185207182e-05,
|
|
"loss": 0.7378,
|
|
"step": 22290
|
|
},
|
|
{
|
|
"epoch": 0.24941142259578014,
|
|
"grad_norm": 0.25702667236328125,
|
|
"learning_rate": 1.3477088948787063e-05,
|
|
"loss": 0.7475,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 0.24952326628304283,
|
|
"grad_norm": 0.28925350308418274,
|
|
"learning_rate": 1.3248663712366943e-05,
|
|
"loss": 0.738,
|
|
"step": 22310
|
|
},
|
|
{
|
|
"epoch": 0.2496351099703055,
|
|
"grad_norm": 0.2792825698852539,
|
|
"learning_rate": 1.3020238475946822e-05,
|
|
"loss": 0.7315,
|
|
"step": 22320
|
|
},
|
|
{
|
|
"epoch": 0.2497469536575682,
|
|
"grad_norm": 0.246215358376503,
|
|
"learning_rate": 1.2791813239526703e-05,
|
|
"loss": 0.7391,
|
|
"step": 22330
|
|
},
|
|
{
|
|
"epoch": 0.24985879734483085,
|
|
"grad_norm": 0.26492443680763245,
|
|
"learning_rate": 1.2563388003106583e-05,
|
|
"loss": 0.7478,
|
|
"step": 22340
|
|
},
|
|
{
|
|
"epoch": 0.24997064103209354,
|
|
"grad_norm": 0.27402445673942566,
|
|
"learning_rate": 1.2334962766686464e-05,
|
|
"loss": 0.7528,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 0.25008248471935624,
|
|
"grad_norm": 0.2757234573364258,
|
|
"learning_rate": 1.2106537530266343e-05,
|
|
"loss": 0.7306,
|
|
"step": 22360
|
|
},
|
|
{
|
|
"epoch": 0.2501943284066189,
|
|
"grad_norm": 0.2723679840564728,
|
|
"learning_rate": 1.1878112293846224e-05,
|
|
"loss": 0.7472,
|
|
"step": 22370
|
|
},
|
|
{
|
|
"epoch": 0.2503061720938816,
|
|
"grad_norm": 0.22666431963443756,
|
|
"learning_rate": 1.1649687057426105e-05,
|
|
"loss": 0.7443,
|
|
"step": 22380
|
|
},
|
|
{
|
|
"epoch": 0.25041801578114425,
|
|
"grad_norm": 0.24548636376857758,
|
|
"learning_rate": 1.1421261821005985e-05,
|
|
"loss": 0.7525,
|
|
"step": 22390
|
|
},
|
|
{
|
|
"epoch": 0.25052985946840695,
|
|
"grad_norm": 0.26941460371017456,
|
|
"learning_rate": 1.1192836584585866e-05,
|
|
"loss": 0.7482,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 0.25064170315566964,
|
|
"grad_norm": 0.2741219997406006,
|
|
"learning_rate": 1.0964411348165745e-05,
|
|
"loss": 0.7404,
|
|
"step": 22410
|
|
},
|
|
{
|
|
"epoch": 0.25075354684293233,
|
|
"grad_norm": 0.2622029483318329,
|
|
"learning_rate": 1.0735986111745626e-05,
|
|
"loss": 0.7463,
|
|
"step": 22420
|
|
},
|
|
{
|
|
"epoch": 0.250865390530195,
|
|
"grad_norm": 0.25730788707733154,
|
|
"learning_rate": 1.0507560875325506e-05,
|
|
"loss": 0.7596,
|
|
"step": 22430
|
|
},
|
|
{
|
|
"epoch": 0.25097723421745766,
|
|
"grad_norm": 0.24054691195487976,
|
|
"learning_rate": 1.0279135638905387e-05,
|
|
"loss": 0.7397,
|
|
"step": 22440
|
|
},
|
|
{
|
|
"epoch": 0.25108907790472035,
|
|
"grad_norm": 0.23557224869728088,
|
|
"learning_rate": 1.0050710402485266e-05,
|
|
"loss": 0.7426,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 0.25120092159198304,
|
|
"grad_norm": 0.25929298996925354,
|
|
"learning_rate": 9.822285166065146e-06,
|
|
"loss": 0.7402,
|
|
"step": 22460
|
|
},
|
|
{
|
|
"epoch": 0.25131276527924573,
|
|
"grad_norm": 0.26300865411758423,
|
|
"learning_rate": 9.593859929645027e-06,
|
|
"loss": 0.755,
|
|
"step": 22470
|
|
},
|
|
{
|
|
"epoch": 0.2514246089665084,
|
|
"grad_norm": 0.25753623247146606,
|
|
"learning_rate": 9.365434693224906e-06,
|
|
"loss": 0.7536,
|
|
"step": 22480
|
|
},
|
|
{
|
|
"epoch": 0.2515364526537711,
|
|
"grad_norm": 0.2438272088766098,
|
|
"learning_rate": 9.13700945680479e-06,
|
|
"loss": 0.7528,
|
|
"step": 22490
|
|
},
|
|
{
|
|
"epoch": 0.25164829634103375,
|
|
"grad_norm": 0.2870919406414032,
|
|
"learning_rate": 8.908584220384669e-06,
|
|
"loss": 0.772,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 0.25176014002829644,
|
|
"grad_norm": 0.2551197111606598,
|
|
"learning_rate": 8.68015898396455e-06,
|
|
"loss": 0.7571,
|
|
"step": 22510
|
|
},
|
|
{
|
|
"epoch": 0.25187198371555913,
|
|
"grad_norm": 0.24423009157180786,
|
|
"learning_rate": 8.45173374754443e-06,
|
|
"loss": 0.7548,
|
|
"step": 22520
|
|
},
|
|
{
|
|
"epoch": 0.2519838274028218,
|
|
"grad_norm": 0.2683405578136444,
|
|
"learning_rate": 8.223308511124309e-06,
|
|
"loss": 0.7631,
|
|
"step": 22530
|
|
},
|
|
{
|
|
"epoch": 0.2520956710900845,
|
|
"grad_norm": 0.25919967889785767,
|
|
"learning_rate": 7.99488327470419e-06,
|
|
"loss": 0.7556,
|
|
"step": 22540
|
|
},
|
|
{
|
|
"epoch": 0.25220751477734715,
|
|
"grad_norm": 0.25076591968536377,
|
|
"learning_rate": 7.76645803828407e-06,
|
|
"loss": 0.7528,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 0.25231935846460984,
|
|
"grad_norm": 0.2598860561847687,
|
|
"learning_rate": 7.53803280186395e-06,
|
|
"loss": 0.7565,
|
|
"step": 22560
|
|
},
|
|
{
|
|
"epoch": 0.25243120215187254,
|
|
"grad_norm": 0.30933788418769836,
|
|
"learning_rate": 7.30960756544383e-06,
|
|
"loss": 0.7645,
|
|
"step": 22570
|
|
},
|
|
{
|
|
"epoch": 0.2525430458391352,
|
|
"grad_norm": 0.26472121477127075,
|
|
"learning_rate": 7.081182329023711e-06,
|
|
"loss": 0.7559,
|
|
"step": 22580
|
|
},
|
|
{
|
|
"epoch": 0.2526548895263979,
|
|
"grad_norm": 0.28362420201301575,
|
|
"learning_rate": 6.852757092603591e-06,
|
|
"loss": 0.7618,
|
|
"step": 22590
|
|
},
|
|
{
|
|
"epoch": 0.2527667332136606,
|
|
"grad_norm": 0.27758538722991943,
|
|
"learning_rate": 6.624331856183471e-06,
|
|
"loss": 0.7656,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 0.25287857690092325,
|
|
"grad_norm": 0.28303948044776917,
|
|
"learning_rate": 6.395906619763352e-06,
|
|
"loss": 0.7672,
|
|
"step": 22610
|
|
},
|
|
{
|
|
"epoch": 0.25299042058818594,
|
|
"grad_norm": 0.2938460409641266,
|
|
"learning_rate": 6.167481383343232e-06,
|
|
"loss": 0.7662,
|
|
"step": 22620
|
|
},
|
|
{
|
|
"epoch": 0.25310226427544863,
|
|
"grad_norm": 0.25707969069480896,
|
|
"learning_rate": 5.939056146923112e-06,
|
|
"loss": 0.7667,
|
|
"step": 22630
|
|
},
|
|
{
|
|
"epoch": 0.2532141079627113,
|
|
"grad_norm": 0.2813314199447632,
|
|
"learning_rate": 5.710630910502992e-06,
|
|
"loss": 0.7645,
|
|
"step": 22640
|
|
},
|
|
{
|
|
"epoch": 0.253325951649974,
|
|
"grad_norm": 0.2911704480648041,
|
|
"learning_rate": 5.482205674082873e-06,
|
|
"loss": 0.763,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 0.2534377953372367,
|
|
"grad_norm": 0.2982921600341797,
|
|
"learning_rate": 5.253780437662753e-06,
|
|
"loss": 0.7606,
|
|
"step": 22660
|
|
},
|
|
{
|
|
"epoch": 0.25354963902449934,
|
|
"grad_norm": 0.2803521156311035,
|
|
"learning_rate": 5.025355201242633e-06,
|
|
"loss": 0.7617,
|
|
"step": 22670
|
|
},
|
|
{
|
|
"epoch": 0.25366148271176203,
|
|
"grad_norm": 0.26502448320388794,
|
|
"learning_rate": 4.7969299648225135e-06,
|
|
"loss": 0.7802,
|
|
"step": 22680
|
|
},
|
|
{
|
|
"epoch": 0.2537733263990247,
|
|
"grad_norm": 0.27778494358062744,
|
|
"learning_rate": 4.568504728402395e-06,
|
|
"loss": 0.7776,
|
|
"step": 22690
|
|
},
|
|
{
|
|
"epoch": 0.2538851700862874,
|
|
"grad_norm": 0.27522069215774536,
|
|
"learning_rate": 4.340079491982275e-06,
|
|
"loss": 0.7712,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 0.2539970137735501,
|
|
"grad_norm": 0.2718433141708374,
|
|
"learning_rate": 4.111654255562154e-06,
|
|
"loss": 0.7696,
|
|
"step": 22710
|
|
},
|
|
{
|
|
"epoch": 0.25410885746081274,
|
|
"grad_norm": 0.35057663917541504,
|
|
"learning_rate": 3.883229019142035e-06,
|
|
"loss": 0.7648,
|
|
"step": 22720
|
|
},
|
|
{
|
|
"epoch": 0.25422070114807543,
|
|
"grad_norm": 0.274494469165802,
|
|
"learning_rate": 3.654803782721915e-06,
|
|
"loss": 0.7578,
|
|
"step": 22730
|
|
},
|
|
{
|
|
"epoch": 0.2543325448353381,
|
|
"grad_norm": 0.2570250928401947,
|
|
"learning_rate": 3.4263785463017955e-06,
|
|
"loss": 0.7502,
|
|
"step": 22740
|
|
},
|
|
{
|
|
"epoch": 0.2544443885226008,
|
|
"grad_norm": 0.290217787027359,
|
|
"learning_rate": 3.197953309881676e-06,
|
|
"loss": 0.7607,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 0.2545562322098635,
|
|
"grad_norm": 0.25752514600753784,
|
|
"learning_rate": 2.969528073461556e-06,
|
|
"loss": 0.7612,
|
|
"step": 22760
|
|
},
|
|
{
|
|
"epoch": 0.2546680758971262,
|
|
"grad_norm": 0.23857931792736053,
|
|
"learning_rate": 2.7411028370414363e-06,
|
|
"loss": 0.7495,
|
|
"step": 22770
|
|
},
|
|
{
|
|
"epoch": 0.25477991958438884,
|
|
"grad_norm": 0.26004472374916077,
|
|
"learning_rate": 2.5126776006213166e-06,
|
|
"loss": 0.7477,
|
|
"step": 22780
|
|
},
|
|
{
|
|
"epoch": 0.25489176327165153,
|
|
"grad_norm": 0.25449565052986145,
|
|
"learning_rate": 2.2842523642011973e-06,
|
|
"loss": 0.7379,
|
|
"step": 22790
|
|
},
|
|
{
|
|
"epoch": 0.2550036069589142,
|
|
"grad_norm": 0.2568104565143585,
|
|
"learning_rate": 2.055827127781077e-06,
|
|
"loss": 0.7407,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 0.2551154506461769,
|
|
"grad_norm": 0.253451406955719,
|
|
"learning_rate": 1.8274018913609574e-06,
|
|
"loss": 0.7241,
|
|
"step": 22810
|
|
},
|
|
{
|
|
"epoch": 0.2552272943334396,
|
|
"grad_norm": 0.25928062200546265,
|
|
"learning_rate": 1.598976654940838e-06,
|
|
"loss": 0.7502,
|
|
"step": 22820
|
|
},
|
|
{
|
|
"epoch": 0.2553391380207023,
|
|
"grad_norm": 0.24965140223503113,
|
|
"learning_rate": 1.3705514185207182e-06,
|
|
"loss": 0.7417,
|
|
"step": 22830
|
|
},
|
|
{
|
|
"epoch": 0.25545098170796493,
|
|
"grad_norm": 0.2660306394100189,
|
|
"learning_rate": 1.1421261821005987e-06,
|
|
"loss": 0.7463,
|
|
"step": 22840
|
|
},
|
|
{
|
|
"epoch": 0.2555628253952276,
|
|
"grad_norm": 0.25784334540367126,
|
|
"learning_rate": 9.137009456804787e-07,
|
|
"loss": 0.7379,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 0.2556746690824903,
|
|
"grad_norm": 0.27776214480400085,
|
|
"learning_rate": 6.852757092603591e-07,
|
|
"loss": 0.7562,
|
|
"step": 22860
|
|
},
|
|
{
|
|
"epoch": 0.255786512769753,
|
|
"grad_norm": 0.24403463304042816,
|
|
"learning_rate": 4.5685047284023936e-07,
|
|
"loss": 0.7427,
|
|
"step": 22870
|
|
},
|
|
{
|
|
"epoch": 0.2558983564570157,
|
|
"grad_norm": 0.24544622004032135,
|
|
"learning_rate": 2.2842523642011968e-07,
|
|
"loss": 0.748,
|
|
"step": 22880
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 22889,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.946484739580887e+17,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|