2802 lines
68 KiB
JSON
2802 lines
68 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9998731447418495,
|
|
"eval_steps": 500,
|
|
"global_step": 3941,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.002537105163009007,
|
|
"grad_norm": 4.738877773284912,
|
|
"learning_rate": 2.278481012658228e-07,
|
|
"loss": 1.6626,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.005074210326018014,
|
|
"grad_norm": 4.201930046081543,
|
|
"learning_rate": 4.810126582278482e-07,
|
|
"loss": 1.6508,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.00761131548902702,
|
|
"grad_norm": 3.1523282527923584,
|
|
"learning_rate": 7.341772151898735e-07,
|
|
"loss": 1.6588,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.010148420652036028,
|
|
"grad_norm": 2.514768123626709,
|
|
"learning_rate": 9.873417721518988e-07,
|
|
"loss": 1.6091,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.012685525815045033,
|
|
"grad_norm": 1.9985918998718262,
|
|
"learning_rate": 1.240506329113924e-06,
|
|
"loss": 1.5742,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.01522263097805404,
|
|
"grad_norm": 1.5602017641067505,
|
|
"learning_rate": 1.4936708860759495e-06,
|
|
"loss": 1.5291,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.01775973614106305,
|
|
"grad_norm": 1.5359352827072144,
|
|
"learning_rate": 1.7468354430379747e-06,
|
|
"loss": 1.5007,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.020296841304072055,
|
|
"grad_norm": 1.5024731159210205,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 1.4645,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.022833946467081062,
|
|
"grad_norm": 1.4507105350494385,
|
|
"learning_rate": 2.2531645569620258e-06,
|
|
"loss": 1.4484,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.025371051630090066,
|
|
"grad_norm": 1.4768809080123901,
|
|
"learning_rate": 2.5063291139240508e-06,
|
|
"loss": 1.4516,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.027908156793099072,
|
|
"grad_norm": 1.3820849657058716,
|
|
"learning_rate": 2.7594936708860766e-06,
|
|
"loss": 1.4131,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.03044526195610808,
|
|
"grad_norm": 1.4740626811981201,
|
|
"learning_rate": 3.0126582278481016e-06,
|
|
"loss": 1.4007,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.03298236711911709,
|
|
"grad_norm": 1.3979785442352295,
|
|
"learning_rate": 3.265822784810127e-06,
|
|
"loss": 1.3964,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.0355194722821261,
|
|
"grad_norm": 1.3529947996139526,
|
|
"learning_rate": 3.518987341772152e-06,
|
|
"loss": 1.3681,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.038056577445135104,
|
|
"grad_norm": 1.5106327533721924,
|
|
"learning_rate": 3.7721518987341775e-06,
|
|
"loss": 1.3842,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.04059368260814411,
|
|
"grad_norm": 1.469246506690979,
|
|
"learning_rate": 4.025316455696203e-06,
|
|
"loss": 1.3481,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.04313078777115312,
|
|
"grad_norm": 1.4268747568130493,
|
|
"learning_rate": 4.278481012658228e-06,
|
|
"loss": 1.3738,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.045667892934162124,
|
|
"grad_norm": 1.364691138267517,
|
|
"learning_rate": 4.531645569620253e-06,
|
|
"loss": 1.3568,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.04820499809717113,
|
|
"grad_norm": 1.5261390209197998,
|
|
"learning_rate": 4.784810126582279e-06,
|
|
"loss": 1.347,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.05074210326018013,
|
|
"grad_norm": 1.4499975442886353,
|
|
"learning_rate": 5.037974683544305e-06,
|
|
"loss": 1.367,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.05327920842318914,
|
|
"grad_norm": 1.506212592124939,
|
|
"learning_rate": 5.29113924050633e-06,
|
|
"loss": 1.3344,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.055816313586198145,
|
|
"grad_norm": 1.4976462125778198,
|
|
"learning_rate": 5.544303797468355e-06,
|
|
"loss": 1.3454,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.05835341874920715,
|
|
"grad_norm": 1.45829439163208,
|
|
"learning_rate": 5.79746835443038e-06,
|
|
"loss": 1.3444,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.06089052391221616,
|
|
"grad_norm": 1.502082347869873,
|
|
"learning_rate": 6.050632911392406e-06,
|
|
"loss": 1.3264,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.06342762907522517,
|
|
"grad_norm": 1.525439739227295,
|
|
"learning_rate": 6.303797468354431e-06,
|
|
"loss": 1.3259,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.06596473423823418,
|
|
"grad_norm": 1.4980591535568237,
|
|
"learning_rate": 6.5569620253164564e-06,
|
|
"loss": 1.3338,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.06850183940124319,
|
|
"grad_norm": 1.4034385681152344,
|
|
"learning_rate": 6.810126582278481e-06,
|
|
"loss": 1.3293,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.0710389445642522,
|
|
"grad_norm": 1.3902912139892578,
|
|
"learning_rate": 7.0632911392405065e-06,
|
|
"loss": 1.3339,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.0735760497272612,
|
|
"grad_norm": 1.546105146408081,
|
|
"learning_rate": 7.316455696202533e-06,
|
|
"loss": 1.3232,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.07611315489027021,
|
|
"grad_norm": 1.396514654159546,
|
|
"learning_rate": 7.569620253164558e-06,
|
|
"loss": 1.3185,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.07865026005327921,
|
|
"grad_norm": 1.400497555732727,
|
|
"learning_rate": 7.822784810126582e-06,
|
|
"loss": 1.3231,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.08118736521628822,
|
|
"grad_norm": 1.4383889436721802,
|
|
"learning_rate": 8.075949367088608e-06,
|
|
"loss": 1.2916,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.08372447037929723,
|
|
"grad_norm": 1.383995532989502,
|
|
"learning_rate": 8.329113924050633e-06,
|
|
"loss": 1.3045,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.08626157554230623,
|
|
"grad_norm": 1.544356346130371,
|
|
"learning_rate": 8.582278481012659e-06,
|
|
"loss": 1.313,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.08879868070531524,
|
|
"grad_norm": 1.5004216432571411,
|
|
"learning_rate": 8.835443037974685e-06,
|
|
"loss": 1.3128,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.09133578586832425,
|
|
"grad_norm": 1.5500295162200928,
|
|
"learning_rate": 9.08860759493671e-06,
|
|
"loss": 1.2921,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.09387289103133326,
|
|
"grad_norm": 1.3721230030059814,
|
|
"learning_rate": 9.341772151898735e-06,
|
|
"loss": 1.2989,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.09640999619434226,
|
|
"grad_norm": 1.5480691194534302,
|
|
"learning_rate": 9.59493670886076e-06,
|
|
"loss": 1.307,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.09894710135735126,
|
|
"grad_norm": 1.4069089889526367,
|
|
"learning_rate": 9.848101265822785e-06,
|
|
"loss": 1.2914,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.10148420652036026,
|
|
"grad_norm": 1.5373787879943848,
|
|
"learning_rate": 9.999968603457859e-06,
|
|
"loss": 1.302,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.10402131168336927,
|
|
"grad_norm": 1.411720871925354,
|
|
"learning_rate": 9.999615396887012e-06,
|
|
"loss": 1.2963,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.10655841684637828,
|
|
"grad_norm": 1.4174768924713135,
|
|
"learning_rate": 9.998869765883566e-06,
|
|
"loss": 1.2673,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.10909552200938728,
|
|
"grad_norm": 1.492872714996338,
|
|
"learning_rate": 9.997731768972785e-06,
|
|
"loss": 1.3116,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.11163262717239629,
|
|
"grad_norm": 1.487380027770996,
|
|
"learning_rate": 9.996201495477102e-06,
|
|
"loss": 1.2903,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.1141697323354053,
|
|
"grad_norm": 1.4736219644546509,
|
|
"learning_rate": 9.994279065509094e-06,
|
|
"loss": 1.295,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.1167068374984143,
|
|
"grad_norm": 1.4441871643066406,
|
|
"learning_rate": 9.991964629962067e-06,
|
|
"loss": 1.293,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.11924394266142331,
|
|
"grad_norm": 1.4621552228927612,
|
|
"learning_rate": 9.989258370498208e-06,
|
|
"loss": 1.2746,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.12178104782443232,
|
|
"grad_norm": 1.4845929145812988,
|
|
"learning_rate": 9.986160499534318e-06,
|
|
"loss": 1.3113,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.12431815298744132,
|
|
"grad_norm": 1.4002912044525146,
|
|
"learning_rate": 9.982671260225156e-06,
|
|
"loss": 1.2872,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.12685525815045035,
|
|
"grad_norm": 1.380839228630066,
|
|
"learning_rate": 9.97879092644434e-06,
|
|
"loss": 1.2743,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.12939236331345935,
|
|
"grad_norm": 1.5403680801391602,
|
|
"learning_rate": 9.974519802762853e-06,
|
|
"loss": 1.2862,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.13192946847646836,
|
|
"grad_norm": 1.4719021320343018,
|
|
"learning_rate": 9.969858224425138e-06,
|
|
"loss": 1.2733,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.13446657363947737,
|
|
"grad_norm": 1.5172195434570312,
|
|
"learning_rate": 9.96480655732279e-06,
|
|
"loss": 1.2961,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.13700367880248637,
|
|
"grad_norm": 1.3394590616226196,
|
|
"learning_rate": 9.959365197965824e-06,
|
|
"loss": 1.2742,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.13954078396549538,
|
|
"grad_norm": 1.4464937448501587,
|
|
"learning_rate": 9.953534573451568e-06,
|
|
"loss": 1.268,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.1420778891285044,
|
|
"grad_norm": 1.4036401510238647,
|
|
"learning_rate": 9.947315141431126e-06,
|
|
"loss": 1.2636,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.1446149942915134,
|
|
"grad_norm": 1.3706105947494507,
|
|
"learning_rate": 9.940707390073465e-06,
|
|
"loss": 1.2728,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.1471520994545224,
|
|
"grad_norm": 1.322920560836792,
|
|
"learning_rate": 9.933711838027096e-06,
|
|
"loss": 1.2585,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.1496892046175314,
|
|
"grad_norm": 1.4396326541900635,
|
|
"learning_rate": 9.926329034379361e-06,
|
|
"loss": 1.2752,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.15222630978054041,
|
|
"grad_norm": 1.389569878578186,
|
|
"learning_rate": 9.918559558613344e-06,
|
|
"loss": 1.284,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.15476341494354942,
|
|
"grad_norm": 1.5533502101898193,
|
|
"learning_rate": 9.910404020562377e-06,
|
|
"loss": 1.2732,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.15730052010655843,
|
|
"grad_norm": 1.3960537910461426,
|
|
"learning_rate": 9.901863060362176e-06,
|
|
"loss": 1.2756,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.15983762526956743,
|
|
"grad_norm": 1.3757154941558838,
|
|
"learning_rate": 9.8929373484006e-06,
|
|
"loss": 1.2502,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.16237473043257644,
|
|
"grad_norm": 1.467374324798584,
|
|
"learning_rate": 9.883627585265032e-06,
|
|
"loss": 1.2528,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.16491183559558545,
|
|
"grad_norm": 1.4819854497909546,
|
|
"learning_rate": 9.873934501687381e-06,
|
|
"loss": 1.2602,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.16744894075859446,
|
|
"grad_norm": 1.3049192428588867,
|
|
"learning_rate": 9.863858858486736e-06,
|
|
"loss": 1.2522,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.16998604592160346,
|
|
"grad_norm": 1.367477536201477,
|
|
"learning_rate": 9.853401446509641e-06,
|
|
"loss": 1.2655,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.17252315108461247,
|
|
"grad_norm": 1.394960880279541,
|
|
"learning_rate": 9.842563086568024e-06,
|
|
"loss": 1.273,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.17506025624762148,
|
|
"grad_norm": 1.375187873840332,
|
|
"learning_rate": 9.831344629374778e-06,
|
|
"loss": 1.2805,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.17759736141063048,
|
|
"grad_norm": 1.4008384943008423,
|
|
"learning_rate": 9.81974695547697e-06,
|
|
"loss": 1.2516,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.1801344665736395,
|
|
"grad_norm": 1.398503065109253,
|
|
"learning_rate": 9.807770975186743e-06,
|
|
"loss": 1.2578,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.1826715717366485,
|
|
"grad_norm": 1.3208105564117432,
|
|
"learning_rate": 9.795417628509857e-06,
|
|
"loss": 1.2591,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.1852086768996575,
|
|
"grad_norm": 1.3660458326339722,
|
|
"learning_rate": 9.78268788507191e-06,
|
|
"loss": 1.2741,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.1877457820626665,
|
|
"grad_norm": 1.3826677799224854,
|
|
"learning_rate": 9.769582744042224e-06,
|
|
"loss": 1.2588,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.19028288722567552,
|
|
"grad_norm": 1.572285771369934,
|
|
"learning_rate": 9.756103234055432e-06,
|
|
"loss": 1.2609,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.19281999238868452,
|
|
"grad_norm": 1.4226349592208862,
|
|
"learning_rate": 9.742250413130728e-06,
|
|
"loss": 1.2472,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.19535709755169353,
|
|
"grad_norm": 1.3697686195373535,
|
|
"learning_rate": 9.728025368588829e-06,
|
|
"loss": 1.2492,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.1978942027147025,
|
|
"grad_norm": 1.4662617444992065,
|
|
"learning_rate": 9.713429216966624e-06,
|
|
"loss": 1.2438,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.20043130787771152,
|
|
"grad_norm": 1.425715446472168,
|
|
"learning_rate": 9.698463103929542e-06,
|
|
"loss": 1.2668,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.20296841304072052,
|
|
"grad_norm": 1.380704402923584,
|
|
"learning_rate": 9.68312820418163e-06,
|
|
"loss": 1.2439,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.20550551820372953,
|
|
"grad_norm": 1.4010827541351318,
|
|
"learning_rate": 9.667425721373333e-06,
|
|
"loss": 1.2606,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.20804262336673854,
|
|
"grad_norm": 1.3978463411331177,
|
|
"learning_rate": 9.651356888007041e-06,
|
|
"loss": 1.2584,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.21057972852974755,
|
|
"grad_norm": 1.4867024421691895,
|
|
"learning_rate": 9.634922965340334e-06,
|
|
"loss": 1.243,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.21311683369275655,
|
|
"grad_norm": 1.3792587518692017,
|
|
"learning_rate": 9.618125243286989e-06,
|
|
"loss": 1.2341,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.21565393885576556,
|
|
"grad_norm": 1.3928625583648682,
|
|
"learning_rate": 9.60096504031573e-06,
|
|
"loss": 1.2482,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.21819104401877457,
|
|
"grad_norm": 1.4658421277999878,
|
|
"learning_rate": 9.58344370334675e-06,
|
|
"loss": 1.2737,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.22072814918178357,
|
|
"grad_norm": 1.35547935962677,
|
|
"learning_rate": 9.565562607645974e-06,
|
|
"loss": 1.2433,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.22326525434479258,
|
|
"grad_norm": 1.3512929677963257,
|
|
"learning_rate": 9.547323156717133e-06,
|
|
"loss": 1.2451,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.2258023595078016,
|
|
"grad_norm": 1.371601939201355,
|
|
"learning_rate": 9.52872678219158e-06,
|
|
"loss": 1.2412,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.2283394646708106,
|
|
"grad_norm": 1.4480026960372925,
|
|
"learning_rate": 9.50977494371594e-06,
|
|
"loss": 1.2487,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.2308765698338196,
|
|
"grad_norm": 1.3161512613296509,
|
|
"learning_rate": 9.490469128837525e-06,
|
|
"loss": 1.2355,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.2334136749968286,
|
|
"grad_norm": 1.4747735261917114,
|
|
"learning_rate": 9.470810852887586e-06,
|
|
"loss": 1.2314,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.23595078015983761,
|
|
"grad_norm": 1.3615195751190186,
|
|
"learning_rate": 9.450801658862371e-06,
|
|
"loss": 1.249,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.23848788532284662,
|
|
"grad_norm": 1.3905844688415527,
|
|
"learning_rate": 9.430443117302006e-06,
|
|
"loss": 1.2357,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.24102499048585563,
|
|
"grad_norm": 1.4403789043426514,
|
|
"learning_rate": 9.409736826167233e-06,
|
|
"loss": 1.2482,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.24356209564886463,
|
|
"grad_norm": 1.5014008283615112,
|
|
"learning_rate": 9.388684410713977e-06,
|
|
"loss": 1.2437,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.24609920081187364,
|
|
"grad_norm": 1.383191466331482,
|
|
"learning_rate": 9.367287523365782e-06,
|
|
"loss": 1.237,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.24863630597488265,
|
|
"grad_norm": 1.4577914476394653,
|
|
"learning_rate": 9.345547843584108e-06,
|
|
"loss": 1.2366,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.25117341113789166,
|
|
"grad_norm": 1.3893483877182007,
|
|
"learning_rate": 9.323467077736513e-06,
|
|
"loss": 1.2432,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.2537105163009007,
|
|
"grad_norm": 1.331141710281372,
|
|
"learning_rate": 9.301046958962707e-06,
|
|
"loss": 1.2438,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.25624762146390967,
|
|
"grad_norm": 1.5160998106002808,
|
|
"learning_rate": 9.278289247038537e-06,
|
|
"loss": 1.2404,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.2587847266269187,
|
|
"grad_norm": 1.2425806522369385,
|
|
"learning_rate": 9.255195728237837e-06,
|
|
"loss": 1.2361,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.2613218317899277,
|
|
"grad_norm": 1.325697660446167,
|
|
"learning_rate": 9.231768215192243e-06,
|
|
"loss": 1.2344,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.2638589369529367,
|
|
"grad_norm": 1.4259663820266724,
|
|
"learning_rate": 9.2080085467489e-06,
|
|
"loss": 1.258,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.2663960421159457,
|
|
"grad_norm": 1.4262241125106812,
|
|
"learning_rate": 9.183918587826142e-06,
|
|
"loss": 1.2518,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.26893314727895473,
|
|
"grad_norm": 1.4553781747817993,
|
|
"learning_rate": 9.159500229267103e-06,
|
|
"loss": 1.2387,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.2714702524419637,
|
|
"grad_norm": 1.3143069744110107,
|
|
"learning_rate": 9.134755387691315e-06,
|
|
"loss": 1.2474,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.27400735760497275,
|
|
"grad_norm": 1.3108036518096924,
|
|
"learning_rate": 9.109686005344258e-06,
|
|
"loss": 1.2362,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.2765444627679817,
|
|
"grad_norm": 1.4027985334396362,
|
|
"learning_rate": 9.084294049944919e-06,
|
|
"loss": 1.2344,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.27908156793099076,
|
|
"grad_norm": 1.3254168033599854,
|
|
"learning_rate": 9.05858151453134e-06,
|
|
"loss": 1.2294,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.28161867309399974,
|
|
"grad_norm": 1.3602770566940308,
|
|
"learning_rate": 9.032550417304189e-06,
|
|
"loss": 1.2408,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.2841557782570088,
|
|
"grad_norm": 1.3525891304016113,
|
|
"learning_rate": 9.006202801468342e-06,
|
|
"loss": 1.2436,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.28669288342001775,
|
|
"grad_norm": 1.3206712007522583,
|
|
"learning_rate": 8.979540735072512e-06,
|
|
"loss": 1.2103,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.2892299885830268,
|
|
"grad_norm": 1.3212580680847168,
|
|
"learning_rate": 8.952566310846931e-06,
|
|
"loss": 1.2184,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.29176709374603577,
|
|
"grad_norm": 1.4156090021133423,
|
|
"learning_rate": 8.925281646039078e-06,
|
|
"loss": 1.2323,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.2943041989090448,
|
|
"grad_norm": 1.3827259540557861,
|
|
"learning_rate": 8.897688882247515e-06,
|
|
"loss": 1.2226,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.2968413040720538,
|
|
"grad_norm": 1.375603199005127,
|
|
"learning_rate": 8.869790185253766e-06,
|
|
"loss": 1.2241,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.2993784092350628,
|
|
"grad_norm": 1.4006825685501099,
|
|
"learning_rate": 8.841587744852339e-06,
|
|
"loss": 1.2405,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.3019155143980718,
|
|
"grad_norm": 1.3298841714859009,
|
|
"learning_rate": 8.813083774678841e-06,
|
|
"loss": 1.2296,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.30445261956108083,
|
|
"grad_norm": 1.3826628923416138,
|
|
"learning_rate": 8.784280512036235e-06,
|
|
"loss": 1.2272,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3069897247240898,
|
|
"grad_norm": 1.3651957511901855,
|
|
"learning_rate": 8.755180217719218e-06,
|
|
"loss": 1.225,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.30952682988709884,
|
|
"grad_norm": 1.4205344915390015,
|
|
"learning_rate": 8.72578517583679e-06,
|
|
"loss": 1.2351,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.3120639350501078,
|
|
"grad_norm": 1.350210189819336,
|
|
"learning_rate": 8.696097693632944e-06,
|
|
"loss": 1.2146,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.31460104021311686,
|
|
"grad_norm": 1.391353726387024,
|
|
"learning_rate": 8.666120101305596e-06,
|
|
"loss": 1.2444,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.31713814537612584,
|
|
"grad_norm": 1.4733567237854004,
|
|
"learning_rate": 8.635854751823666e-06,
|
|
"loss": 1.2427,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.31967525053913487,
|
|
"grad_norm": 1.3797138929367065,
|
|
"learning_rate": 8.60530402074241e-06,
|
|
"loss": 1.2236,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.32221235570214385,
|
|
"grad_norm": 1.2921204566955566,
|
|
"learning_rate": 8.574470306016936e-06,
|
|
"loss": 1.2375,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.3247494608651529,
|
|
"grad_norm": 1.3980249166488647,
|
|
"learning_rate": 8.543356027814009e-06,
|
|
"loss": 1.2176,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.32728656602816186,
|
|
"grad_norm": 1.3124829530715942,
|
|
"learning_rate": 8.511963628322076e-06,
|
|
"loss": 1.2289,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.3298236711911709,
|
|
"grad_norm": 1.3660489320755005,
|
|
"learning_rate": 8.480295571559581e-06,
|
|
"loss": 1.2222,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.3323607763541799,
|
|
"grad_norm": 1.3039889335632324,
|
|
"learning_rate": 8.448354343181568e-06,
|
|
"loss": 1.23,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.3348978815171889,
|
|
"grad_norm": 1.2941962480545044,
|
|
"learning_rate": 8.416142450284565e-06,
|
|
"loss": 1.234,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.3374349866801979,
|
|
"grad_norm": 1.3144862651824951,
|
|
"learning_rate": 8.383662421209813e-06,
|
|
"loss": 1.2291,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.3399720918432069,
|
|
"grad_norm": 1.4170352220535278,
|
|
"learning_rate": 8.350916805344812e-06,
|
|
"loss": 1.2501,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.3425091970062159,
|
|
"grad_norm": 1.2927628755569458,
|
|
"learning_rate": 8.317908172923207e-06,
|
|
"loss": 1.2057,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.34504630216922494,
|
|
"grad_norm": 1.3344128131866455,
|
|
"learning_rate": 8.28463911482306e-06,
|
|
"loss": 1.2244,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.3475834073322339,
|
|
"grad_norm": 1.3674781322479248,
|
|
"learning_rate": 8.251112242363488e-06,
|
|
"loss": 1.241,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.35012051249524295,
|
|
"grad_norm": 1.3904838562011719,
|
|
"learning_rate": 8.217330187099689e-06,
|
|
"loss": 1.2063,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.35265761765825193,
|
|
"grad_norm": 1.3588337898254395,
|
|
"learning_rate": 8.183295600616399e-06,
|
|
"loss": 1.2127,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.35519472282126097,
|
|
"grad_norm": 1.4805495738983154,
|
|
"learning_rate": 8.149011154319763e-06,
|
|
"loss": 1.224,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.35773182798426995,
|
|
"grad_norm": 1.3868420124053955,
|
|
"learning_rate": 8.114479539227653e-06,
|
|
"loss": 1.2399,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.360268933147279,
|
|
"grad_norm": 1.3122705221176147,
|
|
"learning_rate": 8.079703465758447e-06,
|
|
"loss": 1.216,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.36280603831028796,
|
|
"grad_norm": 1.3743106126785278,
|
|
"learning_rate": 8.044685663518289e-06,
|
|
"loss": 1.2258,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.365343143473297,
|
|
"grad_norm": 1.274947166442871,
|
|
"learning_rate": 8.009428881086836e-06,
|
|
"loss": 1.2159,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.367880248636306,
|
|
"grad_norm": 1.3252506256103516,
|
|
"learning_rate": 7.97393588580152e-06,
|
|
"loss": 1.2097,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.370417353799315,
|
|
"grad_norm": 1.349714756011963,
|
|
"learning_rate": 7.93820946354034e-06,
|
|
"loss": 1.2118,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.372954458962324,
|
|
"grad_norm": 1.353014349937439,
|
|
"learning_rate": 7.902252418503198e-06,
|
|
"loss": 1.2293,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.375491564125333,
|
|
"grad_norm": 1.3702679872512817,
|
|
"learning_rate": 7.86606757299178e-06,
|
|
"loss": 1.2096,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.378028669288342,
|
|
"grad_norm": 1.4039068222045898,
|
|
"learning_rate": 7.829657767188052e-06,
|
|
"loss": 1.2264,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.38056577445135104,
|
|
"grad_norm": 1.3884947299957275,
|
|
"learning_rate": 7.793025858931317e-06,
|
|
"loss": 1.2283,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.38310287961436,
|
|
"grad_norm": 1.306620717048645,
|
|
"learning_rate": 7.756174723493908e-06,
|
|
"loss": 1.2325,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.38563998477736905,
|
|
"grad_norm": 1.3600883483886719,
|
|
"learning_rate": 7.719107253355494e-06,
|
|
"loss": 1.2324,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.38817708994037803,
|
|
"grad_norm": 1.2543104887008667,
|
|
"learning_rate": 7.68182635797606e-06,
|
|
"loss": 1.1939,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.39071419510338706,
|
|
"grad_norm": 1.2916842699050903,
|
|
"learning_rate": 7.644334963567542e-06,
|
|
"loss": 1.2105,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.39325130026639604,
|
|
"grad_norm": 1.3755340576171875,
|
|
"learning_rate": 7.606636012864126e-06,
|
|
"loss": 1.226,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.395788405429405,
|
|
"grad_norm": 1.3301304578781128,
|
|
"learning_rate": 7.568732464891293e-06,
|
|
"loss": 1.2194,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.39832551059241406,
|
|
"grad_norm": 1.4263029098510742,
|
|
"learning_rate": 7.530627294733549e-06,
|
|
"loss": 1.2152,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.40086261575542304,
|
|
"grad_norm": 1.3354136943817139,
|
|
"learning_rate": 7.492323493300912e-06,
|
|
"loss": 1.2028,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.40339972091843207,
|
|
"grad_norm": 1.3600828647613525,
|
|
"learning_rate": 7.453824067094152e-06,
|
|
"loss": 1.2132,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.40593682608144105,
|
|
"grad_norm": 1.3342976570129395,
|
|
"learning_rate": 7.4151320379688105e-06,
|
|
"loss": 1.2235,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.4084739312444501,
|
|
"grad_norm": 1.3055970668792725,
|
|
"learning_rate": 7.376250442898006e-06,
|
|
"loss": 1.2121,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.41101103640745906,
|
|
"grad_norm": 1.2475143671035767,
|
|
"learning_rate": 7.33718233373407e-06,
|
|
"loss": 1.213,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.4135481415704681,
|
|
"grad_norm": 1.3556421995162964,
|
|
"learning_rate": 7.297930776968989e-06,
|
|
"loss": 1.2219,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.4160852467334771,
|
|
"grad_norm": 1.4067039489746094,
|
|
"learning_rate": 7.258498853493729e-06,
|
|
"loss": 1.2248,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.4186223518964861,
|
|
"grad_norm": 1.3992860317230225,
|
|
"learning_rate": 7.2188896583563984e-06,
|
|
"loss": 1.2041,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.4211594570594951,
|
|
"grad_norm": 1.267992615699768,
|
|
"learning_rate": 7.179106300519329e-06,
|
|
"loss": 1.232,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.4236965622225041,
|
|
"grad_norm": 1.3517013788223267,
|
|
"learning_rate": 7.13915190261504e-06,
|
|
"loss": 1.2012,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.4262336673855131,
|
|
"grad_norm": 1.3722771406173706,
|
|
"learning_rate": 7.099029600701144e-06,
|
|
"loss": 1.2013,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.42877077254852214,
|
|
"grad_norm": 1.3306978940963745,
|
|
"learning_rate": 7.0587425440141955e-06,
|
|
"loss": 1.2057,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.4313078777115311,
|
|
"grad_norm": 1.2793058156967163,
|
|
"learning_rate": 7.0182938947225025e-06,
|
|
"loss": 1.2094,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.43384498287454015,
|
|
"grad_norm": 1.2533907890319824,
|
|
"learning_rate": 6.977686827677926e-06,
|
|
"loss": 1.22,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.43638208803754913,
|
|
"grad_norm": 1.3793444633483887,
|
|
"learning_rate": 6.936924530166682e-06,
|
|
"loss": 1.2301,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.43891919320055817,
|
|
"grad_norm": 1.2866994142532349,
|
|
"learning_rate": 6.896010201659173e-06,
|
|
"loss": 1.2108,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.44145629836356715,
|
|
"grad_norm": 1.3020964860916138,
|
|
"learning_rate": 6.854947053558849e-06,
|
|
"loss": 1.2133,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.4439934035265762,
|
|
"grad_norm": 1.266717791557312,
|
|
"learning_rate": 6.8137383089501526e-06,
|
|
"loss": 1.2067,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.44653050868958516,
|
|
"grad_norm": 1.3352110385894775,
|
|
"learning_rate": 6.772387202345528e-06,
|
|
"loss": 1.2128,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.4490676138525942,
|
|
"grad_norm": 1.2845940589904785,
|
|
"learning_rate": 6.730896979431543e-06,
|
|
"loss": 1.2168,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.4516047190156032,
|
|
"grad_norm": 1.3710917234420776,
|
|
"learning_rate": 6.689270896814139e-06,
|
|
"loss": 1.2091,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.4541418241786122,
|
|
"grad_norm": 1.351404070854187,
|
|
"learning_rate": 6.647512221763005e-06,
|
|
"loss": 1.2047,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.4566789293416212,
|
|
"grad_norm": 1.3901602029800415,
|
|
"learning_rate": 6.6056242319551315e-06,
|
|
"loss": 1.2074,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.4592160345046302,
|
|
"grad_norm": 1.3269678354263306,
|
|
"learning_rate": 6.563610215217551e-06,
|
|
"loss": 1.2012,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.4617531396676392,
|
|
"grad_norm": 1.4395819902420044,
|
|
"learning_rate": 6.5214734692692594e-06,
|
|
"loss": 1.2121,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.46429024483064824,
|
|
"grad_norm": 1.3422843217849731,
|
|
"learning_rate": 6.479217301462386e-06,
|
|
"loss": 1.2072,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.4668273499936572,
|
|
"grad_norm": 1.389143466949463,
|
|
"learning_rate": 6.43684502852259e-06,
|
|
"loss": 1.2005,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.46936445515666625,
|
|
"grad_norm": 1.2481592893600464,
|
|
"learning_rate": 6.394359976288729e-06,
|
|
"loss": 1.2026,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.47190156031967523,
|
|
"grad_norm": 1.2201452255249023,
|
|
"learning_rate": 6.3517654794518156e-06,
|
|
"loss": 1.2086,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.47443866548268426,
|
|
"grad_norm": 1.2975594997406006,
|
|
"learning_rate": 6.309064881293265e-06,
|
|
"loss": 1.2118,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.47697577064569324,
|
|
"grad_norm": 1.3062618970870972,
|
|
"learning_rate": 6.266261533422487e-06,
|
|
"loss": 1.2117,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.4795128758087023,
|
|
"grad_norm": 1.356292963027954,
|
|
"learning_rate": 6.223358795513812e-06,
|
|
"loss": 1.2045,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.48204998097171126,
|
|
"grad_norm": 1.259065866470337,
|
|
"learning_rate": 6.18036003504278e-06,
|
|
"loss": 1.1995,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.4845870861347203,
|
|
"grad_norm": 1.3219462633132935,
|
|
"learning_rate": 6.1372686270218385e-06,
|
|
"loss": 1.1936,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.48712419129772927,
|
|
"grad_norm": 1.2755314111709595,
|
|
"learning_rate": 6.094087953735423e-06,
|
|
"loss": 1.2122,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.4896612964607383,
|
|
"grad_norm": 1.2812877893447876,
|
|
"learning_rate": 6.050821404474483e-06,
|
|
"loss": 1.1939,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.4921984016237473,
|
|
"grad_norm": 1.308876633644104,
|
|
"learning_rate": 6.00747237527045e-06,
|
|
"loss": 1.2163,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.4947355067867563,
|
|
"grad_norm": 1.3260457515716553,
|
|
"learning_rate": 5.964044268628688e-06,
|
|
"loss": 1.2022,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.4972726119497653,
|
|
"grad_norm": 1.3721497058868408,
|
|
"learning_rate": 5.920540493261415e-06,
|
|
"loss": 1.2128,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.49980971711277433,
|
|
"grad_norm": 1.4000312089920044,
|
|
"learning_rate": 5.8769644638201635e-06,
|
|
"loss": 1.2014,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.5023468222757833,
|
|
"grad_norm": 1.31642746925354,
|
|
"learning_rate": 5.8333196006277536e-06,
|
|
"loss": 1.1962,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.5048839274387923,
|
|
"grad_norm": 1.224902629852295,
|
|
"learning_rate": 5.789609329409826e-06,
|
|
"loss": 1.2015,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.5074210326018014,
|
|
"grad_norm": 1.3817135095596313,
|
|
"learning_rate": 5.7458370810259635e-06,
|
|
"loss": 1.1935,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5099581377648104,
|
|
"grad_norm": 1.268218755722046,
|
|
"learning_rate": 5.702006291200389e-06,
|
|
"loss": 1.1894,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.5124952429278193,
|
|
"grad_norm": 1.4027165174484253,
|
|
"learning_rate": 5.6581204002523e-06,
|
|
"loss": 1.1883,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.5150323480908283,
|
|
"grad_norm": 1.2554658651351929,
|
|
"learning_rate": 5.614182852825835e-06,
|
|
"loss": 1.1995,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.5175694532538374,
|
|
"grad_norm": 1.4268817901611328,
|
|
"learning_rate": 5.570197097619688e-06,
|
|
"loss": 1.2145,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.5201065584168464,
|
|
"grad_norm": 1.2476046085357666,
|
|
"learning_rate": 5.526166587116436e-06,
|
|
"loss": 1.1952,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.5226436635798554,
|
|
"grad_norm": 1.3912453651428223,
|
|
"learning_rate": 5.4820947773115374e-06,
|
|
"loss": 1.2126,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.5251807687428643,
|
|
"grad_norm": 1.3362759351730347,
|
|
"learning_rate": 5.437985127442065e-06,
|
|
"loss": 1.1981,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.5277178739058734,
|
|
"grad_norm": 1.243674874305725,
|
|
"learning_rate": 5.393841099715205e-06,
|
|
"loss": 1.1944,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.5302549790688824,
|
|
"grad_norm": 1.3977802991867065,
|
|
"learning_rate": 5.349666159036482e-06,
|
|
"loss": 1.1924,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.5327920842318914,
|
|
"grad_norm": 1.330768346786499,
|
|
"learning_rate": 5.305463772737812e-06,
|
|
"loss": 1.1907,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.5353291893949004,
|
|
"grad_norm": 1.3184082508087158,
|
|
"learning_rate": 5.261237410305344e-06,
|
|
"loss": 1.1979,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.5378662945579095,
|
|
"grad_norm": 1.3445682525634766,
|
|
"learning_rate": 5.2169905431071356e-06,
|
|
"loss": 1.2007,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.5404033997209184,
|
|
"grad_norm": 1.2852333784103394,
|
|
"learning_rate": 5.172726644120678e-06,
|
|
"loss": 1.187,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.5429405048839274,
|
|
"grad_norm": 1.33376944065094,
|
|
"learning_rate": 5.128449187660309e-06,
|
|
"loss": 1.1913,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.5454776100469364,
|
|
"grad_norm": 1.3172907829284668,
|
|
"learning_rate": 5.084161649104502e-06,
|
|
"loss": 1.1996,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.5480147152099455,
|
|
"grad_norm": 1.3981866836547852,
|
|
"learning_rate": 5.039867504623084e-06,
|
|
"loss": 1.1792,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.5505518203729545,
|
|
"grad_norm": 1.2657582759857178,
|
|
"learning_rate": 4.995570230904386e-06,
|
|
"loss": 1.1744,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.5530889255359634,
|
|
"grad_norm": 1.2834333181381226,
|
|
"learning_rate": 4.951273304882358e-06,
|
|
"loss": 1.1934,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.5556260306989724,
|
|
"grad_norm": 1.2566810846328735,
|
|
"learning_rate": 4.906980203463659e-06,
|
|
"loss": 1.1934,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.5581631358619815,
|
|
"grad_norm": 1.310981035232544,
|
|
"learning_rate": 4.862694403254747e-06,
|
|
"loss": 1.1952,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.5607002410249905,
|
|
"grad_norm": 1.2907445430755615,
|
|
"learning_rate": 4.818419380289009e-06,
|
|
"loss": 1.213,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.5632373461879995,
|
|
"grad_norm": 1.3687422275543213,
|
|
"learning_rate": 4.774158609753908e-06,
|
|
"loss": 1.1969,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.5657744513510085,
|
|
"grad_norm": 1.2513952255249023,
|
|
"learning_rate": 4.729915565718223e-06,
|
|
"loss": 1.1855,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.5683115565140175,
|
|
"grad_norm": 1.3049182891845703,
|
|
"learning_rate": 4.685693720859369e-06,
|
|
"loss": 1.1888,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.5708486616770265,
|
|
"grad_norm": 1.2568814754486084,
|
|
"learning_rate": 4.641496546190813e-06,
|
|
"loss": 1.1858,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.5733857668400355,
|
|
"grad_norm": 1.2858439683914185,
|
|
"learning_rate": 4.597327510789635e-06,
|
|
"loss": 1.18,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.5759228720030445,
|
|
"grad_norm": 1.3341573476791382,
|
|
"learning_rate": 4.553190081524242e-06,
|
|
"loss": 1.1904,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.5784599771660536,
|
|
"grad_norm": 1.2678639888763428,
|
|
"learning_rate": 4.5090877227822424e-06,
|
|
"loss": 1.1908,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.5809970823290626,
|
|
"grad_norm": 1.3060572147369385,
|
|
"learning_rate": 4.46502389619853e-06,
|
|
"loss": 1.202,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.5835341874920715,
|
|
"grad_norm": 1.2779840230941772,
|
|
"learning_rate": 4.421002060383569e-06,
|
|
"loss": 1.1926,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.5860712926550805,
|
|
"grad_norm": 1.3469980955123901,
|
|
"learning_rate": 4.3770256706519375e-06,
|
|
"loss": 1.1777,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.5886083978180896,
|
|
"grad_norm": 1.2928153276443481,
|
|
"learning_rate": 4.3330981787511006e-06,
|
|
"loss": 1.1779,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.5911455029810986,
|
|
"grad_norm": 1.424657940864563,
|
|
"learning_rate": 4.289223032590491e-06,
|
|
"loss": 1.2134,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.5936826081441076,
|
|
"grad_norm": 1.3630969524383545,
|
|
"learning_rate": 4.245403675970877e-06,
|
|
"loss": 1.1821,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.5962197133071165,
|
|
"grad_norm": 1.3015477657318115,
|
|
"learning_rate": 4.201643548314051e-06,
|
|
"loss": 1.1874,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.5987568184701256,
|
|
"grad_norm": 1.2937594652175903,
|
|
"learning_rate": 4.157946084392871e-06,
|
|
"loss": 1.2015,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.6012939236331346,
|
|
"grad_norm": 1.1649646759033203,
|
|
"learning_rate": 4.114314714061659e-06,
|
|
"loss": 1.1787,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.6038310287961436,
|
|
"grad_norm": 1.2585970163345337,
|
|
"learning_rate": 4.0707528619869976e-06,
|
|
"loss": 1.1739,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.6063681339591526,
|
|
"grad_norm": 1.3575518131256104,
|
|
"learning_rate": 4.027263947378907e-06,
|
|
"loss": 1.1744,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.6089052391221617,
|
|
"grad_norm": 1.3373316526412964,
|
|
"learning_rate": 3.9838513837224814e-06,
|
|
"loss": 1.189,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6114423442851706,
|
|
"grad_norm": 1.2326818704605103,
|
|
"learning_rate": 3.940518578509963e-06,
|
|
"loss": 1.1842,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.6139794494481796,
|
|
"grad_norm": 1.34890878200531,
|
|
"learning_rate": 3.8972689329732725e-06,
|
|
"loss": 1.1954,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.6165165546111886,
|
|
"grad_norm": 1.2541084289550781,
|
|
"learning_rate": 3.854105841817056e-06,
|
|
"loss": 1.1771,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.6190536597741977,
|
|
"grad_norm": 1.254408836364746,
|
|
"learning_rate": 3.811032692952227e-06,
|
|
"loss": 1.1655,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.6215907649372067,
|
|
"grad_norm": 1.2309529781341553,
|
|
"learning_rate": 3.7680528672300404e-06,
|
|
"loss": 1.1909,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.6241278701002156,
|
|
"grad_norm": 1.3115286827087402,
|
|
"learning_rate": 3.7251697381767373e-06,
|
|
"loss": 1.192,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.6266649752632246,
|
|
"grad_norm": 1.23496413230896,
|
|
"learning_rate": 3.6823866717287437e-06,
|
|
"loss": 1.1905,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.6292020804262337,
|
|
"grad_norm": 1.3248686790466309,
|
|
"learning_rate": 3.6397070259684793e-06,
|
|
"loss": 1.1864,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.6317391855892427,
|
|
"grad_norm": 1.256982445716858,
|
|
"learning_rate": 3.5971341508607814e-06,
|
|
"loss": 1.169,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.6342762907522517,
|
|
"grad_norm": 1.30142343044281,
|
|
"learning_rate": 3.5546713879899563e-06,
|
|
"loss": 1.1699,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.6368133959152606,
|
|
"grad_norm": 1.3655248880386353,
|
|
"learning_rate": 3.512322070297503e-06,
|
|
"loss": 1.1719,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.6393505010782697,
|
|
"grad_norm": 1.288802146911621,
|
|
"learning_rate": 3.4700895218205026e-06,
|
|
"loss": 1.1869,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.6418876062412787,
|
|
"grad_norm": 1.245276689529419,
|
|
"learning_rate": 3.4279770574307096e-06,
|
|
"loss": 1.1882,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.6444247114042877,
|
|
"grad_norm": 1.222338080406189,
|
|
"learning_rate": 3.385987982574372e-06,
|
|
"loss": 1.1746,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.6469618165672967,
|
|
"grad_norm": 1.2364776134490967,
|
|
"learning_rate": 3.3441255930127752e-06,
|
|
"loss": 1.1912,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.6494989217303058,
|
|
"grad_norm": 1.270403504371643,
|
|
"learning_rate": 3.3023931745635606e-06,
|
|
"loss": 1.1805,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.6520360268933147,
|
|
"grad_norm": 1.3970060348510742,
|
|
"learning_rate": 3.2607940028428154e-06,
|
|
"loss": 1.1913,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.6545731320563237,
|
|
"grad_norm": 1.3008099794387817,
|
|
"learning_rate": 3.2193313430079737e-06,
|
|
"loss": 1.1978,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.6571102372193327,
|
|
"grad_norm": 1.3095245361328125,
|
|
"learning_rate": 3.178008449501517e-06,
|
|
"loss": 1.1744,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.6596473423823418,
|
|
"grad_norm": 1.1827950477600098,
|
|
"learning_rate": 3.1368285657955464e-06,
|
|
"loss": 1.1779,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.6621844475453508,
|
|
"grad_norm": 1.2912861108779907,
|
|
"learning_rate": 3.0957949241371845e-06,
|
|
"loss": 1.197,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.6647215527083598,
|
|
"grad_norm": 1.2797883749008179,
|
|
"learning_rate": 3.0549107452948866e-06,
|
|
"loss": 1.1945,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.6672586578713687,
|
|
"grad_norm": 1.3769925832748413,
|
|
"learning_rate": 3.014179238305629e-06,
|
|
"loss": 1.1819,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.6697957630343778,
|
|
"grad_norm": 1.204916000366211,
|
|
"learning_rate": 2.9736036002230332e-06,
|
|
"loss": 1.1646,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.6723328681973868,
|
|
"grad_norm": 1.216291904449463,
|
|
"learning_rate": 2.933187015866431e-06,
|
|
"loss": 1.1929,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.6748699733603958,
|
|
"grad_norm": 1.273775339126587,
|
|
"learning_rate": 2.892932657570878e-06,
|
|
"loss": 1.1775,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.6774070785234048,
|
|
"grad_norm": 1.2574785947799683,
|
|
"learning_rate": 2.8528436849381518e-06,
|
|
"loss": 1.2057,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.6799441836864138,
|
|
"grad_norm": 1.2418912649154663,
|
|
"learning_rate": 2.8129232445887623e-06,
|
|
"loss": 1.1858,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.6824812888494228,
|
|
"grad_norm": 1.3044190406799316,
|
|
"learning_rate": 2.773174469914964e-06,
|
|
"loss": 1.1867,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.6850183940124318,
|
|
"grad_norm": 1.3013222217559814,
|
|
"learning_rate": 2.7336004808348094e-06,
|
|
"loss": 1.1737,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.6875554991754408,
|
|
"grad_norm": 1.2504340410232544,
|
|
"learning_rate": 2.6942043835472725e-06,
|
|
"loss": 1.1827,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.6900926043384499,
|
|
"grad_norm": 1.292549729347229,
|
|
"learning_rate": 2.654989270288435e-06,
|
|
"loss": 1.1844,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.6926297095014589,
|
|
"grad_norm": 1.2378226518630981,
|
|
"learning_rate": 2.615958219088776e-06,
|
|
"loss": 1.1827,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.6951668146644678,
|
|
"grad_norm": 1.1776105165481567,
|
|
"learning_rate": 2.577114293531571e-06,
|
|
"loss": 1.1764,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.6977039198274768,
|
|
"grad_norm": 1.2368700504302979,
|
|
"learning_rate": 2.538460542512435e-06,
|
|
"loss": 1.1788,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.7002410249904859,
|
|
"grad_norm": 1.3077671527862549,
|
|
"learning_rate": 2.5000000000000015e-06,
|
|
"loss": 1.1786,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.7027781301534949,
|
|
"grad_norm": 1.2264658212661743,
|
|
"learning_rate": 2.461735684797794e-06,
|
|
"loss": 1.1891,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.7053152353165039,
|
|
"grad_norm": 1.411934494972229,
|
|
"learning_rate": 2.4236706003072733e-06,
|
|
"loss": 1.2021,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.7078523404795128,
|
|
"grad_norm": 1.3481911420822144,
|
|
"learning_rate": 2.385807734292097e-06,
|
|
"loss": 1.1687,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.7103894456425219,
|
|
"grad_norm": 1.2622945308685303,
|
|
"learning_rate": 2.3481500586436067e-06,
|
|
"loss": 1.1906,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.7129265508055309,
|
|
"grad_norm": 1.3974981307983398,
|
|
"learning_rate": 2.3107005291475653e-06,
|
|
"loss": 1.1894,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.7154636559685399,
|
|
"grad_norm": 1.2239971160888672,
|
|
"learning_rate": 2.273462085252146e-06,
|
|
"loss": 1.1596,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.7180007611315489,
|
|
"grad_norm": 1.3323861360549927,
|
|
"learning_rate": 2.236437649837223e-06,
|
|
"loss": 1.2045,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.720537866294558,
|
|
"grad_norm": 1.2548524141311646,
|
|
"learning_rate": 2.1996301289849474e-06,
|
|
"loss": 1.1791,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.7230749714575669,
|
|
"grad_norm": 1.3251192569732666,
|
|
"learning_rate": 2.1630424117516436e-06,
|
|
"loss": 1.174,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.7256120766205759,
|
|
"grad_norm": 1.2302052974700928,
|
|
"learning_rate": 2.126677369941047e-06,
|
|
"loss": 1.1498,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.7281491817835849,
|
|
"grad_norm": 1.2613223791122437,
|
|
"learning_rate": 2.0905378578788947e-06,
|
|
"loss": 1.1799,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.730686286946594,
|
|
"grad_norm": 1.2105897665023804,
|
|
"learning_rate": 2.0546267121888863e-06,
|
|
"loss": 1.169,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.733223392109603,
|
|
"grad_norm": 1.3086020946502686,
|
|
"learning_rate": 2.0189467515700283e-06,
|
|
"loss": 1.166,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.735760497272612,
|
|
"grad_norm": 1.2625070810317993,
|
|
"learning_rate": 1.9835007765754035e-06,
|
|
"loss": 1.1944,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.7382976024356209,
|
|
"grad_norm": 1.3192371129989624,
|
|
"learning_rate": 1.9482915693923442e-06,
|
|
"loss": 1.1712,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.74083470759863,
|
|
"grad_norm": 1.2234618663787842,
|
|
"learning_rate": 1.913321893624059e-06,
|
|
"loss": 1.1753,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.743371812761639,
|
|
"grad_norm": 1.3323945999145508,
|
|
"learning_rate": 1.878594494072713e-06,
|
|
"loss": 1.1681,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.745908917924648,
|
|
"grad_norm": 1.2558120489120483,
|
|
"learning_rate": 1.8441120965239912e-06,
|
|
"loss": 1.1796,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.748446023087657,
|
|
"grad_norm": 1.2716906070709229,
|
|
"learning_rate": 1.8098774075331383e-06,
|
|
"loss": 1.1894,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.750983128250666,
|
|
"grad_norm": 1.2781611680984497,
|
|
"learning_rate": 1.7758931142125308e-06,
|
|
"loss": 1.1855,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.753520233413675,
|
|
"grad_norm": 1.3427870273590088,
|
|
"learning_rate": 1.7421618840207576e-06,
|
|
"loss": 1.183,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.756057338576684,
|
|
"grad_norm": 1.2347464561462402,
|
|
"learning_rate": 1.7086863645532425e-06,
|
|
"loss": 1.1615,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.758594443739693,
|
|
"grad_norm": 1.2659286260604858,
|
|
"learning_rate": 1.6754691833344472e-06,
|
|
"loss": 1.1926,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.7611315489027021,
|
|
"grad_norm": 1.2952594757080078,
|
|
"learning_rate": 1.642512947611622e-06,
|
|
"loss": 1.1988,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.763668654065711,
|
|
"grad_norm": 1.2655588388442993,
|
|
"learning_rate": 1.6098202441501599e-06,
|
|
"loss": 1.1691,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.76620575922872,
|
|
"grad_norm": 1.2471306324005127,
|
|
"learning_rate": 1.5773936390305678e-06,
|
|
"loss": 1.1572,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.768742864391729,
|
|
"grad_norm": 1.2647178173065186,
|
|
"learning_rate": 1.5452356774470468e-06,
|
|
"loss": 1.1733,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.7712799695547381,
|
|
"grad_norm": 1.2472219467163086,
|
|
"learning_rate": 1.5133488835077204e-06,
|
|
"loss": 1.1772,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.7738170747177471,
|
|
"grad_norm": 1.2521233558654785,
|
|
"learning_rate": 1.4817357600365061e-06,
|
|
"loss": 1.172,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.7763541798807561,
|
|
"grad_norm": 1.3133138418197632,
|
|
"learning_rate": 1.4503987883766857e-06,
|
|
"loss": 1.1784,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.778891285043765,
|
|
"grad_norm": 1.2727947235107422,
|
|
"learning_rate": 1.4193404281961172e-06,
|
|
"loss": 1.1817,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.7814283902067741,
|
|
"grad_norm": 1.2805311679840088,
|
|
"learning_rate": 1.3885631172941932e-06,
|
|
"loss": 1.1841,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.7839654953697831,
|
|
"grad_norm": 1.2885172367095947,
|
|
"learning_rate": 1.3580692714104887e-06,
|
|
"loss": 1.162,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.7865026005327921,
|
|
"grad_norm": 1.3356949090957642,
|
|
"learning_rate": 1.3278612840351468e-06,
|
|
"loss": 1.1879,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.7890397056958011,
|
|
"grad_norm": 1.2000919580459595,
|
|
"learning_rate": 1.2979415262210089e-06,
|
|
"loss": 1.1772,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.79157681085881,
|
|
"grad_norm": 1.2145313024520874,
|
|
"learning_rate": 1.2683123463975144e-06,
|
|
"loss": 1.1662,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.7941139160218191,
|
|
"grad_norm": 1.2365864515304565,
|
|
"learning_rate": 1.2389760701863717e-06,
|
|
"loss": 1.1916,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.7966510211848281,
|
|
"grad_norm": 1.2178484201431274,
|
|
"learning_rate": 1.2099350002190063e-06,
|
|
"loss": 1.1686,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.7991881263478371,
|
|
"grad_norm": 1.2906898260116577,
|
|
"learning_rate": 1.1811914159558374e-06,
|
|
"loss": 1.1979,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.8017252315108461,
|
|
"grad_norm": 1.2861703634262085,
|
|
"learning_rate": 1.1527475735073574e-06,
|
|
"loss": 1.1937,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.8042623366738552,
|
|
"grad_norm": 1.2466925382614136,
|
|
"learning_rate": 1.1246057054570414e-06,
|
|
"loss": 1.1632,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.8067994418368641,
|
|
"grad_norm": 1.1871761083602905,
|
|
"learning_rate": 1.0967680206861198e-06,
|
|
"loss": 1.1587,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.8093365469998731,
|
|
"grad_norm": 1.2665520906448364,
|
|
"learning_rate": 1.069236704200195e-06,
|
|
"loss": 1.1679,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.8118736521628821,
|
|
"grad_norm": 1.197919487953186,
|
|
"learning_rate": 1.0420139169577393e-06,
|
|
"loss": 1.1652,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.8144107573258912,
|
|
"grad_norm": 1.2373968362808228,
|
|
"learning_rate": 1.01510179570048e-06,
|
|
"loss": 1.178,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.8169478624889002,
|
|
"grad_norm": 1.2911186218261719,
|
|
"learning_rate": 9.88502452785685e-07,
|
|
"loss": 1.1735,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.8194849676519091,
|
|
"grad_norm": 1.2003546953201294,
|
|
"learning_rate": 9.62217976020357e-07,
|
|
"loss": 1.1836,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.8220220728149181,
|
|
"grad_norm": 1.3649158477783203,
|
|
"learning_rate": 9.362504284973683e-07,
|
|
"loss": 1.1651,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.8245591779779272,
|
|
"grad_norm": 1.3181599378585815,
|
|
"learning_rate": 9.1060184843352e-07,
|
|
"loss": 1.1735,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.8270962831409362,
|
|
"grad_norm": 1.2371008396148682,
|
|
"learning_rate": 8.852742490095628e-07,
|
|
"loss": 1.1629,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.8296333883039452,
|
|
"grad_norm": 1.181353211402893,
|
|
"learning_rate": 8.602696182121812e-07,
|
|
"loss": 1.1722,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.8321704934669542,
|
|
"grad_norm": 1.314288854598999,
|
|
"learning_rate": 8.35589918677952e-07,
|
|
"loss": 1.1713,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.8347075986299632,
|
|
"grad_norm": 1.2208564281463623,
|
|
"learning_rate": 8.112370875393e-07,
|
|
"loss": 1.1801,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.8372447037929722,
|
|
"grad_norm": 1.2573906183242798,
|
|
"learning_rate": 7.872130362724422e-07,
|
|
"loss": 1.154,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.8397818089559812,
|
|
"grad_norm": 1.286879301071167,
|
|
"learning_rate": 7.635196505473652e-07,
|
|
"loss": 1.1759,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.8423189141189902,
|
|
"grad_norm": 1.2773866653442383,
|
|
"learning_rate": 7.401587900798091e-07,
|
|
"loss": 1.1746,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.8448560192819993,
|
|
"grad_norm": 1.3070263862609863,
|
|
"learning_rate": 7.171322884852988e-07,
|
|
"loss": 1.1866,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.8473931244450083,
|
|
"grad_norm": 1.2348638772964478,
|
|
"learning_rate": 6.944419531352236e-07,
|
|
"loss": 1.1816,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.8499302296080172,
|
|
"grad_norm": 1.1883289813995361,
|
|
"learning_rate": 6.720895650149744e-07,
|
|
"loss": 1.1795,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.8524673347710262,
|
|
"grad_norm": 1.245474934577942,
|
|
"learning_rate": 6.500768785841482e-07,
|
|
"loss": 1.1733,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.8550044399340353,
|
|
"grad_norm": 1.277593731880188,
|
|
"learning_rate": 6.284056216388451e-07,
|
|
"loss": 1.1731,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.8575415450970443,
|
|
"grad_norm": 1.2596774101257324,
|
|
"learning_rate": 6.070774951760505e-07,
|
|
"loss": 1.171,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.8600786502600533,
|
|
"grad_norm": 1.266883134841919,
|
|
"learning_rate": 5.860941732601166e-07,
|
|
"loss": 1.1668,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.8626157554230622,
|
|
"grad_norm": 1.2608847618103027,
|
|
"learning_rate": 5.654573028913735e-07,
|
|
"loss": 1.1704,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.8651528605860713,
|
|
"grad_norm": 1.311318278312683,
|
|
"learning_rate": 5.451685038768473e-07,
|
|
"loss": 1.1899,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.8676899657490803,
|
|
"grad_norm": 1.2083625793457031,
|
|
"learning_rate": 5.252293687031196e-07,
|
|
"loss": 1.1636,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.8702270709120893,
|
|
"grad_norm": 1.256255865097046,
|
|
"learning_rate": 5.05641462411336e-07,
|
|
"loss": 1.1651,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.8727641760750983,
|
|
"grad_norm": 1.261788010597229,
|
|
"learning_rate": 4.864063224743626e-07,
|
|
"loss": 1.1677,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.8753012812381074,
|
|
"grad_norm": 1.1607638597488403,
|
|
"learning_rate": 4.6752545867610963e-07,
|
|
"loss": 1.1722,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.8778383864011163,
|
|
"grad_norm": 1.1922293901443481,
|
|
"learning_rate": 4.4900035299302036e-07,
|
|
"loss": 1.1675,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.8803754915641253,
|
|
"grad_norm": 1.2432760000228882,
|
|
"learning_rate": 4.308324594777635e-07,
|
|
"loss": 1.1689,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.8829125967271343,
|
|
"grad_norm": 1.304062008857727,
|
|
"learning_rate": 4.130232041450866e-07,
|
|
"loss": 1.1684,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.8854497018901434,
|
|
"grad_norm": 1.1947314739227295,
|
|
"learning_rate": 3.9557398485989884e-07,
|
|
"loss": 1.1652,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.8879868070531524,
|
|
"grad_norm": 1.2069730758666992,
|
|
"learning_rate": 3.784861712275467e-07,
|
|
"loss": 1.1608,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.8905239122161613,
|
|
"grad_norm": 1.2010389566421509,
|
|
"learning_rate": 3.61761104486314e-07,
|
|
"loss": 1.1731,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.8930610173791703,
|
|
"grad_norm": 1.2449175119400024,
|
|
"learning_rate": 3.454000974021432e-07,
|
|
"loss": 1.1829,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.8955981225421794,
|
|
"grad_norm": 1.244471788406372,
|
|
"learning_rate": 3.294044341655983e-07,
|
|
"loss": 1.1629,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.8981352277051884,
|
|
"grad_norm": 1.2766717672348022,
|
|
"learning_rate": 3.1377537029107174e-07,
|
|
"loss": 1.1567,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.9006723328681974,
|
|
"grad_norm": 1.2156423330307007,
|
|
"learning_rate": 2.985141325182267e-07,
|
|
"loss": 1.177,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.9032094380312063,
|
|
"grad_norm": 1.2810441255569458,
|
|
"learning_rate": 2.836219187157202e-07,
|
|
"loss": 1.1757,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.9057465431942154,
|
|
"grad_norm": 1.312305212020874,
|
|
"learning_rate": 2.69099897787175e-07,
|
|
"loss": 1.1654,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.9082836483572244,
|
|
"grad_norm": 1.1842001676559448,
|
|
"learning_rate": 2.5494920957943314e-07,
|
|
"loss": 1.1597,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.9108207535202334,
|
|
"grad_norm": 1.239871859550476,
|
|
"learning_rate": 2.411709647930882e-07,
|
|
"loss": 1.1762,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.9133578586832424,
|
|
"grad_norm": 1.197737216949463,
|
|
"learning_rate": 2.2776624489530664e-07,
|
|
"loss": 1.1699,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.9158949638462515,
|
|
"grad_norm": 1.2735172510147095,
|
|
"learning_rate": 2.1473610203494032e-07,
|
|
"loss": 1.1742,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.9184320690092604,
|
|
"grad_norm": 1.2625423669815063,
|
|
"learning_rate": 2.0208155895994343e-07,
|
|
"loss": 1.1609,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.9209691741722694,
|
|
"grad_norm": 1.2696080207824707,
|
|
"learning_rate": 1.8980360893709582e-07,
|
|
"loss": 1.1742,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.9235062793352784,
|
|
"grad_norm": 1.2440428733825684,
|
|
"learning_rate": 1.7790321567404011e-07,
|
|
"loss": 1.1747,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.9260433844982875,
|
|
"grad_norm": 1.2741636037826538,
|
|
"learning_rate": 1.6638131324364094e-07,
|
|
"loss": 1.171,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.9285804896612965,
|
|
"grad_norm": 1.2880579233169556,
|
|
"learning_rate": 1.55238806010668e-07,
|
|
"loss": 1.1443,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.9311175948243055,
|
|
"grad_norm": 1.297038197517395,
|
|
"learning_rate": 1.444765685608096e-07,
|
|
"loss": 1.1733,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.9336546999873144,
|
|
"grad_norm": 1.208609938621521,
|
|
"learning_rate": 1.340954456320287e-07,
|
|
"loss": 1.1741,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.9361918051503235,
|
|
"grad_norm": 1.2683435678482056,
|
|
"learning_rate": 1.2409625204825802e-07,
|
|
"loss": 1.174,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.9387289103133325,
|
|
"grad_norm": 1.1996833086013794,
|
|
"learning_rate": 1.1447977265544141e-07,
|
|
"loss": 1.1777,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.9412660154763415,
|
|
"grad_norm": 1.2534565925598145,
|
|
"learning_rate": 1.052467622599329e-07,
|
|
"loss": 1.155,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.9438031206393505,
|
|
"grad_norm": 1.2183892726898193,
|
|
"learning_rate": 9.639794556925041e-08,
|
|
"loss": 1.1655,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.9463402258023595,
|
|
"grad_norm": 1.2513470649719238,
|
|
"learning_rate": 8.793401713519333e-08,
|
|
"loss": 1.1727,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.9488773309653685,
|
|
"grad_norm": 1.3019185066223145,
|
|
"learning_rate": 7.985564129932566e-08,
|
|
"loss": 1.175,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.9514144361283775,
|
|
"grad_norm": 1.2735040187835693,
|
|
"learning_rate": 7.216345214083264e-08,
|
|
"loss": 1.1796,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.9539515412913865,
|
|
"grad_norm": 1.2108937501907349,
|
|
"learning_rate": 6.485805342674901e-08,
|
|
"loss": 1.1478,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.9564886464543956,
|
|
"grad_norm": 1.3137885332107544,
|
|
"learning_rate": 5.7940018564570654e-08,
|
|
"loss": 1.1777,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.9590257516174046,
|
|
"grad_norm": 1.2065484523773193,
|
|
"learning_rate": 5.1409890557246876e-08,
|
|
"loss": 1.1749,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.9615628567804135,
|
|
"grad_norm": 1.3035470247268677,
|
|
"learning_rate": 4.526818196055938e-08,
|
|
"loss": 1.1795,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.9640999619434225,
|
|
"grad_norm": 1.2624789476394653,
|
|
"learning_rate": 3.951537484289114e-08,
|
|
"loss": 1.1865,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.9666370671064316,
|
|
"grad_norm": 1.2832140922546387,
|
|
"learning_rate": 3.4151920747390044e-08,
|
|
"loss": 1.1623,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.9691741722694406,
|
|
"grad_norm": 1.206804633140564,
|
|
"learning_rate": 2.9178240656523305e-08,
|
|
"loss": 1.1698,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.9717112774324496,
|
|
"grad_norm": 1.237695574760437,
|
|
"learning_rate": 2.4594724959037253e-08,
|
|
"loss": 1.1826,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.9742483825954585,
|
|
"grad_norm": 1.2522958517074585,
|
|
"learning_rate": 2.0401733419315727e-08,
|
|
"loss": 1.1679,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.9767854877584676,
|
|
"grad_norm": 1.209662914276123,
|
|
"learning_rate": 1.659959514913767e-08,
|
|
"loss": 1.1842,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.9793225929214766,
|
|
"grad_norm": 1.3333046436309814,
|
|
"learning_rate": 1.3188608581851114e-08,
|
|
"loss": 1.1629,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.9818596980844856,
|
|
"grad_norm": 1.2529042959213257,
|
|
"learning_rate": 1.016904144894304e-08,
|
|
"loss": 1.1779,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.9843968032474946,
|
|
"grad_norm": 1.2353804111480713,
|
|
"learning_rate": 7.541130759027848e-09,
|
|
"loss": 1.1728,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.9869339084105037,
|
|
"grad_norm": 1.1999486684799194,
|
|
"learning_rate": 5.305082779244464e-09,
|
|
"loss": 1.1607,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.9894710135735126,
|
|
"grad_norm": 1.1895602941513062,
|
|
"learning_rate": 3.4610730190648423e-09,
|
|
"loss": 1.1884,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.9920081187365216,
|
|
"grad_norm": 1.22995924949646,
|
|
"learning_rate": 2.0092462165194337e-09,
|
|
"loss": 1.1906,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.9945452238995306,
|
|
"grad_norm": 1.1689248085021973,
|
|
"learning_rate": 9.497163268351595e-10,
|
|
"loss": 1.1499,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.9970823290625397,
|
|
"grad_norm": 1.3178656101226807,
|
|
"learning_rate": 2.825665134920108e-10,
|
|
"loss": 1.173,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.9996194342255487,
|
|
"grad_norm": 1.2528467178344727,
|
|
"learning_rate": 7.849141696048002e-12,
|
|
"loss": 1.1774,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.9998731447418495,
|
|
"step": 3941,
|
|
"total_flos": 1.0333279192736596e+19,
|
|
"train_loss": 1.22370115647731,
|
|
"train_runtime": 28099.9428,
|
|
"train_samples_per_second": 17.953,
|
|
"train_steps_per_second": 0.14
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 3941,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.0333279192736596e+19,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|