52794 lines
1.2 MiB
52794 lines
1.2 MiB
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 2.0,
|
|
"eval_steps": 500,
|
|
"global_step": 7520,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00026595744680851064,
|
|
"grad_norm": 12.928236961364746,
|
|
"learning_rate": 1.0638297872340427e-08,
|
|
"loss": 1.5564,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0005319148936170213,
|
|
"grad_norm": 12.116073608398438,
|
|
"learning_rate": 2.1276595744680853e-08,
|
|
"loss": 1.5756,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0007978723404255319,
|
|
"grad_norm": 13.450613975524902,
|
|
"learning_rate": 3.191489361702128e-08,
|
|
"loss": 1.6078,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0010638297872340426,
|
|
"grad_norm": 14.591333389282227,
|
|
"learning_rate": 4.2553191489361707e-08,
|
|
"loss": 1.6333,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0013297872340425532,
|
|
"grad_norm": 14.167532920837402,
|
|
"learning_rate": 5.319148936170213e-08,
|
|
"loss": 1.4764,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0015957446808510637,
|
|
"grad_norm": 11.665863037109375,
|
|
"learning_rate": 6.382978723404255e-08,
|
|
"loss": 1.5681,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.0018617021276595746,
|
|
"grad_norm": 12.705963134765625,
|
|
"learning_rate": 7.446808510638299e-08,
|
|
"loss": 1.5249,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.002127659574468085,
|
|
"grad_norm": 13.839447021484375,
|
|
"learning_rate": 8.510638297872341e-08,
|
|
"loss": 1.6567,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.0023936170212765957,
|
|
"grad_norm": 11.46570110321045,
|
|
"learning_rate": 9.574468085106384e-08,
|
|
"loss": 1.4166,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.0026595744680851063,
|
|
"grad_norm": 12.468977928161621,
|
|
"learning_rate": 1.0638297872340426e-07,
|
|
"loss": 1.4788,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.002925531914893617,
|
|
"grad_norm": 10.813947677612305,
|
|
"learning_rate": 1.1702127659574468e-07,
|
|
"loss": 1.3127,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.0031914893617021275,
|
|
"grad_norm": 12.833952903747559,
|
|
"learning_rate": 1.276595744680851e-07,
|
|
"loss": 1.5291,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.003457446808510638,
|
|
"grad_norm": 13.475564956665039,
|
|
"learning_rate": 1.3829787234042553e-07,
|
|
"loss": 1.4629,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.003723404255319149,
|
|
"grad_norm": 11.995802879333496,
|
|
"learning_rate": 1.4893617021276598e-07,
|
|
"loss": 1.5887,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.003989361702127659,
|
|
"grad_norm": 14.704851150512695,
|
|
"learning_rate": 1.5957446808510638e-07,
|
|
"loss": 1.4533,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.00425531914893617,
|
|
"grad_norm": 11.153929710388184,
|
|
"learning_rate": 1.7021276595744683e-07,
|
|
"loss": 1.4027,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.0045212765957446804,
|
|
"grad_norm": 14.091814994812012,
|
|
"learning_rate": 1.8085106382978722e-07,
|
|
"loss": 1.6199,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.0047872340425531915,
|
|
"grad_norm": 13.533143997192383,
|
|
"learning_rate": 1.9148936170212767e-07,
|
|
"loss": 1.4809,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.0050531914893617025,
|
|
"grad_norm": 13.076473236083984,
|
|
"learning_rate": 2.0212765957446812e-07,
|
|
"loss": 1.5374,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.005319148936170213,
|
|
"grad_norm": 13.062971115112305,
|
|
"learning_rate": 2.1276595744680852e-07,
|
|
"loss": 1.6008,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.005585106382978724,
|
|
"grad_norm": 13.033509254455566,
|
|
"learning_rate": 2.2340425531914897e-07,
|
|
"loss": 1.4679,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.005851063829787234,
|
|
"grad_norm": 11.98855972290039,
|
|
"learning_rate": 2.3404255319148937e-07,
|
|
"loss": 1.5049,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.006117021276595745,
|
|
"grad_norm": 13.161596298217773,
|
|
"learning_rate": 2.446808510638298e-07,
|
|
"loss": 1.5114,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.006382978723404255,
|
|
"grad_norm": 12.387269020080566,
|
|
"learning_rate": 2.553191489361702e-07,
|
|
"loss": 1.3019,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.006648936170212766,
|
|
"grad_norm": 10.667431831359863,
|
|
"learning_rate": 2.6595744680851066e-07,
|
|
"loss": 1.3113,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.006914893617021276,
|
|
"grad_norm": 11.682806015014648,
|
|
"learning_rate": 2.7659574468085106e-07,
|
|
"loss": 1.627,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.007180851063829787,
|
|
"grad_norm": 11.338486671447754,
|
|
"learning_rate": 2.872340425531915e-07,
|
|
"loss": 1.6309,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.007446808510638298,
|
|
"grad_norm": 12.796504020690918,
|
|
"learning_rate": 2.9787234042553196e-07,
|
|
"loss": 1.4464,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.007712765957446808,
|
|
"grad_norm": 12.2352876663208,
|
|
"learning_rate": 3.0851063829787236e-07,
|
|
"loss": 1.5748,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.007978723404255319,
|
|
"grad_norm": 10.04947566986084,
|
|
"learning_rate": 3.1914893617021275e-07,
|
|
"loss": 1.3302,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.00824468085106383,
|
|
"grad_norm": 11.51389217376709,
|
|
"learning_rate": 3.297872340425532e-07,
|
|
"loss": 1.3543,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.00851063829787234,
|
|
"grad_norm": 9.522992134094238,
|
|
"learning_rate": 3.4042553191489365e-07,
|
|
"loss": 1.4485,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.008776595744680852,
|
|
"grad_norm": 8.156554222106934,
|
|
"learning_rate": 3.510638297872341e-07,
|
|
"loss": 1.3791,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.009042553191489361,
|
|
"grad_norm": 10.546247482299805,
|
|
"learning_rate": 3.6170212765957445e-07,
|
|
"loss": 1.6197,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.009308510638297872,
|
|
"grad_norm": 8.094082832336426,
|
|
"learning_rate": 3.723404255319149e-07,
|
|
"loss": 1.2722,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.009574468085106383,
|
|
"grad_norm": 7.64621114730835,
|
|
"learning_rate": 3.8297872340425535e-07,
|
|
"loss": 1.2489,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.009840425531914894,
|
|
"grad_norm": 7.087127208709717,
|
|
"learning_rate": 3.936170212765958e-07,
|
|
"loss": 1.3383,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.010106382978723405,
|
|
"grad_norm": 7.989037990570068,
|
|
"learning_rate": 4.0425531914893625e-07,
|
|
"loss": 1.2275,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.010372340425531914,
|
|
"grad_norm": 9.057306289672852,
|
|
"learning_rate": 4.148936170212766e-07,
|
|
"loss": 1.4094,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.010638297872340425,
|
|
"grad_norm": 7.628477573394775,
|
|
"learning_rate": 4.2553191489361704e-07,
|
|
"loss": 1.3137,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.010904255319148936,
|
|
"grad_norm": 7.493610858917236,
|
|
"learning_rate": 4.361702127659575e-07,
|
|
"loss": 1.3603,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.011170212765957447,
|
|
"grad_norm": 6.819916725158691,
|
|
"learning_rate": 4.4680851063829794e-07,
|
|
"loss": 1.5013,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.011436170212765957,
|
|
"grad_norm": 7.222757339477539,
|
|
"learning_rate": 4.574468085106383e-07,
|
|
"loss": 1.4389,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.011702127659574468,
|
|
"grad_norm": 6.92927885055542,
|
|
"learning_rate": 4.6808510638297873e-07,
|
|
"loss": 1.386,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.011968085106382979,
|
|
"grad_norm": 6.100423336029053,
|
|
"learning_rate": 4.787234042553192e-07,
|
|
"loss": 1.3654,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.01223404255319149,
|
|
"grad_norm": 6.047520637512207,
|
|
"learning_rate": 4.893617021276596e-07,
|
|
"loss": 1.2467,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0125,
|
|
"grad_norm": 6.429448127746582,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 1.2826,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.01276595744680851,
|
|
"grad_norm": 6.81625509262085,
|
|
"learning_rate": 5.106382978723404e-07,
|
|
"loss": 1.4576,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.013031914893617021,
|
|
"grad_norm": 5.9020609855651855,
|
|
"learning_rate": 5.212765957446809e-07,
|
|
"loss": 1.2929,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.013297872340425532,
|
|
"grad_norm": 6.343348979949951,
|
|
"learning_rate": 5.319148936170213e-07,
|
|
"loss": 1.4692,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.013563829787234043,
|
|
"grad_norm": 6.274758338928223,
|
|
"learning_rate": 5.425531914893618e-07,
|
|
"loss": 1.3331,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.013829787234042552,
|
|
"grad_norm": 6.188233852386475,
|
|
"learning_rate": 5.531914893617021e-07,
|
|
"loss": 1.4061,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.014095744680851063,
|
|
"grad_norm": 6.108701705932617,
|
|
"learning_rate": 5.638297872340426e-07,
|
|
"loss": 1.2786,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.014361702127659574,
|
|
"grad_norm": 6.032108306884766,
|
|
"learning_rate": 5.74468085106383e-07,
|
|
"loss": 1.3159,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.014627659574468085,
|
|
"grad_norm": 6.019993305206299,
|
|
"learning_rate": 5.851063829787235e-07,
|
|
"loss": 1.3846,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.014893617021276596,
|
|
"grad_norm": 6.405829906463623,
|
|
"learning_rate": 5.957446808510639e-07,
|
|
"loss": 1.3691,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.015159574468085106,
|
|
"grad_norm": 6.517266273498535,
|
|
"learning_rate": 6.063829787234043e-07,
|
|
"loss": 1.416,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.015425531914893617,
|
|
"grad_norm": 5.831709861755371,
|
|
"learning_rate": 6.170212765957447e-07,
|
|
"loss": 1.3022,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.015691489361702126,
|
|
"grad_norm": 6.413986682891846,
|
|
"learning_rate": 6.276595744680851e-07,
|
|
"loss": 1.2001,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.015957446808510637,
|
|
"grad_norm": 5.887234687805176,
|
|
"learning_rate": 6.382978723404255e-07,
|
|
"loss": 1.301,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.016223404255319148,
|
|
"grad_norm": 6.500317573547363,
|
|
"learning_rate": 6.48936170212766e-07,
|
|
"loss": 1.2389,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.01648936170212766,
|
|
"grad_norm": 5.423646450042725,
|
|
"learning_rate": 6.595744680851064e-07,
|
|
"loss": 1.1179,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.01675531914893617,
|
|
"grad_norm": 6.422118663787842,
|
|
"learning_rate": 6.702127659574469e-07,
|
|
"loss": 1.2685,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.01702127659574468,
|
|
"grad_norm": 6.100841999053955,
|
|
"learning_rate": 6.808510638297873e-07,
|
|
"loss": 1.3432,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.017287234042553192,
|
|
"grad_norm": 6.879647254943848,
|
|
"learning_rate": 6.914893617021278e-07,
|
|
"loss": 1.4595,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.017553191489361703,
|
|
"grad_norm": 5.739667892456055,
|
|
"learning_rate": 7.021276595744682e-07,
|
|
"loss": 1.254,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.017819148936170214,
|
|
"grad_norm": 5.58401346206665,
|
|
"learning_rate": 7.127659574468087e-07,
|
|
"loss": 1.275,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.018085106382978722,
|
|
"grad_norm": 5.75786018371582,
|
|
"learning_rate": 7.234042553191489e-07,
|
|
"loss": 1.2797,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.018351063829787233,
|
|
"grad_norm": 5.23975133895874,
|
|
"learning_rate": 7.340425531914893e-07,
|
|
"loss": 1.2314,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.018617021276595744,
|
|
"grad_norm": 5.783809661865234,
|
|
"learning_rate": 7.446808510638298e-07,
|
|
"loss": 1.2621,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.018882978723404255,
|
|
"grad_norm": 6.303256988525391,
|
|
"learning_rate": 7.553191489361702e-07,
|
|
"loss": 1.2988,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.019148936170212766,
|
|
"grad_norm": 6.035338401794434,
|
|
"learning_rate": 7.659574468085107e-07,
|
|
"loss": 1.3572,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.019414893617021277,
|
|
"grad_norm": 5.458433628082275,
|
|
"learning_rate": 7.765957446808511e-07,
|
|
"loss": 1.2515,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.019680851063829788,
|
|
"grad_norm": 5.706748008728027,
|
|
"learning_rate": 7.872340425531916e-07,
|
|
"loss": 1.2144,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.0199468085106383,
|
|
"grad_norm": 5.4996018409729,
|
|
"learning_rate": 7.97872340425532e-07,
|
|
"loss": 1.2999,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.02021276595744681,
|
|
"grad_norm": 5.666746139526367,
|
|
"learning_rate": 8.085106382978725e-07,
|
|
"loss": 1.2947,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.020478723404255317,
|
|
"grad_norm": 5.446689128875732,
|
|
"learning_rate": 8.191489361702127e-07,
|
|
"loss": 1.4081,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.02074468085106383,
|
|
"grad_norm": 5.886783123016357,
|
|
"learning_rate": 8.297872340425532e-07,
|
|
"loss": 1.5147,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.02101063829787234,
|
|
"grad_norm": 5.839478969573975,
|
|
"learning_rate": 8.404255319148936e-07,
|
|
"loss": 1.3047,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.02127659574468085,
|
|
"grad_norm": 5.6594767570495605,
|
|
"learning_rate": 8.510638297872341e-07,
|
|
"loss": 1.3499,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.02154255319148936,
|
|
"grad_norm": 5.712738990783691,
|
|
"learning_rate": 8.617021276595745e-07,
|
|
"loss": 1.2731,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.021808510638297873,
|
|
"grad_norm": 5.7129316329956055,
|
|
"learning_rate": 8.72340425531915e-07,
|
|
"loss": 1.2454,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.022074468085106384,
|
|
"grad_norm": 5.676748275756836,
|
|
"learning_rate": 8.829787234042554e-07,
|
|
"loss": 1.4916,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.022340425531914895,
|
|
"grad_norm": 5.481147289276123,
|
|
"learning_rate": 8.936170212765959e-07,
|
|
"loss": 1.3493,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.022606382978723406,
|
|
"grad_norm": 5.774475574493408,
|
|
"learning_rate": 9.042553191489363e-07,
|
|
"loss": 1.2583,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.022872340425531913,
|
|
"grad_norm": 6.059263229370117,
|
|
"learning_rate": 9.148936170212766e-07,
|
|
"loss": 1.2257,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.023138297872340424,
|
|
"grad_norm": 5.5594258308410645,
|
|
"learning_rate": 9.25531914893617e-07,
|
|
"loss": 1.3313,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.023404255319148935,
|
|
"grad_norm": 5.335761070251465,
|
|
"learning_rate": 9.361702127659575e-07,
|
|
"loss": 1.221,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.023670212765957446,
|
|
"grad_norm": 5.275820255279541,
|
|
"learning_rate": 9.468085106382979e-07,
|
|
"loss": 1.315,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.023936170212765957,
|
|
"grad_norm": 5.96125602722168,
|
|
"learning_rate": 9.574468085106384e-07,
|
|
"loss": 1.2792,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.02420212765957447,
|
|
"grad_norm": 5.549777984619141,
|
|
"learning_rate": 9.680851063829788e-07,
|
|
"loss": 1.2194,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.02446808510638298,
|
|
"grad_norm": 5.814997673034668,
|
|
"learning_rate": 9.787234042553193e-07,
|
|
"loss": 1.2917,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.02473404255319149,
|
|
"grad_norm": 5.332813739776611,
|
|
"learning_rate": 9.893617021276597e-07,
|
|
"loss": 1.2458,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.025,
|
|
"grad_norm": 5.473198890686035,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 1.2752,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.02526595744680851,
|
|
"grad_norm": 5.484592914581299,
|
|
"learning_rate": 1.0106382978723404e-06,
|
|
"loss": 1.3052,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.02553191489361702,
|
|
"grad_norm": 6.4860453605651855,
|
|
"learning_rate": 1.0212765957446809e-06,
|
|
"loss": 1.4454,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.02579787234042553,
|
|
"grad_norm": 5.582982540130615,
|
|
"learning_rate": 1.0319148936170213e-06,
|
|
"loss": 1.2514,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.026063829787234042,
|
|
"grad_norm": 5.618495464324951,
|
|
"learning_rate": 1.0425531914893618e-06,
|
|
"loss": 1.4123,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.026329787234042553,
|
|
"grad_norm": 5.169803619384766,
|
|
"learning_rate": 1.0531914893617022e-06,
|
|
"loss": 1.3128,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.026595744680851064,
|
|
"grad_norm": 5.215284824371338,
|
|
"learning_rate": 1.0638297872340427e-06,
|
|
"loss": 1.4286,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.026861702127659575,
|
|
"grad_norm": 5.888491153717041,
|
|
"learning_rate": 1.074468085106383e-06,
|
|
"loss": 1.2953,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.027127659574468086,
|
|
"grad_norm": 5.597144603729248,
|
|
"learning_rate": 1.0851063829787236e-06,
|
|
"loss": 1.2401,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.027393617021276597,
|
|
"grad_norm": 5.215080261230469,
|
|
"learning_rate": 1.095744680851064e-06,
|
|
"loss": 1.1961,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.027659574468085105,
|
|
"grad_norm": 5.162172794342041,
|
|
"learning_rate": 1.1063829787234042e-06,
|
|
"loss": 1.2641,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.027925531914893616,
|
|
"grad_norm": 5.490815162658691,
|
|
"learning_rate": 1.1170212765957447e-06,
|
|
"loss": 1.1788,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.028191489361702127,
|
|
"grad_norm": 5.236513137817383,
|
|
"learning_rate": 1.1276595744680851e-06,
|
|
"loss": 1.3241,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.028457446808510638,
|
|
"grad_norm": 5.335816860198975,
|
|
"learning_rate": 1.1382978723404256e-06,
|
|
"loss": 1.299,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.02872340425531915,
|
|
"grad_norm": 5.176724910736084,
|
|
"learning_rate": 1.148936170212766e-06,
|
|
"loss": 1.3305,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.02898936170212766,
|
|
"grad_norm": 6.114458084106445,
|
|
"learning_rate": 1.1595744680851065e-06,
|
|
"loss": 1.3005,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.02925531914893617,
|
|
"grad_norm": 5.407876491546631,
|
|
"learning_rate": 1.170212765957447e-06,
|
|
"loss": 1.2806,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.029521276595744682,
|
|
"grad_norm": 4.949467658996582,
|
|
"learning_rate": 1.1808510638297874e-06,
|
|
"loss": 1.2961,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.029787234042553193,
|
|
"grad_norm": 6.091759204864502,
|
|
"learning_rate": 1.1914893617021278e-06,
|
|
"loss": 1.3533,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.0300531914893617,
|
|
"grad_norm": 6.605318069458008,
|
|
"learning_rate": 1.202127659574468e-06,
|
|
"loss": 1.3292,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.03031914893617021,
|
|
"grad_norm": 5.556684494018555,
|
|
"learning_rate": 1.2127659574468085e-06,
|
|
"loss": 1.2438,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.030585106382978722,
|
|
"grad_norm": 5.465230941772461,
|
|
"learning_rate": 1.223404255319149e-06,
|
|
"loss": 1.2679,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.030851063829787233,
|
|
"grad_norm": 5.770520210266113,
|
|
"learning_rate": 1.2340425531914894e-06,
|
|
"loss": 1.355,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.031117021276595744,
|
|
"grad_norm": 5.495830535888672,
|
|
"learning_rate": 1.2446808510638299e-06,
|
|
"loss": 1.2153,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.03138297872340425,
|
|
"grad_norm": 5.549342632293701,
|
|
"learning_rate": 1.2553191489361701e-06,
|
|
"loss": 1.3283,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.03164893617021276,
|
|
"grad_norm": 5.871270656585693,
|
|
"learning_rate": 1.2659574468085106e-06,
|
|
"loss": 1.2485,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.031914893617021274,
|
|
"grad_norm": 5.074721813201904,
|
|
"learning_rate": 1.276595744680851e-06,
|
|
"loss": 1.2725,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.032180851063829785,
|
|
"grad_norm": 5.2500715255737305,
|
|
"learning_rate": 1.2872340425531915e-06,
|
|
"loss": 1.1767,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.032446808510638296,
|
|
"grad_norm": 5.220420837402344,
|
|
"learning_rate": 1.297872340425532e-06,
|
|
"loss": 1.2566,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.03271276595744681,
|
|
"grad_norm": 5.691092014312744,
|
|
"learning_rate": 1.3085106382978724e-06,
|
|
"loss": 1.1828,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.03297872340425532,
|
|
"grad_norm": 5.540714740753174,
|
|
"learning_rate": 1.3191489361702128e-06,
|
|
"loss": 1.4373,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.03324468085106383,
|
|
"grad_norm": 5.538027286529541,
|
|
"learning_rate": 1.3297872340425533e-06,
|
|
"loss": 1.2955,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.03351063829787234,
|
|
"grad_norm": 5.601515769958496,
|
|
"learning_rate": 1.3404255319148937e-06,
|
|
"loss": 1.4246,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.03377659574468085,
|
|
"grad_norm": 5.398896217346191,
|
|
"learning_rate": 1.3510638297872342e-06,
|
|
"loss": 1.2479,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.03404255319148936,
|
|
"grad_norm": 5.281778335571289,
|
|
"learning_rate": 1.3617021276595746e-06,
|
|
"loss": 1.4188,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.03430851063829787,
|
|
"grad_norm": 5.898463249206543,
|
|
"learning_rate": 1.372340425531915e-06,
|
|
"loss": 1.2214,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.034574468085106384,
|
|
"grad_norm": 5.390676975250244,
|
|
"learning_rate": 1.3829787234042555e-06,
|
|
"loss": 1.2872,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.034840425531914895,
|
|
"grad_norm": 5.157502174377441,
|
|
"learning_rate": 1.393617021276596e-06,
|
|
"loss": 1.2954,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.035106382978723406,
|
|
"grad_norm": 5.678062438964844,
|
|
"learning_rate": 1.4042553191489364e-06,
|
|
"loss": 1.2732,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.03537234042553192,
|
|
"grad_norm": 5.359380722045898,
|
|
"learning_rate": 1.4148936170212769e-06,
|
|
"loss": 1.2858,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.03563829787234043,
|
|
"grad_norm": 6.153907775878906,
|
|
"learning_rate": 1.4255319148936173e-06,
|
|
"loss": 1.3225,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.03590425531914894,
|
|
"grad_norm": 5.03823709487915,
|
|
"learning_rate": 1.4361702127659578e-06,
|
|
"loss": 1.196,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.036170212765957444,
|
|
"grad_norm": 5.12296199798584,
|
|
"learning_rate": 1.4468085106382978e-06,
|
|
"loss": 1.1534,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.036436170212765955,
|
|
"grad_norm": 5.526867866516113,
|
|
"learning_rate": 1.4574468085106382e-06,
|
|
"loss": 1.3099,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.036702127659574466,
|
|
"grad_norm": 5.23512601852417,
|
|
"learning_rate": 1.4680851063829787e-06,
|
|
"loss": 1.167,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.03696808510638298,
|
|
"grad_norm": 5.28326940536499,
|
|
"learning_rate": 1.4787234042553191e-06,
|
|
"loss": 1.2882,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.03723404255319149,
|
|
"grad_norm": 6.0062336921691895,
|
|
"learning_rate": 1.4893617021276596e-06,
|
|
"loss": 1.2937,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0375,
|
|
"grad_norm": 5.471292495727539,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 1.2783,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.03776595744680851,
|
|
"grad_norm": 4.784001350402832,
|
|
"learning_rate": 1.5106382978723405e-06,
|
|
"loss": 1.1493,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.03803191489361702,
|
|
"grad_norm": 5.167656898498535,
|
|
"learning_rate": 1.521276595744681e-06,
|
|
"loss": 1.2872,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.03829787234042553,
|
|
"grad_norm": 5.2528276443481445,
|
|
"learning_rate": 1.5319148936170214e-06,
|
|
"loss": 1.2876,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.03856382978723404,
|
|
"grad_norm": 5.4960784912109375,
|
|
"learning_rate": 1.5425531914893618e-06,
|
|
"loss": 1.2364,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.038829787234042554,
|
|
"grad_norm": 5.419551372528076,
|
|
"learning_rate": 1.5531914893617023e-06,
|
|
"loss": 1.3695,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.039095744680851065,
|
|
"grad_norm": 5.1890974044799805,
|
|
"learning_rate": 1.5638297872340427e-06,
|
|
"loss": 1.2263,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.039361702127659576,
|
|
"grad_norm": 5.578823566436768,
|
|
"learning_rate": 1.5744680851063832e-06,
|
|
"loss": 1.2531,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.03962765957446809,
|
|
"grad_norm": 5.37275505065918,
|
|
"learning_rate": 1.5851063829787236e-06,
|
|
"loss": 1.2201,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.0398936170212766,
|
|
"grad_norm": 5.344025135040283,
|
|
"learning_rate": 1.595744680851064e-06,
|
|
"loss": 1.1419,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.04015957446808511,
|
|
"grad_norm": 5.697562217712402,
|
|
"learning_rate": 1.6063829787234045e-06,
|
|
"loss": 1.3923,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.04042553191489362,
|
|
"grad_norm": 5.420823097229004,
|
|
"learning_rate": 1.617021276595745e-06,
|
|
"loss": 1.2936,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.04069148936170213,
|
|
"grad_norm": 5.53727912902832,
|
|
"learning_rate": 1.6276595744680854e-06,
|
|
"loss": 1.2047,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.040957446808510635,
|
|
"grad_norm": 5.577879428863525,
|
|
"learning_rate": 1.6382978723404255e-06,
|
|
"loss": 1.2495,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.041223404255319146,
|
|
"grad_norm": 5.115095138549805,
|
|
"learning_rate": 1.648936170212766e-06,
|
|
"loss": 1.3324,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.04148936170212766,
|
|
"grad_norm": 5.6801862716674805,
|
|
"learning_rate": 1.6595744680851064e-06,
|
|
"loss": 1.3554,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.04175531914893617,
|
|
"grad_norm": 5.293743133544922,
|
|
"learning_rate": 1.6702127659574468e-06,
|
|
"loss": 1.2226,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.04202127659574468,
|
|
"grad_norm": 5.129601955413818,
|
|
"learning_rate": 1.6808510638297873e-06,
|
|
"loss": 1.3393,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.04228723404255319,
|
|
"grad_norm": 5.572645664215088,
|
|
"learning_rate": 1.6914893617021277e-06,
|
|
"loss": 1.2734,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.0425531914893617,
|
|
"grad_norm": 4.944756507873535,
|
|
"learning_rate": 1.7021276595744682e-06,
|
|
"loss": 1.3417,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.04281914893617021,
|
|
"grad_norm": 4.982651710510254,
|
|
"learning_rate": 1.7127659574468086e-06,
|
|
"loss": 1.2622,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.04308510638297872,
|
|
"grad_norm": 5.134377479553223,
|
|
"learning_rate": 1.723404255319149e-06,
|
|
"loss": 1.1741,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.043351063829787234,
|
|
"grad_norm": 4.829857349395752,
|
|
"learning_rate": 1.7340425531914895e-06,
|
|
"loss": 1.2298,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.043617021276595745,
|
|
"grad_norm": 5.052809715270996,
|
|
"learning_rate": 1.74468085106383e-06,
|
|
"loss": 1.1607,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.043882978723404256,
|
|
"grad_norm": 5.3465776443481445,
|
|
"learning_rate": 1.7553191489361704e-06,
|
|
"loss": 1.3924,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.04414893617021277,
|
|
"grad_norm": 5.502316951751709,
|
|
"learning_rate": 1.7659574468085109e-06,
|
|
"loss": 1.1488,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.04441489361702128,
|
|
"grad_norm": 5.253002643585205,
|
|
"learning_rate": 1.7765957446808513e-06,
|
|
"loss": 1.2004,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.04468085106382979,
|
|
"grad_norm": 5.437882900238037,
|
|
"learning_rate": 1.7872340425531918e-06,
|
|
"loss": 1.3885,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.0449468085106383,
|
|
"grad_norm": 5.526264190673828,
|
|
"learning_rate": 1.7978723404255322e-06,
|
|
"loss": 1.2351,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.04521276595744681,
|
|
"grad_norm": 5.078868389129639,
|
|
"learning_rate": 1.8085106382978727e-06,
|
|
"loss": 1.1479,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.04547872340425532,
|
|
"grad_norm": 5.379688739776611,
|
|
"learning_rate": 1.8191489361702131e-06,
|
|
"loss": 1.246,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.045744680851063826,
|
|
"grad_norm": 4.756881237030029,
|
|
"learning_rate": 1.8297872340425531e-06,
|
|
"loss": 1.3602,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.04601063829787234,
|
|
"grad_norm": 5.651166915893555,
|
|
"learning_rate": 1.8404255319148936e-06,
|
|
"loss": 1.1183,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.04627659574468085,
|
|
"grad_norm": 5.725973129272461,
|
|
"learning_rate": 1.851063829787234e-06,
|
|
"loss": 1.2474,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.04654255319148936,
|
|
"grad_norm": 4.994713306427002,
|
|
"learning_rate": 1.8617021276595745e-06,
|
|
"loss": 1.1945,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.04680851063829787,
|
|
"grad_norm": 4.701328277587891,
|
|
"learning_rate": 1.872340425531915e-06,
|
|
"loss": 1.2735,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.04707446808510638,
|
|
"grad_norm": 5.917819023132324,
|
|
"learning_rate": 1.8829787234042554e-06,
|
|
"loss": 1.2192,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.04734042553191489,
|
|
"grad_norm": 5.055963039398193,
|
|
"learning_rate": 1.8936170212765958e-06,
|
|
"loss": 1.4119,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.047606382978723404,
|
|
"grad_norm": 5.516870021820068,
|
|
"learning_rate": 1.9042553191489363e-06,
|
|
"loss": 1.2739,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.047872340425531915,
|
|
"grad_norm": 5.217896461486816,
|
|
"learning_rate": 1.9148936170212767e-06,
|
|
"loss": 1.0916,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.048138297872340426,
|
|
"grad_norm": 5.3772807121276855,
|
|
"learning_rate": 1.925531914893617e-06,
|
|
"loss": 1.2636,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.04840425531914894,
|
|
"grad_norm": 5.261349678039551,
|
|
"learning_rate": 1.9361702127659576e-06,
|
|
"loss": 1.1872,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.04867021276595745,
|
|
"grad_norm": 5.209681510925293,
|
|
"learning_rate": 1.946808510638298e-06,
|
|
"loss": 1.1946,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.04893617021276596,
|
|
"grad_norm": 6.393560886383057,
|
|
"learning_rate": 1.9574468085106385e-06,
|
|
"loss": 1.4354,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.04920212765957447,
|
|
"grad_norm": 5.200966835021973,
|
|
"learning_rate": 1.968085106382979e-06,
|
|
"loss": 1.264,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.04946808510638298,
|
|
"grad_norm": 4.81060791015625,
|
|
"learning_rate": 1.9787234042553194e-06,
|
|
"loss": 1.345,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.04973404255319149,
|
|
"grad_norm": 5.786832332611084,
|
|
"learning_rate": 1.98936170212766e-06,
|
|
"loss": 1.2897,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"grad_norm": 5.332983493804932,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 1.3621,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.050265957446808514,
|
|
"grad_norm": 5.093095779418945,
|
|
"learning_rate": 2.0106382978723408e-06,
|
|
"loss": 1.3366,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.05053191489361702,
|
|
"grad_norm": 5.604922771453857,
|
|
"learning_rate": 2.021276595744681e-06,
|
|
"loss": 1.2009,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.05079787234042553,
|
|
"grad_norm": 5.312707901000977,
|
|
"learning_rate": 2.0319148936170213e-06,
|
|
"loss": 1.1604,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.05106382978723404,
|
|
"grad_norm": 5.330122470855713,
|
|
"learning_rate": 2.0425531914893617e-06,
|
|
"loss": 1.2102,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.05132978723404255,
|
|
"grad_norm": 5.350152015686035,
|
|
"learning_rate": 2.053191489361702e-06,
|
|
"loss": 1.3483,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.05159574468085106,
|
|
"grad_norm": 5.540630340576172,
|
|
"learning_rate": 2.0638297872340426e-06,
|
|
"loss": 1.437,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.05186170212765957,
|
|
"grad_norm": 4.698929309844971,
|
|
"learning_rate": 2.074468085106383e-06,
|
|
"loss": 1.2083,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.052127659574468084,
|
|
"grad_norm": 5.128317356109619,
|
|
"learning_rate": 2.0851063829787235e-06,
|
|
"loss": 1.1502,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.052393617021276595,
|
|
"grad_norm": 5.425604343414307,
|
|
"learning_rate": 2.095744680851064e-06,
|
|
"loss": 1.2919,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.052659574468085106,
|
|
"grad_norm": 5.3685712814331055,
|
|
"learning_rate": 2.1063829787234044e-06,
|
|
"loss": 1.2305,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.05292553191489362,
|
|
"grad_norm": 6.010136127471924,
|
|
"learning_rate": 2.117021276595745e-06,
|
|
"loss": 1.0582,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.05319148936170213,
|
|
"grad_norm": 5.427469253540039,
|
|
"learning_rate": 2.1276595744680853e-06,
|
|
"loss": 1.2515,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.05345744680851064,
|
|
"grad_norm": 5.31635856628418,
|
|
"learning_rate": 2.1382978723404258e-06,
|
|
"loss": 1.2157,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.05372340425531915,
|
|
"grad_norm": 5.334502220153809,
|
|
"learning_rate": 2.148936170212766e-06,
|
|
"loss": 1.271,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.05398936170212766,
|
|
"grad_norm": 4.88215970993042,
|
|
"learning_rate": 2.1595744680851067e-06,
|
|
"loss": 1.2777,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.05425531914893617,
|
|
"grad_norm": 5.919299602508545,
|
|
"learning_rate": 2.170212765957447e-06,
|
|
"loss": 1.3336,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.05452127659574468,
|
|
"grad_norm": 5.037824630737305,
|
|
"learning_rate": 2.1808510638297876e-06,
|
|
"loss": 1.316,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.054787234042553194,
|
|
"grad_norm": 5.16343879699707,
|
|
"learning_rate": 2.191489361702128e-06,
|
|
"loss": 1.2724,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.055053191489361705,
|
|
"grad_norm": 5.36834192276001,
|
|
"learning_rate": 2.2021276595744685e-06,
|
|
"loss": 1.1693,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.05531914893617021,
|
|
"grad_norm": 4.99350118637085,
|
|
"learning_rate": 2.2127659574468085e-06,
|
|
"loss": 1.225,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.05558510638297872,
|
|
"grad_norm": 5.564612865447998,
|
|
"learning_rate": 2.223404255319149e-06,
|
|
"loss": 1.2125,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.05585106382978723,
|
|
"grad_norm": 5.21875,
|
|
"learning_rate": 2.2340425531914894e-06,
|
|
"loss": 1.3788,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.05611702127659574,
|
|
"grad_norm": 5.006836891174316,
|
|
"learning_rate": 2.24468085106383e-06,
|
|
"loss": 1.2095,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.05638297872340425,
|
|
"grad_norm": 5.6003546714782715,
|
|
"learning_rate": 2.2553191489361703e-06,
|
|
"loss": 1.3872,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.056648936170212764,
|
|
"grad_norm": 4.7773613929748535,
|
|
"learning_rate": 2.2659574468085107e-06,
|
|
"loss": 1.1979,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.056914893617021275,
|
|
"grad_norm": 4.554566860198975,
|
|
"learning_rate": 2.276595744680851e-06,
|
|
"loss": 1.1656,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.057180851063829786,
|
|
"grad_norm": 5.66951322555542,
|
|
"learning_rate": 2.2872340425531916e-06,
|
|
"loss": 1.3728,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.0574468085106383,
|
|
"grad_norm": 5.2931013107299805,
|
|
"learning_rate": 2.297872340425532e-06,
|
|
"loss": 1.2003,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.05771276595744681,
|
|
"grad_norm": 5.449213981628418,
|
|
"learning_rate": 2.3085106382978725e-06,
|
|
"loss": 1.2337,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.05797872340425532,
|
|
"grad_norm": 5.684970378875732,
|
|
"learning_rate": 2.319148936170213e-06,
|
|
"loss": 1.2196,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.05824468085106383,
|
|
"grad_norm": 5.038141250610352,
|
|
"learning_rate": 2.3297872340425534e-06,
|
|
"loss": 1.0954,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.05851063829787234,
|
|
"grad_norm": 5.255678176879883,
|
|
"learning_rate": 2.340425531914894e-06,
|
|
"loss": 1.3141,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.05877659574468085,
|
|
"grad_norm": 5.490760326385498,
|
|
"learning_rate": 2.3510638297872343e-06,
|
|
"loss": 1.1469,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.059042553191489364,
|
|
"grad_norm": 5.482240676879883,
|
|
"learning_rate": 2.3617021276595748e-06,
|
|
"loss": 1.2831,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.059308510638297875,
|
|
"grad_norm": 6.045271873474121,
|
|
"learning_rate": 2.3723404255319152e-06,
|
|
"loss": 1.1601,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.059574468085106386,
|
|
"grad_norm": 5.145684719085693,
|
|
"learning_rate": 2.3829787234042557e-06,
|
|
"loss": 1.1432,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.0598404255319149,
|
|
"grad_norm": 4.948934555053711,
|
|
"learning_rate": 2.393617021276596e-06,
|
|
"loss": 1.1199,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.0601063829787234,
|
|
"grad_norm": 5.273087978363037,
|
|
"learning_rate": 2.404255319148936e-06,
|
|
"loss": 1.3225,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.06037234042553191,
|
|
"grad_norm": 5.76677131652832,
|
|
"learning_rate": 2.4148936170212766e-06,
|
|
"loss": 1.3144,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.06063829787234042,
|
|
"grad_norm": 5.51316499710083,
|
|
"learning_rate": 2.425531914893617e-06,
|
|
"loss": 1.2931,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.060904255319148934,
|
|
"grad_norm": 5.077220916748047,
|
|
"learning_rate": 2.4361702127659575e-06,
|
|
"loss": 1.1972,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.061170212765957445,
|
|
"grad_norm": 5.733246803283691,
|
|
"learning_rate": 2.446808510638298e-06,
|
|
"loss": 1.2773,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.061436170212765956,
|
|
"grad_norm": 4.702721118927002,
|
|
"learning_rate": 2.4574468085106384e-06,
|
|
"loss": 1.2654,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.06170212765957447,
|
|
"grad_norm": 5.210516452789307,
|
|
"learning_rate": 2.468085106382979e-06,
|
|
"loss": 1.3222,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.06196808510638298,
|
|
"grad_norm": 5.6721720695495605,
|
|
"learning_rate": 2.4787234042553193e-06,
|
|
"loss": 1.1756,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.06223404255319149,
|
|
"grad_norm": 4.598169326782227,
|
|
"learning_rate": 2.4893617021276598e-06,
|
|
"loss": 1.2613,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.0625,
|
|
"grad_norm": 5.069137096405029,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 1.2629,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.0627659574468085,
|
|
"grad_norm": 4.875532627105713,
|
|
"learning_rate": 2.5106382978723402e-06,
|
|
"loss": 1.1515,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.06303191489361702,
|
|
"grad_norm": 5.547458171844482,
|
|
"learning_rate": 2.521276595744681e-06,
|
|
"loss": 1.4157,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.06329787234042553,
|
|
"grad_norm": 5.377124786376953,
|
|
"learning_rate": 2.531914893617021e-06,
|
|
"loss": 1.3036,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.06356382978723404,
|
|
"grad_norm": 5.135563850402832,
|
|
"learning_rate": 2.542553191489362e-06,
|
|
"loss": 1.1638,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.06382978723404255,
|
|
"grad_norm": 5.6008172035217285,
|
|
"learning_rate": 2.553191489361702e-06,
|
|
"loss": 1.2787,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.06409574468085107,
|
|
"grad_norm": 5.453914165496826,
|
|
"learning_rate": 2.563829787234043e-06,
|
|
"loss": 1.3239,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.06436170212765957,
|
|
"grad_norm": 5.219985485076904,
|
|
"learning_rate": 2.574468085106383e-06,
|
|
"loss": 1.0942,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.06462765957446809,
|
|
"grad_norm": 5.180700778961182,
|
|
"learning_rate": 2.585106382978724e-06,
|
|
"loss": 1.1501,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.06489361702127659,
|
|
"grad_norm": 5.2240071296691895,
|
|
"learning_rate": 2.595744680851064e-06,
|
|
"loss": 1.2269,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.06515957446808511,
|
|
"grad_norm": 6.328047275543213,
|
|
"learning_rate": 2.6063829787234047e-06,
|
|
"loss": 1.405,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.06542553191489361,
|
|
"grad_norm": 5.10886287689209,
|
|
"learning_rate": 2.6170212765957447e-06,
|
|
"loss": 1.2698,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.06569148936170213,
|
|
"grad_norm": 5.45538330078125,
|
|
"learning_rate": 2.6276595744680856e-06,
|
|
"loss": 1.33,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.06595744680851064,
|
|
"grad_norm": 5.294386386871338,
|
|
"learning_rate": 2.6382978723404256e-06,
|
|
"loss": 1.2895,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.06622340425531915,
|
|
"grad_norm": 4.7668776512146,
|
|
"learning_rate": 2.6489361702127665e-06,
|
|
"loss": 1.1176,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.06648936170212766,
|
|
"grad_norm": 4.915814399719238,
|
|
"learning_rate": 2.6595744680851065e-06,
|
|
"loss": 1.2469,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.06675531914893618,
|
|
"grad_norm": 5.320147514343262,
|
|
"learning_rate": 2.6702127659574474e-06,
|
|
"loss": 1.4904,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.06702127659574468,
|
|
"grad_norm": 5.417577266693115,
|
|
"learning_rate": 2.6808510638297874e-06,
|
|
"loss": 1.3166,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.0672872340425532,
|
|
"grad_norm": 4.704782485961914,
|
|
"learning_rate": 2.6914893617021283e-06,
|
|
"loss": 1.2362,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.0675531914893617,
|
|
"grad_norm": 5.100544452667236,
|
|
"learning_rate": 2.7021276595744683e-06,
|
|
"loss": 1.2969,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.0678191489361702,
|
|
"grad_norm": 6.336488723754883,
|
|
"learning_rate": 2.7127659574468084e-06,
|
|
"loss": 1.2708,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.06808510638297872,
|
|
"grad_norm": 5.281217098236084,
|
|
"learning_rate": 2.7234042553191492e-06,
|
|
"loss": 1.3103,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.06835106382978723,
|
|
"grad_norm": 5.127480983734131,
|
|
"learning_rate": 2.7340425531914893e-06,
|
|
"loss": 1.2957,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.06861702127659575,
|
|
"grad_norm": 5.289313316345215,
|
|
"learning_rate": 2.74468085106383e-06,
|
|
"loss": 1.2658,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.06888297872340425,
|
|
"grad_norm": 5.088155746459961,
|
|
"learning_rate": 2.75531914893617e-06,
|
|
"loss": 1.1359,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.06914893617021277,
|
|
"grad_norm": 5.367323875427246,
|
|
"learning_rate": 2.765957446808511e-06,
|
|
"loss": 1.2408,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.06941489361702127,
|
|
"grad_norm": 5.337047576904297,
|
|
"learning_rate": 2.776595744680851e-06,
|
|
"loss": 1.2908,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.06968085106382979,
|
|
"grad_norm": 5.167153358459473,
|
|
"learning_rate": 2.787234042553192e-06,
|
|
"loss": 1.3217,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.0699468085106383,
|
|
"grad_norm": 5.522439956665039,
|
|
"learning_rate": 2.797872340425532e-06,
|
|
"loss": 1.2799,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.07021276595744681,
|
|
"grad_norm": 4.691408157348633,
|
|
"learning_rate": 2.808510638297873e-06,
|
|
"loss": 1.096,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.07047872340425532,
|
|
"grad_norm": 5.208773612976074,
|
|
"learning_rate": 2.819148936170213e-06,
|
|
"loss": 1.3215,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.07074468085106383,
|
|
"grad_norm": 5.4790496826171875,
|
|
"learning_rate": 2.8297872340425537e-06,
|
|
"loss": 1.4218,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.07101063829787234,
|
|
"grad_norm": 5.256765842437744,
|
|
"learning_rate": 2.8404255319148938e-06,
|
|
"loss": 1.4242,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.07127659574468086,
|
|
"grad_norm": 4.874395370483398,
|
|
"learning_rate": 2.8510638297872346e-06,
|
|
"loss": 1.2518,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.07154255319148936,
|
|
"grad_norm": 5.108527183532715,
|
|
"learning_rate": 2.8617021276595747e-06,
|
|
"loss": 1.2919,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.07180851063829788,
|
|
"grad_norm": 5.333227157592773,
|
|
"learning_rate": 2.8723404255319155e-06,
|
|
"loss": 1.459,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.07207446808510638,
|
|
"grad_norm": 5.232532501220703,
|
|
"learning_rate": 2.8829787234042556e-06,
|
|
"loss": 1.1832,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.07234042553191489,
|
|
"grad_norm": 5.147657871246338,
|
|
"learning_rate": 2.8936170212765956e-06,
|
|
"loss": 1.3219,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.0726063829787234,
|
|
"grad_norm": 5.002472400665283,
|
|
"learning_rate": 2.9042553191489365e-06,
|
|
"loss": 1.2989,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.07287234042553191,
|
|
"grad_norm": 4.903095722198486,
|
|
"learning_rate": 2.9148936170212765e-06,
|
|
"loss": 1.1621,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.07313829787234043,
|
|
"grad_norm": 5.269963264465332,
|
|
"learning_rate": 2.9255319148936174e-06,
|
|
"loss": 1.2966,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.07340425531914893,
|
|
"grad_norm": 5.356837749481201,
|
|
"learning_rate": 2.9361702127659574e-06,
|
|
"loss": 1.2455,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.07367021276595745,
|
|
"grad_norm": 5.510587215423584,
|
|
"learning_rate": 2.9468085106382983e-06,
|
|
"loss": 1.2386,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.07393617021276595,
|
|
"grad_norm": 5.7554755210876465,
|
|
"learning_rate": 2.9574468085106383e-06,
|
|
"loss": 1.3096,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.07420212765957447,
|
|
"grad_norm": 5.236169815063477,
|
|
"learning_rate": 2.968085106382979e-06,
|
|
"loss": 1.2496,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.07446808510638298,
|
|
"grad_norm": 4.870725631713867,
|
|
"learning_rate": 2.978723404255319e-06,
|
|
"loss": 1.083,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.0747340425531915,
|
|
"grad_norm": 5.181726455688477,
|
|
"learning_rate": 2.98936170212766e-06,
|
|
"loss": 1.223,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.075,
|
|
"grad_norm": 4.924530506134033,
|
|
"learning_rate": 3e-06,
|
|
"loss": 1.2855,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.07526595744680852,
|
|
"grad_norm": 5.177605628967285,
|
|
"learning_rate": 3.010638297872341e-06,
|
|
"loss": 1.2215,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.07553191489361702,
|
|
"grad_norm": 4.895737648010254,
|
|
"learning_rate": 3.021276595744681e-06,
|
|
"loss": 1.2451,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.07579787234042554,
|
|
"grad_norm": 5.425995349884033,
|
|
"learning_rate": 3.031914893617022e-06,
|
|
"loss": 1.6053,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.07606382978723404,
|
|
"grad_norm": 5.228978157043457,
|
|
"learning_rate": 3.042553191489362e-06,
|
|
"loss": 1.1846,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.07632978723404256,
|
|
"grad_norm": 4.825231552124023,
|
|
"learning_rate": 3.0531914893617027e-06,
|
|
"loss": 1.1355,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.07659574468085106,
|
|
"grad_norm": 6.309840679168701,
|
|
"learning_rate": 3.0638297872340428e-06,
|
|
"loss": 1.1388,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.07686170212765958,
|
|
"grad_norm": 5.012725830078125,
|
|
"learning_rate": 3.0744680851063836e-06,
|
|
"loss": 0.9926,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.07712765957446809,
|
|
"grad_norm": 5.028249263763428,
|
|
"learning_rate": 3.0851063829787237e-06,
|
|
"loss": 1.2024,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.07739361702127659,
|
|
"grad_norm": 5.77925968170166,
|
|
"learning_rate": 3.0957446808510637e-06,
|
|
"loss": 1.5436,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.07765957446808511,
|
|
"grad_norm": 5.277095794677734,
|
|
"learning_rate": 3.1063829787234046e-06,
|
|
"loss": 1.2018,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.07792553191489361,
|
|
"grad_norm": 5.4600958824157715,
|
|
"learning_rate": 3.1170212765957446e-06,
|
|
"loss": 1.072,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.07819148936170213,
|
|
"grad_norm": 5.168891906738281,
|
|
"learning_rate": 3.1276595744680855e-06,
|
|
"loss": 1.3841,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.07845744680851063,
|
|
"grad_norm": 4.869060516357422,
|
|
"learning_rate": 3.1382978723404255e-06,
|
|
"loss": 1.1663,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.07872340425531915,
|
|
"grad_norm": 5.289313316345215,
|
|
"learning_rate": 3.1489361702127664e-06,
|
|
"loss": 1.0781,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.07898936170212766,
|
|
"grad_norm": 5.145017147064209,
|
|
"learning_rate": 3.1595744680851064e-06,
|
|
"loss": 1.1087,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.07925531914893617,
|
|
"grad_norm": 5.634250640869141,
|
|
"learning_rate": 3.1702127659574473e-06,
|
|
"loss": 1.3936,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.07952127659574468,
|
|
"grad_norm": 5.201961040496826,
|
|
"learning_rate": 3.1808510638297873e-06,
|
|
"loss": 1.3752,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.0797872340425532,
|
|
"grad_norm": 5.372065544128418,
|
|
"learning_rate": 3.191489361702128e-06,
|
|
"loss": 1.1715,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.0800531914893617,
|
|
"grad_norm": 6.010387420654297,
|
|
"learning_rate": 3.202127659574468e-06,
|
|
"loss": 1.2187,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.08031914893617022,
|
|
"grad_norm": 5.143375396728516,
|
|
"learning_rate": 3.212765957446809e-06,
|
|
"loss": 1.2051,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.08058510638297872,
|
|
"grad_norm": 5.376684665679932,
|
|
"learning_rate": 3.223404255319149e-06,
|
|
"loss": 1.2319,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.08085106382978724,
|
|
"grad_norm": 4.905093193054199,
|
|
"learning_rate": 3.23404255319149e-06,
|
|
"loss": 1.2187,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.08111702127659574,
|
|
"grad_norm": 5.650513648986816,
|
|
"learning_rate": 3.24468085106383e-06,
|
|
"loss": 1.1528,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.08138297872340426,
|
|
"grad_norm": 5.2889227867126465,
|
|
"learning_rate": 3.255319148936171e-06,
|
|
"loss": 1.0795,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.08164893617021277,
|
|
"grad_norm": 5.284914970397949,
|
|
"learning_rate": 3.265957446808511e-06,
|
|
"loss": 1.2885,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.08191489361702127,
|
|
"grad_norm": 5.4190449714660645,
|
|
"learning_rate": 3.276595744680851e-06,
|
|
"loss": 1.4991,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.08218085106382979,
|
|
"grad_norm": 4.965026378631592,
|
|
"learning_rate": 3.287234042553192e-06,
|
|
"loss": 1.2674,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.08244680851063829,
|
|
"grad_norm": 5.040426254272461,
|
|
"learning_rate": 3.297872340425532e-06,
|
|
"loss": 1.2347,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.08271276595744681,
|
|
"grad_norm": 5.759904384613037,
|
|
"learning_rate": 3.3085106382978727e-06,
|
|
"loss": 1.2976,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.08297872340425531,
|
|
"grad_norm": 4.893044471740723,
|
|
"learning_rate": 3.3191489361702127e-06,
|
|
"loss": 1.213,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.08324468085106383,
|
|
"grad_norm": 4.674813270568848,
|
|
"learning_rate": 3.3297872340425536e-06,
|
|
"loss": 1.2795,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.08351063829787234,
|
|
"grad_norm": 5.59810209274292,
|
|
"learning_rate": 3.3404255319148936e-06,
|
|
"loss": 1.2338,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.08377659574468085,
|
|
"grad_norm": 4.63198709487915,
|
|
"learning_rate": 3.3510638297872345e-06,
|
|
"loss": 1.2026,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.08404255319148936,
|
|
"grad_norm": 5.4756245613098145,
|
|
"learning_rate": 3.3617021276595745e-06,
|
|
"loss": 1.2838,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.08430851063829788,
|
|
"grad_norm": 5.258046627044678,
|
|
"learning_rate": 3.3723404255319154e-06,
|
|
"loss": 1.1449,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.08457446808510638,
|
|
"grad_norm": 5.205422878265381,
|
|
"learning_rate": 3.3829787234042554e-06,
|
|
"loss": 1.223,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.0848404255319149,
|
|
"grad_norm": 5.365026473999023,
|
|
"learning_rate": 3.3936170212765963e-06,
|
|
"loss": 1.191,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.0851063829787234,
|
|
"grad_norm": 5.367187023162842,
|
|
"learning_rate": 3.4042553191489363e-06,
|
|
"loss": 1.2246,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.08537234042553192,
|
|
"grad_norm": 5.512171745300293,
|
|
"learning_rate": 3.414893617021277e-06,
|
|
"loss": 1.2601,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.08563829787234042,
|
|
"grad_norm": 5.804540157318115,
|
|
"learning_rate": 3.4255319148936172e-06,
|
|
"loss": 1.1537,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.08590425531914894,
|
|
"grad_norm": 5.474178791046143,
|
|
"learning_rate": 3.436170212765958e-06,
|
|
"loss": 1.3175,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.08617021276595745,
|
|
"grad_norm": 5.454108715057373,
|
|
"learning_rate": 3.446808510638298e-06,
|
|
"loss": 1.1764,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.08643617021276596,
|
|
"grad_norm": 5.368601322174072,
|
|
"learning_rate": 3.457446808510639e-06,
|
|
"loss": 1.2001,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.08670212765957447,
|
|
"grad_norm": 5.19401741027832,
|
|
"learning_rate": 3.468085106382979e-06,
|
|
"loss": 1.2673,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.08696808510638297,
|
|
"grad_norm": 4.70231294631958,
|
|
"learning_rate": 3.478723404255319e-06,
|
|
"loss": 1.1736,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.08723404255319149,
|
|
"grad_norm": 5.607789039611816,
|
|
"learning_rate": 3.48936170212766e-06,
|
|
"loss": 1.1986,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.0875,
|
|
"grad_norm": 5.1046013832092285,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 1.2426,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.08776595744680851,
|
|
"grad_norm": 5.214546203613281,
|
|
"learning_rate": 3.510638297872341e-06,
|
|
"loss": 1.1211,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.08803191489361702,
|
|
"grad_norm": 4.989225387573242,
|
|
"learning_rate": 3.521276595744681e-06,
|
|
"loss": 1.3025,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.08829787234042553,
|
|
"grad_norm": 4.886022567749023,
|
|
"learning_rate": 3.5319148936170217e-06,
|
|
"loss": 1.2109,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.08856382978723404,
|
|
"grad_norm": 5.30552339553833,
|
|
"learning_rate": 3.5425531914893617e-06,
|
|
"loss": 1.1811,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.08882978723404256,
|
|
"grad_norm": 4.81152868270874,
|
|
"learning_rate": 3.5531914893617026e-06,
|
|
"loss": 1.1677,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.08909574468085106,
|
|
"grad_norm": 5.06434440612793,
|
|
"learning_rate": 3.5638297872340426e-06,
|
|
"loss": 1.2425,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.08936170212765958,
|
|
"grad_norm": 7.036694526672363,
|
|
"learning_rate": 3.5744680851063835e-06,
|
|
"loss": 1.2682,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.08962765957446808,
|
|
"grad_norm": 5.208419322967529,
|
|
"learning_rate": 3.5851063829787235e-06,
|
|
"loss": 1.2394,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.0898936170212766,
|
|
"grad_norm": 4.592006206512451,
|
|
"learning_rate": 3.5957446808510644e-06,
|
|
"loss": 1.2083,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.0901595744680851,
|
|
"grad_norm": 5.002110481262207,
|
|
"learning_rate": 3.6063829787234044e-06,
|
|
"loss": 1.2284,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.09042553191489362,
|
|
"grad_norm": 4.708452224731445,
|
|
"learning_rate": 3.6170212765957453e-06,
|
|
"loss": 1.1616,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.09069148936170213,
|
|
"grad_norm": 4.872410297393799,
|
|
"learning_rate": 3.6276595744680853e-06,
|
|
"loss": 1.181,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.09095744680851064,
|
|
"grad_norm": 5.24644136428833,
|
|
"learning_rate": 3.6382978723404262e-06,
|
|
"loss": 1.285,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.09122340425531915,
|
|
"grad_norm": 5.019744396209717,
|
|
"learning_rate": 3.6489361702127662e-06,
|
|
"loss": 1.2677,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.09148936170212765,
|
|
"grad_norm": 6.380999565124512,
|
|
"learning_rate": 3.6595744680851063e-06,
|
|
"loss": 1.1268,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.09175531914893617,
|
|
"grad_norm": 5.100999355316162,
|
|
"learning_rate": 3.670212765957447e-06,
|
|
"loss": 1.2023,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.09202127659574467,
|
|
"grad_norm": 5.221463203430176,
|
|
"learning_rate": 3.680851063829787e-06,
|
|
"loss": 1.2482,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.09228723404255319,
|
|
"grad_norm": 4.895312309265137,
|
|
"learning_rate": 3.691489361702128e-06,
|
|
"loss": 1.2515,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.0925531914893617,
|
|
"grad_norm": 4.988393306732178,
|
|
"learning_rate": 3.702127659574468e-06,
|
|
"loss": 1.1969,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.09281914893617021,
|
|
"grad_norm": 5.19982385635376,
|
|
"learning_rate": 3.712765957446809e-06,
|
|
"loss": 1.2488,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.09308510638297872,
|
|
"grad_norm": 5.010618686676025,
|
|
"learning_rate": 3.723404255319149e-06,
|
|
"loss": 1.2475,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.09335106382978724,
|
|
"grad_norm": 4.905212879180908,
|
|
"learning_rate": 3.73404255319149e-06,
|
|
"loss": 1.3921,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.09361702127659574,
|
|
"grad_norm": 5.373055458068848,
|
|
"learning_rate": 3.74468085106383e-06,
|
|
"loss": 1.4741,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.09388297872340426,
|
|
"grad_norm": 4.804662704467773,
|
|
"learning_rate": 3.7553191489361707e-06,
|
|
"loss": 1.2208,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.09414893617021276,
|
|
"grad_norm": 5.451242923736572,
|
|
"learning_rate": 3.7659574468085108e-06,
|
|
"loss": 1.3764,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.09441489361702128,
|
|
"grad_norm": 5.5642409324646,
|
|
"learning_rate": 3.7765957446808516e-06,
|
|
"loss": 1.4001,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.09468085106382979,
|
|
"grad_norm": 4.492448806762695,
|
|
"learning_rate": 3.7872340425531917e-06,
|
|
"loss": 1.1094,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.0949468085106383,
|
|
"grad_norm": 5.439316749572754,
|
|
"learning_rate": 3.7978723404255325e-06,
|
|
"loss": 1.3348,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.09521276595744681,
|
|
"grad_norm": 4.795385837554932,
|
|
"learning_rate": 3.8085106382978726e-06,
|
|
"loss": 1.23,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.09547872340425533,
|
|
"grad_norm": 5.010631084442139,
|
|
"learning_rate": 3.819148936170213e-06,
|
|
"loss": 1.1724,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.09574468085106383,
|
|
"grad_norm": 5.740480422973633,
|
|
"learning_rate": 3.8297872340425535e-06,
|
|
"loss": 1.3756,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.09601063829787235,
|
|
"grad_norm": 4.986555099487305,
|
|
"learning_rate": 3.840425531914894e-06,
|
|
"loss": 1.2722,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.09627659574468085,
|
|
"grad_norm": 5.041133880615234,
|
|
"learning_rate": 3.851063829787234e-06,
|
|
"loss": 1.0448,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.09654255319148936,
|
|
"grad_norm": 5.378165245056152,
|
|
"learning_rate": 3.861702127659575e-06,
|
|
"loss": 1.2111,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.09680851063829787,
|
|
"grad_norm": 4.8053059577941895,
|
|
"learning_rate": 3.872340425531915e-06,
|
|
"loss": 1.1344,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.09707446808510638,
|
|
"grad_norm": 5.25260066986084,
|
|
"learning_rate": 3.882978723404256e-06,
|
|
"loss": 1.1288,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.0973404255319149,
|
|
"grad_norm": 4.839104175567627,
|
|
"learning_rate": 3.893617021276596e-06,
|
|
"loss": 1.2131,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.0976063829787234,
|
|
"grad_norm": 5.487301826477051,
|
|
"learning_rate": 3.904255319148937e-06,
|
|
"loss": 1.1969,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.09787234042553192,
|
|
"grad_norm": 4.733921051025391,
|
|
"learning_rate": 3.914893617021277e-06,
|
|
"loss": 1.097,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.09813829787234042,
|
|
"grad_norm": 5.042628765106201,
|
|
"learning_rate": 3.9255319148936175e-06,
|
|
"loss": 1.3554,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.09840425531914894,
|
|
"grad_norm": 6.3879876136779785,
|
|
"learning_rate": 3.936170212765958e-06,
|
|
"loss": 1.1231,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.09867021276595744,
|
|
"grad_norm": 4.907758712768555,
|
|
"learning_rate": 3.946808510638298e-06,
|
|
"loss": 1.4223,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.09893617021276596,
|
|
"grad_norm": 4.765664577484131,
|
|
"learning_rate": 3.957446808510639e-06,
|
|
"loss": 1.2346,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.09920212765957447,
|
|
"grad_norm": 4.949317932128906,
|
|
"learning_rate": 3.968085106382979e-06,
|
|
"loss": 1.1447,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.09946808510638298,
|
|
"grad_norm": 5.256651878356934,
|
|
"learning_rate": 3.97872340425532e-06,
|
|
"loss": 1.25,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.09973404255319149,
|
|
"grad_norm": 5.307461261749268,
|
|
"learning_rate": 3.98936170212766e-06,
|
|
"loss": 1.3373,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"grad_norm": 5.324861526489258,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 1.1654,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.10026595744680851,
|
|
"grad_norm": 5.055593013763428,
|
|
"learning_rate": 4.010638297872341e-06,
|
|
"loss": 1.1508,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.10053191489361703,
|
|
"grad_norm": 4.892101287841797,
|
|
"learning_rate": 4.0212765957446816e-06,
|
|
"loss": 1.2529,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.10079787234042553,
|
|
"grad_norm": 4.846734523773193,
|
|
"learning_rate": 4.031914893617022e-06,
|
|
"loss": 1.1536,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.10106382978723404,
|
|
"grad_norm": 5.4368462562561035,
|
|
"learning_rate": 4.042553191489362e-06,
|
|
"loss": 1.1512,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.10132978723404255,
|
|
"grad_norm": 5.102158546447754,
|
|
"learning_rate": 4.053191489361702e-06,
|
|
"loss": 1.2382,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.10159574468085106,
|
|
"grad_norm": 5.7933030128479,
|
|
"learning_rate": 4.0638297872340425e-06,
|
|
"loss": 1.4996,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.10186170212765958,
|
|
"grad_norm": 4.7221221923828125,
|
|
"learning_rate": 4.074468085106383e-06,
|
|
"loss": 1.3471,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.10212765957446808,
|
|
"grad_norm": 4.660311222076416,
|
|
"learning_rate": 4.085106382978723e-06,
|
|
"loss": 1.103,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.1023936170212766,
|
|
"grad_norm": 5.399576663970947,
|
|
"learning_rate": 4.095744680851064e-06,
|
|
"loss": 1.3684,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.1026595744680851,
|
|
"grad_norm": 4.925390720367432,
|
|
"learning_rate": 4.106382978723404e-06,
|
|
"loss": 1.2596,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.10292553191489362,
|
|
"grad_norm": 5.198457717895508,
|
|
"learning_rate": 4.117021276595745e-06,
|
|
"loss": 1.2224,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.10319148936170212,
|
|
"grad_norm": 5.053544044494629,
|
|
"learning_rate": 4.127659574468085e-06,
|
|
"loss": 1.0447,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.10345744680851064,
|
|
"grad_norm": 5.769658088684082,
|
|
"learning_rate": 4.138297872340426e-06,
|
|
"loss": 1.4491,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.10372340425531915,
|
|
"grad_norm": 4.969061851501465,
|
|
"learning_rate": 4.148936170212766e-06,
|
|
"loss": 1.2964,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.10398936170212766,
|
|
"grad_norm": 4.825634479522705,
|
|
"learning_rate": 4.1595744680851066e-06,
|
|
"loss": 1.1521,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.10425531914893617,
|
|
"grad_norm": 5.240276336669922,
|
|
"learning_rate": 4.170212765957447e-06,
|
|
"loss": 1.27,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.10452127659574469,
|
|
"grad_norm": 4.926823139190674,
|
|
"learning_rate": 4.1808510638297875e-06,
|
|
"loss": 1.1428,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.10478723404255319,
|
|
"grad_norm": 5.143110275268555,
|
|
"learning_rate": 4.191489361702128e-06,
|
|
"loss": 1.2502,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.10505319148936171,
|
|
"grad_norm": 5.7517876625061035,
|
|
"learning_rate": 4.202127659574468e-06,
|
|
"loss": 1.3353,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.10531914893617021,
|
|
"grad_norm": 5.096099853515625,
|
|
"learning_rate": 4.212765957446809e-06,
|
|
"loss": 1.2383,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.10558510638297873,
|
|
"grad_norm": 5.0476484298706055,
|
|
"learning_rate": 4.223404255319149e-06,
|
|
"loss": 1.1639,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.10585106382978723,
|
|
"grad_norm": 5.166505813598633,
|
|
"learning_rate": 4.23404255319149e-06,
|
|
"loss": 1.327,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.10611702127659574,
|
|
"grad_norm": 5.315145969390869,
|
|
"learning_rate": 4.24468085106383e-06,
|
|
"loss": 1.2239,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.10638297872340426,
|
|
"grad_norm": 5.185245990753174,
|
|
"learning_rate": 4.255319148936171e-06,
|
|
"loss": 1.3102,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.10664893617021276,
|
|
"grad_norm": 5.77607536315918,
|
|
"learning_rate": 4.265957446808511e-06,
|
|
"loss": 1.3943,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.10691489361702128,
|
|
"grad_norm": 5.244495391845703,
|
|
"learning_rate": 4.2765957446808515e-06,
|
|
"loss": 1.2495,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.10718085106382978,
|
|
"grad_norm": 4.943081378936768,
|
|
"learning_rate": 4.287234042553192e-06,
|
|
"loss": 1.1773,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.1074468085106383,
|
|
"grad_norm": 4.948064804077148,
|
|
"learning_rate": 4.297872340425532e-06,
|
|
"loss": 1.2758,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.1077127659574468,
|
|
"grad_norm": 5.133402347564697,
|
|
"learning_rate": 4.308510638297873e-06,
|
|
"loss": 1.28,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.10797872340425532,
|
|
"grad_norm": 5.113506317138672,
|
|
"learning_rate": 4.319148936170213e-06,
|
|
"loss": 1.3164,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.10824468085106383,
|
|
"grad_norm": 5.551205635070801,
|
|
"learning_rate": 4.329787234042554e-06,
|
|
"loss": 1.3766,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.10851063829787234,
|
|
"grad_norm": 5.358046531677246,
|
|
"learning_rate": 4.340425531914894e-06,
|
|
"loss": 1.3146,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.10877659574468085,
|
|
"grad_norm": 4.947327136993408,
|
|
"learning_rate": 4.351063829787235e-06,
|
|
"loss": 1.2566,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.10904255319148937,
|
|
"grad_norm": 5.421116828918457,
|
|
"learning_rate": 4.361702127659575e-06,
|
|
"loss": 1.3041,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.10930851063829787,
|
|
"grad_norm": 5.073742866516113,
|
|
"learning_rate": 4.3723404255319156e-06,
|
|
"loss": 1.2297,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.10957446808510639,
|
|
"grad_norm": 4.688051700592041,
|
|
"learning_rate": 4.382978723404256e-06,
|
|
"loss": 1.281,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.10984042553191489,
|
|
"grad_norm": 4.957024097442627,
|
|
"learning_rate": 4.3936170212765965e-06,
|
|
"loss": 1.2235,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.11010638297872341,
|
|
"grad_norm": 4.920490741729736,
|
|
"learning_rate": 4.404255319148937e-06,
|
|
"loss": 1.3369,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.11037234042553191,
|
|
"grad_norm": 4.797316551208496,
|
|
"learning_rate": 4.414893617021277e-06,
|
|
"loss": 1.2144,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.11063829787234042,
|
|
"grad_norm": 5.424980640411377,
|
|
"learning_rate": 4.425531914893617e-06,
|
|
"loss": 1.3891,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.11090425531914894,
|
|
"grad_norm": 6.654335021972656,
|
|
"learning_rate": 4.436170212765957e-06,
|
|
"loss": 1.2438,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.11117021276595744,
|
|
"grad_norm": 4.950499057769775,
|
|
"learning_rate": 4.446808510638298e-06,
|
|
"loss": 1.1873,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.11143617021276596,
|
|
"grad_norm": 4.553642272949219,
|
|
"learning_rate": 4.457446808510638e-06,
|
|
"loss": 1.1059,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.11170212765957446,
|
|
"grad_norm": 5.221842288970947,
|
|
"learning_rate": 4.468085106382979e-06,
|
|
"loss": 1.2645,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.11196808510638298,
|
|
"grad_norm": 5.45412015914917,
|
|
"learning_rate": 4.478723404255319e-06,
|
|
"loss": 1.234,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.11223404255319148,
|
|
"grad_norm": 5.6037750244140625,
|
|
"learning_rate": 4.48936170212766e-06,
|
|
"loss": 1.2393,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.1125,
|
|
"grad_norm": 6.701963901519775,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 1.2275,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.1127659574468085,
|
|
"grad_norm": 5.183774471282959,
|
|
"learning_rate": 4.5106382978723406e-06,
|
|
"loss": 1.345,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.11303191489361702,
|
|
"grad_norm": 5.005707263946533,
|
|
"learning_rate": 4.521276595744681e-06,
|
|
"loss": 1.2778,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.11329787234042553,
|
|
"grad_norm": 4.887904644012451,
|
|
"learning_rate": 4.5319148936170215e-06,
|
|
"loss": 1.2156,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.11356382978723405,
|
|
"grad_norm": 5.077915191650391,
|
|
"learning_rate": 4.542553191489362e-06,
|
|
"loss": 1.3213,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.11382978723404255,
|
|
"grad_norm": 5.669859409332275,
|
|
"learning_rate": 4.553191489361702e-06,
|
|
"loss": 1.2028,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.11409574468085107,
|
|
"grad_norm": 4.871664047241211,
|
|
"learning_rate": 4.563829787234043e-06,
|
|
"loss": 1.2471,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.11436170212765957,
|
|
"grad_norm": 6.208220958709717,
|
|
"learning_rate": 4.574468085106383e-06,
|
|
"loss": 1.3042,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.11462765957446809,
|
|
"grad_norm": 5.47734260559082,
|
|
"learning_rate": 4.585106382978724e-06,
|
|
"loss": 1.1327,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.1148936170212766,
|
|
"grad_norm": 4.876042366027832,
|
|
"learning_rate": 4.595744680851064e-06,
|
|
"loss": 1.2484,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.11515957446808511,
|
|
"grad_norm": 4.497283458709717,
|
|
"learning_rate": 4.606382978723405e-06,
|
|
"loss": 1.0734,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.11542553191489362,
|
|
"grad_norm": 5.2405314445495605,
|
|
"learning_rate": 4.617021276595745e-06,
|
|
"loss": 1.3122,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.11569148936170212,
|
|
"grad_norm": 5.948802947998047,
|
|
"learning_rate": 4.6276595744680855e-06,
|
|
"loss": 1.2006,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.11595744680851064,
|
|
"grad_norm": 5.318106174468994,
|
|
"learning_rate": 4.638297872340426e-06,
|
|
"loss": 1.2712,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.11622340425531914,
|
|
"grad_norm": 5.686134338378906,
|
|
"learning_rate": 4.648936170212766e-06,
|
|
"loss": 1.3471,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.11648936170212766,
|
|
"grad_norm": 5.246779441833496,
|
|
"learning_rate": 4.659574468085107e-06,
|
|
"loss": 1.2967,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.11675531914893617,
|
|
"grad_norm": 4.675699710845947,
|
|
"learning_rate": 4.670212765957447e-06,
|
|
"loss": 1.2304,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.11702127659574468,
|
|
"grad_norm": 5.018355846405029,
|
|
"learning_rate": 4.680851063829788e-06,
|
|
"loss": 1.3061,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.11728723404255319,
|
|
"grad_norm": 5.387866497039795,
|
|
"learning_rate": 4.691489361702128e-06,
|
|
"loss": 1.3658,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.1175531914893617,
|
|
"grad_norm": 4.927948951721191,
|
|
"learning_rate": 4.702127659574469e-06,
|
|
"loss": 1.3331,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.11781914893617021,
|
|
"grad_norm": 5.1225738525390625,
|
|
"learning_rate": 4.712765957446809e-06,
|
|
"loss": 1.1334,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.11808510638297873,
|
|
"grad_norm": 4.9314751625061035,
|
|
"learning_rate": 4.7234042553191496e-06,
|
|
"loss": 1.2384,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.11835106382978723,
|
|
"grad_norm": 5.148207664489746,
|
|
"learning_rate": 4.73404255319149e-06,
|
|
"loss": 1.2677,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.11861702127659575,
|
|
"grad_norm": 4.629826068878174,
|
|
"learning_rate": 4.7446808510638305e-06,
|
|
"loss": 1.2096,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.11888297872340425,
|
|
"grad_norm": 4.850092887878418,
|
|
"learning_rate": 4.755319148936171e-06,
|
|
"loss": 1.2004,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.11914893617021277,
|
|
"grad_norm": 5.228341102600098,
|
|
"learning_rate": 4.765957446808511e-06,
|
|
"loss": 1.1828,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.11941489361702128,
|
|
"grad_norm": 4.738990306854248,
|
|
"learning_rate": 4.776595744680852e-06,
|
|
"loss": 1.2557,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.1196808510638298,
|
|
"grad_norm": 4.737931251525879,
|
|
"learning_rate": 4.787234042553192e-06,
|
|
"loss": 1.1705,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.1199468085106383,
|
|
"grad_norm": 4.852109432220459,
|
|
"learning_rate": 4.797872340425533e-06,
|
|
"loss": 1.175,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.1202127659574468,
|
|
"grad_norm": 4.808513641357422,
|
|
"learning_rate": 4.808510638297872e-06,
|
|
"loss": 1.3285,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.12047872340425532,
|
|
"grad_norm": 5.352870464324951,
|
|
"learning_rate": 4.819148936170213e-06,
|
|
"loss": 1.2471,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.12074468085106382,
|
|
"grad_norm": 4.533960819244385,
|
|
"learning_rate": 4.829787234042553e-06,
|
|
"loss": 1.2059,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.12101063829787234,
|
|
"grad_norm": 4.770225524902344,
|
|
"learning_rate": 4.840425531914894e-06,
|
|
"loss": 1.2049,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.12127659574468085,
|
|
"grad_norm": 5.0733418464660645,
|
|
"learning_rate": 4.851063829787234e-06,
|
|
"loss": 1.2758,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.12154255319148936,
|
|
"grad_norm": 4.347215175628662,
|
|
"learning_rate": 4.8617021276595746e-06,
|
|
"loss": 1.1401,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.12180851063829787,
|
|
"grad_norm": 5.329954147338867,
|
|
"learning_rate": 4.872340425531915e-06,
|
|
"loss": 1.276,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.12207446808510639,
|
|
"grad_norm": 5.255573272705078,
|
|
"learning_rate": 4.8829787234042555e-06,
|
|
"loss": 1.234,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.12234042553191489,
|
|
"grad_norm": 5.189822196960449,
|
|
"learning_rate": 4.893617021276596e-06,
|
|
"loss": 1.3676,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.12260638297872341,
|
|
"grad_norm": 5.039921283721924,
|
|
"learning_rate": 4.904255319148936e-06,
|
|
"loss": 1.3342,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.12287234042553191,
|
|
"grad_norm": 4.65778923034668,
|
|
"learning_rate": 4.914893617021277e-06,
|
|
"loss": 1.1117,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.12313829787234043,
|
|
"grad_norm": 5.006718635559082,
|
|
"learning_rate": 4.925531914893617e-06,
|
|
"loss": 1.2543,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.12340425531914893,
|
|
"grad_norm": 5.547107219696045,
|
|
"learning_rate": 4.936170212765958e-06,
|
|
"loss": 1.2113,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.12367021276595745,
|
|
"grad_norm": 6.148080348968506,
|
|
"learning_rate": 4.946808510638298e-06,
|
|
"loss": 1.1889,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.12393617021276596,
|
|
"grad_norm": 5.120206832885742,
|
|
"learning_rate": 4.957446808510639e-06,
|
|
"loss": 1.2198,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.12420212765957447,
|
|
"grad_norm": 5.487342834472656,
|
|
"learning_rate": 4.968085106382979e-06,
|
|
"loss": 1.2786,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.12446808510638298,
|
|
"grad_norm": 8.382891654968262,
|
|
"learning_rate": 4.9787234042553195e-06,
|
|
"loss": 1.3757,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.1247340425531915,
|
|
"grad_norm": 5.241554260253906,
|
|
"learning_rate": 4.98936170212766e-06,
|
|
"loss": 1.3302,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.125,
|
|
"grad_norm": 5.201963901519775,
|
|
"learning_rate": 5e-06,
|
|
"loss": 1.2948,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.12526595744680852,
|
|
"grad_norm": 5.143476486206055,
|
|
"learning_rate": 5.010638297872341e-06,
|
|
"loss": 1.2364,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.125531914893617,
|
|
"grad_norm": 4.847978115081787,
|
|
"learning_rate": 5.0212765957446805e-06,
|
|
"loss": 1.1692,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.12579787234042553,
|
|
"grad_norm": 7.869311809539795,
|
|
"learning_rate": 5.031914893617022e-06,
|
|
"loss": 1.3719,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.12606382978723404,
|
|
"grad_norm": 5.498979091644287,
|
|
"learning_rate": 5.042553191489362e-06,
|
|
"loss": 1.3422,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.12632978723404256,
|
|
"grad_norm": 6.362303256988525,
|
|
"learning_rate": 5.053191489361703e-06,
|
|
"loss": 1.4323,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.12659574468085105,
|
|
"grad_norm": 5.051971435546875,
|
|
"learning_rate": 5.063829787234042e-06,
|
|
"loss": 1.1821,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.12686170212765957,
|
|
"grad_norm": 4.8123250007629395,
|
|
"learning_rate": 5.0744680851063836e-06,
|
|
"loss": 1.2988,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.1271276595744681,
|
|
"grad_norm": 5.487412452697754,
|
|
"learning_rate": 5.085106382978724e-06,
|
|
"loss": 1.3167,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.1273936170212766,
|
|
"grad_norm": 8.315117835998535,
|
|
"learning_rate": 5.0957446808510645e-06,
|
|
"loss": 1.192,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.1276595744680851,
|
|
"grad_norm": 5.151649475097656,
|
|
"learning_rate": 5.106382978723404e-06,
|
|
"loss": 1.2499,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.12792553191489361,
|
|
"grad_norm": 5.335565567016602,
|
|
"learning_rate": 5.117021276595745e-06,
|
|
"loss": 1.2643,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.12819148936170213,
|
|
"grad_norm": 4.590991020202637,
|
|
"learning_rate": 5.127659574468086e-06,
|
|
"loss": 1.218,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.12845744680851065,
|
|
"grad_norm": 4.4650750160217285,
|
|
"learning_rate": 5.138297872340426e-06,
|
|
"loss": 1.1962,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.12872340425531914,
|
|
"grad_norm": 4.609473705291748,
|
|
"learning_rate": 5.148936170212766e-06,
|
|
"loss": 1.476,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.12898936170212766,
|
|
"grad_norm": 4.7010087966918945,
|
|
"learning_rate": 5.159574468085107e-06,
|
|
"loss": 1.1609,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.12925531914893618,
|
|
"grad_norm": 4.8034257888793945,
|
|
"learning_rate": 5.170212765957448e-06,
|
|
"loss": 1.3393,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.1295212765957447,
|
|
"grad_norm": 5.149427890777588,
|
|
"learning_rate": 5.180851063829788e-06,
|
|
"loss": 1.2883,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.12978723404255318,
|
|
"grad_norm": 5.017268657684326,
|
|
"learning_rate": 5.191489361702128e-06,
|
|
"loss": 1.1178,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.1300531914893617,
|
|
"grad_norm": 4.924554347991943,
|
|
"learning_rate": 5.202127659574468e-06,
|
|
"loss": 1.3381,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.13031914893617022,
|
|
"grad_norm": 4.674248218536377,
|
|
"learning_rate": 5.212765957446809e-06,
|
|
"loss": 1.0916,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.1305851063829787,
|
|
"grad_norm": 4.853366851806641,
|
|
"learning_rate": 5.223404255319149e-06,
|
|
"loss": 1.2784,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.13085106382978723,
|
|
"grad_norm": 5.032970428466797,
|
|
"learning_rate": 5.2340425531914895e-06,
|
|
"loss": 1.2575,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.13111702127659575,
|
|
"grad_norm": 4.911726474761963,
|
|
"learning_rate": 5.24468085106383e-06,
|
|
"loss": 1.2049,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.13138297872340426,
|
|
"grad_norm": 5.197798252105713,
|
|
"learning_rate": 5.255319148936171e-06,
|
|
"loss": 1.3461,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.13164893617021275,
|
|
"grad_norm": 4.873477458953857,
|
|
"learning_rate": 5.265957446808511e-06,
|
|
"loss": 1.2681,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.13191489361702127,
|
|
"grad_norm": 4.855223178863525,
|
|
"learning_rate": 5.276595744680851e-06,
|
|
"loss": 1.1849,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.1321808510638298,
|
|
"grad_norm": 5.735394477844238,
|
|
"learning_rate": 5.287234042553192e-06,
|
|
"loss": 1.2821,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.1324468085106383,
|
|
"grad_norm": 4.7265305519104,
|
|
"learning_rate": 5.297872340425533e-06,
|
|
"loss": 1.1253,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.1327127659574468,
|
|
"grad_norm": 5.138075351715088,
|
|
"learning_rate": 5.308510638297873e-06,
|
|
"loss": 1.1951,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.13297872340425532,
|
|
"grad_norm": 4.761940002441406,
|
|
"learning_rate": 5.319148936170213e-06,
|
|
"loss": 1.4573,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.13297872340425532,
|
|
"eval_loss": 1.276181697845459,
|
|
"eval_runtime": 12.4372,
|
|
"eval_samples_per_second": 32.162,
|
|
"eval_steps_per_second": 4.02,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.13324468085106383,
|
|
"grad_norm": 5.0954132080078125,
|
|
"learning_rate": 5.3297872340425535e-06,
|
|
"loss": 1.43,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.13351063829787235,
|
|
"grad_norm": 5.592034816741943,
|
|
"learning_rate": 5.340425531914895e-06,
|
|
"loss": 1.3052,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.13377659574468084,
|
|
"grad_norm": 5.18677282333374,
|
|
"learning_rate": 5.351063829787234e-06,
|
|
"loss": 1.3141,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.13404255319148936,
|
|
"grad_norm": 5.0918707847595215,
|
|
"learning_rate": 5.361702127659575e-06,
|
|
"loss": 1.3649,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.13430851063829788,
|
|
"grad_norm": 4.749475002288818,
|
|
"learning_rate": 5.372340425531915e-06,
|
|
"loss": 1.1692,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.1345744680851064,
|
|
"grad_norm": 4.383024215698242,
|
|
"learning_rate": 5.382978723404257e-06,
|
|
"loss": 1.3438,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.1348404255319149,
|
|
"grad_norm": 4.863028049468994,
|
|
"learning_rate": 5.393617021276596e-06,
|
|
"loss": 1.3332,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.1351063829787234,
|
|
"grad_norm": 4.633965492248535,
|
|
"learning_rate": 5.404255319148937e-06,
|
|
"loss": 1.2012,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.13537234042553192,
|
|
"grad_norm": 5.257637023925781,
|
|
"learning_rate": 5.414893617021277e-06,
|
|
"loss": 1.3595,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.1356382978723404,
|
|
"grad_norm": 4.795042037963867,
|
|
"learning_rate": 5.425531914893617e-06,
|
|
"loss": 1.3843,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.13590425531914893,
|
|
"grad_norm": 5.261885643005371,
|
|
"learning_rate": 5.436170212765958e-06,
|
|
"loss": 1.2708,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.13617021276595745,
|
|
"grad_norm": 4.95104455947876,
|
|
"learning_rate": 5.4468085106382985e-06,
|
|
"loss": 1.2268,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.13643617021276597,
|
|
"grad_norm": 5.171029567718506,
|
|
"learning_rate": 5.457446808510639e-06,
|
|
"loss": 1.38,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.13670212765957446,
|
|
"grad_norm": 4.671914577484131,
|
|
"learning_rate": 5.4680851063829785e-06,
|
|
"loss": 1.1485,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.13696808510638298,
|
|
"grad_norm": 4.562173843383789,
|
|
"learning_rate": 5.47872340425532e-06,
|
|
"loss": 1.3282,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.1372340425531915,
|
|
"grad_norm": 4.870545387268066,
|
|
"learning_rate": 5.48936170212766e-06,
|
|
"loss": 1.1943,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.1375,
|
|
"grad_norm": 5.231775760650635,
|
|
"learning_rate": 5.500000000000001e-06,
|
|
"loss": 1.2763,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.1377659574468085,
|
|
"grad_norm": 5.05985689163208,
|
|
"learning_rate": 5.51063829787234e-06,
|
|
"loss": 1.2018,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.13803191489361702,
|
|
"grad_norm": 4.818659782409668,
|
|
"learning_rate": 5.521276595744682e-06,
|
|
"loss": 1.2307,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.13829787234042554,
|
|
"grad_norm": 4.803600311279297,
|
|
"learning_rate": 5.531914893617022e-06,
|
|
"loss": 1.3586,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.13856382978723406,
|
|
"grad_norm": 4.65132999420166,
|
|
"learning_rate": 5.5425531914893625e-06,
|
|
"loss": 1.2147,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.13882978723404255,
|
|
"grad_norm": 4.503746032714844,
|
|
"learning_rate": 5.553191489361702e-06,
|
|
"loss": 1.2307,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.13909574468085106,
|
|
"grad_norm": 4.557102203369141,
|
|
"learning_rate": 5.563829787234043e-06,
|
|
"loss": 1.1906,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.13936170212765958,
|
|
"grad_norm": 4.347774028778076,
|
|
"learning_rate": 5.574468085106384e-06,
|
|
"loss": 1.1632,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.13962765957446807,
|
|
"grad_norm": 4.431983947753906,
|
|
"learning_rate": 5.5851063829787235e-06,
|
|
"loss": 1.2617,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.1398936170212766,
|
|
"grad_norm": 4.971803665161133,
|
|
"learning_rate": 5.595744680851064e-06,
|
|
"loss": 1.2581,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.1401595744680851,
|
|
"grad_norm": 4.5451979637146,
|
|
"learning_rate": 5.606382978723404e-06,
|
|
"loss": 1.3048,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.14042553191489363,
|
|
"grad_norm": 4.687234878540039,
|
|
"learning_rate": 5.617021276595746e-06,
|
|
"loss": 1.2556,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.14069148936170212,
|
|
"grad_norm": 4.7519378662109375,
|
|
"learning_rate": 5.627659574468085e-06,
|
|
"loss": 1.2017,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.14095744680851063,
|
|
"grad_norm": 5.454826354980469,
|
|
"learning_rate": 5.638297872340426e-06,
|
|
"loss": 1.137,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.14122340425531915,
|
|
"grad_norm": 5.442596435546875,
|
|
"learning_rate": 5.648936170212766e-06,
|
|
"loss": 1.3776,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.14148936170212767,
|
|
"grad_norm": 5.057155132293701,
|
|
"learning_rate": 5.6595744680851075e-06,
|
|
"loss": 1.4229,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.14175531914893616,
|
|
"grad_norm": 4.806349277496338,
|
|
"learning_rate": 5.670212765957447e-06,
|
|
"loss": 1.2874,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.14202127659574468,
|
|
"grad_norm": 4.934086322784424,
|
|
"learning_rate": 5.6808510638297875e-06,
|
|
"loss": 1.3149,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.1422872340425532,
|
|
"grad_norm": 4.371129035949707,
|
|
"learning_rate": 5.691489361702128e-06,
|
|
"loss": 1.2567,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.1425531914893617,
|
|
"grad_norm": 5.498307228088379,
|
|
"learning_rate": 5.702127659574469e-06,
|
|
"loss": 1.166,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.1428191489361702,
|
|
"grad_norm": 4.467796802520752,
|
|
"learning_rate": 5.712765957446809e-06,
|
|
"loss": 1.1359,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.14308510638297872,
|
|
"grad_norm": 4.92448091506958,
|
|
"learning_rate": 5.723404255319149e-06,
|
|
"loss": 1.2873,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.14335106382978724,
|
|
"grad_norm": 4.561826705932617,
|
|
"learning_rate": 5.73404255319149e-06,
|
|
"loss": 1.0615,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.14361702127659576,
|
|
"grad_norm": 4.773728370666504,
|
|
"learning_rate": 5.744680851063831e-06,
|
|
"loss": 1.1718,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.14388297872340425,
|
|
"grad_norm": 4.3747639656066895,
|
|
"learning_rate": 5.755319148936171e-06,
|
|
"loss": 1.165,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.14414893617021277,
|
|
"grad_norm": 5.261002063751221,
|
|
"learning_rate": 5.765957446808511e-06,
|
|
"loss": 1.3091,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.14441489361702128,
|
|
"grad_norm": 5.58752965927124,
|
|
"learning_rate": 5.7765957446808516e-06,
|
|
"loss": 1.2045,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.14468085106382977,
|
|
"grad_norm": 4.371783256530762,
|
|
"learning_rate": 5.787234042553191e-06,
|
|
"loss": 1.1548,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.1449468085106383,
|
|
"grad_norm": 4.958721160888672,
|
|
"learning_rate": 5.7978723404255325e-06,
|
|
"loss": 1.4517,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.1452127659574468,
|
|
"grad_norm": 4.846461296081543,
|
|
"learning_rate": 5.808510638297873e-06,
|
|
"loss": 1.3224,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.14547872340425533,
|
|
"grad_norm": 5.132719039916992,
|
|
"learning_rate": 5.819148936170213e-06,
|
|
"loss": 1.1865,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.14574468085106382,
|
|
"grad_norm": 4.791563987731934,
|
|
"learning_rate": 5.829787234042553e-06,
|
|
"loss": 1.2571,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.14601063829787234,
|
|
"grad_norm": 5.137845039367676,
|
|
"learning_rate": 5.840425531914894e-06,
|
|
"loss": 1.3008,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.14627659574468085,
|
|
"grad_norm": 4.80680513381958,
|
|
"learning_rate": 5.851063829787235e-06,
|
|
"loss": 1.243,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.14654255319148937,
|
|
"grad_norm": 4.938924312591553,
|
|
"learning_rate": 5.861702127659575e-06,
|
|
"loss": 1.3482,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.14680851063829786,
|
|
"grad_norm": 5.239283561706543,
|
|
"learning_rate": 5.872340425531915e-06,
|
|
"loss": 1.1938,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.14707446808510638,
|
|
"grad_norm": 4.885773658752441,
|
|
"learning_rate": 5.882978723404256e-06,
|
|
"loss": 1.1257,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.1473404255319149,
|
|
"grad_norm": 5.183603763580322,
|
|
"learning_rate": 5.8936170212765965e-06,
|
|
"loss": 1.3353,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.14760638297872342,
|
|
"grad_norm": 4.765013694763184,
|
|
"learning_rate": 5.904255319148937e-06,
|
|
"loss": 1.2058,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.1478723404255319,
|
|
"grad_norm": 5.2760419845581055,
|
|
"learning_rate": 5.9148936170212766e-06,
|
|
"loss": 1.2109,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.14813829787234042,
|
|
"grad_norm": 5.04670524597168,
|
|
"learning_rate": 5.925531914893618e-06,
|
|
"loss": 1.3347,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.14840425531914894,
|
|
"grad_norm": 4.968268394470215,
|
|
"learning_rate": 5.936170212765958e-06,
|
|
"loss": 1.3295,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.14867021276595746,
|
|
"grad_norm": 4.791049480438232,
|
|
"learning_rate": 5.946808510638299e-06,
|
|
"loss": 1.2116,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.14893617021276595,
|
|
"grad_norm": 4.980474948883057,
|
|
"learning_rate": 5.957446808510638e-06,
|
|
"loss": 1.4063,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.14920212765957447,
|
|
"grad_norm": 4.56986141204834,
|
|
"learning_rate": 5.968085106382979e-06,
|
|
"loss": 1.2442,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.149468085106383,
|
|
"grad_norm": 4.691464424133301,
|
|
"learning_rate": 5.97872340425532e-06,
|
|
"loss": 1.2784,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.14973404255319148,
|
|
"grad_norm": 5.040019512176514,
|
|
"learning_rate": 5.98936170212766e-06,
|
|
"loss": 1.2195,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"grad_norm": 5.160355091094971,
|
|
"learning_rate": 6e-06,
|
|
"loss": 1.4814,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.1502659574468085,
|
|
"grad_norm": 4.696538925170898,
|
|
"learning_rate": 6.010638297872341e-06,
|
|
"loss": 1.2542,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.15053191489361703,
|
|
"grad_norm": 4.901849269866943,
|
|
"learning_rate": 6.021276595744682e-06,
|
|
"loss": 1.2633,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.15079787234042552,
|
|
"grad_norm": 4.936095237731934,
|
|
"learning_rate": 6.0319148936170215e-06,
|
|
"loss": 1.2812,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.15106382978723404,
|
|
"grad_norm": 4.6663055419921875,
|
|
"learning_rate": 6.042553191489362e-06,
|
|
"loss": 1.3449,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.15132978723404256,
|
|
"grad_norm": 4.95345401763916,
|
|
"learning_rate": 6.053191489361702e-06,
|
|
"loss": 1.1968,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.15159574468085107,
|
|
"grad_norm": 4.66139030456543,
|
|
"learning_rate": 6.063829787234044e-06,
|
|
"loss": 1.1773,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.15186170212765956,
|
|
"grad_norm": 5.310500144958496,
|
|
"learning_rate": 6.074468085106383e-06,
|
|
"loss": 1.2606,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.15212765957446808,
|
|
"grad_norm": 5.423430442810059,
|
|
"learning_rate": 6.085106382978724e-06,
|
|
"loss": 1.4334,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.1523936170212766,
|
|
"grad_norm": 5.189186096191406,
|
|
"learning_rate": 6.095744680851064e-06,
|
|
"loss": 1.2955,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.15265957446808512,
|
|
"grad_norm": 5.515524864196777,
|
|
"learning_rate": 6.1063829787234055e-06,
|
|
"loss": 1.2777,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.1529255319148936,
|
|
"grad_norm": 4.615379810333252,
|
|
"learning_rate": 6.117021276595745e-06,
|
|
"loss": 1.2492,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.15319148936170213,
|
|
"grad_norm": 4.674113750457764,
|
|
"learning_rate": 6.1276595744680855e-06,
|
|
"loss": 1.2807,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.15345744680851064,
|
|
"grad_norm": 4.907557487487793,
|
|
"learning_rate": 6.138297872340426e-06,
|
|
"loss": 1.4288,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.15372340425531916,
|
|
"grad_norm": 4.517690658569336,
|
|
"learning_rate": 6.148936170212767e-06,
|
|
"loss": 1.2274,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.15398936170212765,
|
|
"grad_norm": 4.350996971130371,
|
|
"learning_rate": 6.159574468085107e-06,
|
|
"loss": 1.284,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.15425531914893617,
|
|
"grad_norm": 4.552090644836426,
|
|
"learning_rate": 6.170212765957447e-06,
|
|
"loss": 1.193,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.1545212765957447,
|
|
"grad_norm": 5.3864827156066895,
|
|
"learning_rate": 6.180851063829788e-06,
|
|
"loss": 1.2869,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.15478723404255318,
|
|
"grad_norm": 4.946741104125977,
|
|
"learning_rate": 6.191489361702127e-06,
|
|
"loss": 1.1894,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.1550531914893617,
|
|
"grad_norm": 4.652212619781494,
|
|
"learning_rate": 6.202127659574469e-06,
|
|
"loss": 1.3841,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.15531914893617021,
|
|
"grad_norm": 4.876087188720703,
|
|
"learning_rate": 6.212765957446809e-06,
|
|
"loss": 1.4244,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.15558510638297873,
|
|
"grad_norm": 4.947083473205566,
|
|
"learning_rate": 6.22340425531915e-06,
|
|
"loss": 1.3616,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.15585106382978722,
|
|
"grad_norm": 4.663647174835205,
|
|
"learning_rate": 6.234042553191489e-06,
|
|
"loss": 1.2258,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.15611702127659574,
|
|
"grad_norm": 4.758052825927734,
|
|
"learning_rate": 6.2446808510638305e-06,
|
|
"loss": 1.1514,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.15638297872340426,
|
|
"grad_norm": 4.887540340423584,
|
|
"learning_rate": 6.255319148936171e-06,
|
|
"loss": 1.1887,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.15664893617021278,
|
|
"grad_norm": 4.9997477531433105,
|
|
"learning_rate": 6.265957446808511e-06,
|
|
"loss": 1.2235,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.15691489361702127,
|
|
"grad_norm": 5.29210090637207,
|
|
"learning_rate": 6.276595744680851e-06,
|
|
"loss": 1.3761,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.15718085106382979,
|
|
"grad_norm": 4.92548942565918,
|
|
"learning_rate": 6.287234042553192e-06,
|
|
"loss": 1.3848,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.1574468085106383,
|
|
"grad_norm": 5.194962978363037,
|
|
"learning_rate": 6.297872340425533e-06,
|
|
"loss": 1.4225,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.15771276595744682,
|
|
"grad_norm": 4.7201080322265625,
|
|
"learning_rate": 6.308510638297873e-06,
|
|
"loss": 1.142,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.1579787234042553,
|
|
"grad_norm": 4.397183895111084,
|
|
"learning_rate": 6.319148936170213e-06,
|
|
"loss": 1.0353,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.15824468085106383,
|
|
"grad_norm": 4.910755157470703,
|
|
"learning_rate": 6.329787234042554e-06,
|
|
"loss": 1.3927,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.15851063829787235,
|
|
"grad_norm": 4.846840858459473,
|
|
"learning_rate": 6.3404255319148945e-06,
|
|
"loss": 1.3298,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.15877659574468084,
|
|
"grad_norm": 4.725717067718506,
|
|
"learning_rate": 6.351063829787234e-06,
|
|
"loss": 1.319,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.15904255319148936,
|
|
"grad_norm": 4.561202049255371,
|
|
"learning_rate": 6.361702127659575e-06,
|
|
"loss": 1.3586,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.15930851063829787,
|
|
"grad_norm": 5.391122817993164,
|
|
"learning_rate": 6.372340425531915e-06,
|
|
"loss": 1.2876,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.1595744680851064,
|
|
"grad_norm": 4.996328830718994,
|
|
"learning_rate": 6.382978723404256e-06,
|
|
"loss": 1.5125,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.15984042553191488,
|
|
"grad_norm": 5.271803855895996,
|
|
"learning_rate": 6.393617021276596e-06,
|
|
"loss": 1.3858,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.1601063829787234,
|
|
"grad_norm": 4.3907318115234375,
|
|
"learning_rate": 6.404255319148936e-06,
|
|
"loss": 1.1134,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.16037234042553192,
|
|
"grad_norm": 5.224330902099609,
|
|
"learning_rate": 6.414893617021277e-06,
|
|
"loss": 1.572,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.16063829787234044,
|
|
"grad_norm": 5.044121742248535,
|
|
"learning_rate": 6.425531914893618e-06,
|
|
"loss": 1.4531,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.16090425531914893,
|
|
"grad_norm": 4.903571128845215,
|
|
"learning_rate": 6.436170212765958e-06,
|
|
"loss": 1.2779,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.16117021276595744,
|
|
"grad_norm": 4.621399402618408,
|
|
"learning_rate": 6.446808510638298e-06,
|
|
"loss": 1.1709,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.16143617021276596,
|
|
"grad_norm": 4.697232723236084,
|
|
"learning_rate": 6.457446808510639e-06,
|
|
"loss": 1.1601,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.16170212765957448,
|
|
"grad_norm": 5.482996940612793,
|
|
"learning_rate": 6.46808510638298e-06,
|
|
"loss": 1.401,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.16196808510638297,
|
|
"grad_norm": 4.974328994750977,
|
|
"learning_rate": 6.4787234042553195e-06,
|
|
"loss": 1.288,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.1622340425531915,
|
|
"grad_norm": 4.7073140144348145,
|
|
"learning_rate": 6.48936170212766e-06,
|
|
"loss": 1.331,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.1625,
|
|
"grad_norm": 4.540210247039795,
|
|
"learning_rate": 6.5000000000000004e-06,
|
|
"loss": 1.217,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.16276595744680852,
|
|
"grad_norm": 4.792731285095215,
|
|
"learning_rate": 6.510638297872342e-06,
|
|
"loss": 1.2696,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.163031914893617,
|
|
"grad_norm": 4.365908622741699,
|
|
"learning_rate": 6.521276595744681e-06,
|
|
"loss": 1.1104,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.16329787234042553,
|
|
"grad_norm": 4.6623101234436035,
|
|
"learning_rate": 6.531914893617022e-06,
|
|
"loss": 1.0165,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.16356382978723405,
|
|
"grad_norm": 4.874281883239746,
|
|
"learning_rate": 6.542553191489362e-06,
|
|
"loss": 1.3418,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.16382978723404254,
|
|
"grad_norm": 5.30225133895874,
|
|
"learning_rate": 6.553191489361702e-06,
|
|
"loss": 1.2965,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.16409574468085106,
|
|
"grad_norm": 5.1621880531311035,
|
|
"learning_rate": 6.563829787234043e-06,
|
|
"loss": 1.411,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.16436170212765958,
|
|
"grad_norm": 5.011656761169434,
|
|
"learning_rate": 6.574468085106384e-06,
|
|
"loss": 1.2324,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.1646276595744681,
|
|
"grad_norm": 4.633167743682861,
|
|
"learning_rate": 6.585106382978724e-06,
|
|
"loss": 1.2498,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.16489361702127658,
|
|
"grad_norm": 4.762227535247803,
|
|
"learning_rate": 6.595744680851064e-06,
|
|
"loss": 1.3774,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.1651595744680851,
|
|
"grad_norm": 4.581019401550293,
|
|
"learning_rate": 6.606382978723405e-06,
|
|
"loss": 1.2745,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.16542553191489362,
|
|
"grad_norm": 4.845024585723877,
|
|
"learning_rate": 6.617021276595745e-06,
|
|
"loss": 1.2003,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.16569148936170214,
|
|
"grad_norm": 4.555243015289307,
|
|
"learning_rate": 6.627659574468086e-06,
|
|
"loss": 1.265,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.16595744680851063,
|
|
"grad_norm": 4.3719987869262695,
|
|
"learning_rate": 6.6382978723404254e-06,
|
|
"loss": 1.2131,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.16622340425531915,
|
|
"grad_norm": 4.629434108734131,
|
|
"learning_rate": 6.648936170212767e-06,
|
|
"loss": 1.3491,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.16648936170212766,
|
|
"grad_norm": 5.0472540855407715,
|
|
"learning_rate": 6.659574468085107e-06,
|
|
"loss": 1.4119,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.16675531914893618,
|
|
"grad_norm": 4.784181594848633,
|
|
"learning_rate": 6.670212765957448e-06,
|
|
"loss": 1.3079,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.16702127659574467,
|
|
"grad_norm": 5.000133514404297,
|
|
"learning_rate": 6.680851063829787e-06,
|
|
"loss": 1.2378,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.1672872340425532,
|
|
"grad_norm": 4.911679267883301,
|
|
"learning_rate": 6.6914893617021285e-06,
|
|
"loss": 1.1824,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.1675531914893617,
|
|
"grad_norm": 4.674395561218262,
|
|
"learning_rate": 6.702127659574469e-06,
|
|
"loss": 1.1836,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.16781914893617023,
|
|
"grad_norm": 4.964152812957764,
|
|
"learning_rate": 6.7127659574468094e-06,
|
|
"loss": 1.2419,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.16808510638297872,
|
|
"grad_norm": 4.766603946685791,
|
|
"learning_rate": 6.723404255319149e-06,
|
|
"loss": 1.2885,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.16835106382978723,
|
|
"grad_norm": 4.679075241088867,
|
|
"learning_rate": 6.7340425531914895e-06,
|
|
"loss": 1.279,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.16861702127659575,
|
|
"grad_norm": 4.590879440307617,
|
|
"learning_rate": 6.744680851063831e-06,
|
|
"loss": 1.2808,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.16888297872340424,
|
|
"grad_norm": 4.539956092834473,
|
|
"learning_rate": 6.75531914893617e-06,
|
|
"loss": 1.3353,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.16914893617021276,
|
|
"grad_norm": 4.546907424926758,
|
|
"learning_rate": 6.765957446808511e-06,
|
|
"loss": 1.2691,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.16941489361702128,
|
|
"grad_norm": 4.260477066040039,
|
|
"learning_rate": 6.776595744680851e-06,
|
|
"loss": 1.313,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.1696808510638298,
|
|
"grad_norm": 4.697219371795654,
|
|
"learning_rate": 6.787234042553193e-06,
|
|
"loss": 1.131,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.1699468085106383,
|
|
"grad_norm": 4.471210479736328,
|
|
"learning_rate": 6.797872340425532e-06,
|
|
"loss": 1.1466,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.1702127659574468,
|
|
"grad_norm": 5.731024742126465,
|
|
"learning_rate": 6.808510638297873e-06,
|
|
"loss": 1.1923,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.17047872340425532,
|
|
"grad_norm": 4.853487491607666,
|
|
"learning_rate": 6.819148936170213e-06,
|
|
"loss": 1.3019,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.17074468085106384,
|
|
"grad_norm": 4.857687950134277,
|
|
"learning_rate": 6.829787234042554e-06,
|
|
"loss": 1.382,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.17101063829787233,
|
|
"grad_norm": 5.497145652770996,
|
|
"learning_rate": 6.840425531914894e-06,
|
|
"loss": 1.2611,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.17127659574468085,
|
|
"grad_norm": 4.852382659912109,
|
|
"learning_rate": 6.8510638297872344e-06,
|
|
"loss": 1.3002,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.17154255319148937,
|
|
"grad_norm": 4.891834259033203,
|
|
"learning_rate": 6.861702127659575e-06,
|
|
"loss": 1.3009,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.17180851063829788,
|
|
"grad_norm": 5.264189720153809,
|
|
"learning_rate": 6.872340425531916e-06,
|
|
"loss": 1.2047,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.17207446808510637,
|
|
"grad_norm": 4.408929347991943,
|
|
"learning_rate": 6.882978723404256e-06,
|
|
"loss": 1.4105,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.1723404255319149,
|
|
"grad_norm": 4.550996780395508,
|
|
"learning_rate": 6.893617021276596e-06,
|
|
"loss": 1.4495,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.1726063829787234,
|
|
"grad_norm": 4.704092025756836,
|
|
"learning_rate": 6.904255319148937e-06,
|
|
"loss": 1.2031,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.17287234042553193,
|
|
"grad_norm": 4.802618026733398,
|
|
"learning_rate": 6.914893617021278e-06,
|
|
"loss": 1.2879,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.17313829787234042,
|
|
"grad_norm": 4.637843608856201,
|
|
"learning_rate": 6.925531914893618e-06,
|
|
"loss": 1.2621,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.17340425531914894,
|
|
"grad_norm": 4.558661937713623,
|
|
"learning_rate": 6.936170212765958e-06,
|
|
"loss": 1.1671,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.17367021276595745,
|
|
"grad_norm": 4.981627464294434,
|
|
"learning_rate": 6.9468085106382985e-06,
|
|
"loss": 1.2137,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.17393617021276594,
|
|
"grad_norm": 4.708109378814697,
|
|
"learning_rate": 6.957446808510638e-06,
|
|
"loss": 1.1408,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.17420212765957446,
|
|
"grad_norm": 5.328996658325195,
|
|
"learning_rate": 6.968085106382979e-06,
|
|
"loss": 1.1697,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.17446808510638298,
|
|
"grad_norm": 4.988645553588867,
|
|
"learning_rate": 6.97872340425532e-06,
|
|
"loss": 1.2962,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.1747340425531915,
|
|
"grad_norm": 5.570682048797607,
|
|
"learning_rate": 6.98936170212766e-06,
|
|
"loss": 1.4083,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.175,
|
|
"grad_norm": 5.141003608703613,
|
|
"learning_rate": 7e-06,
|
|
"loss": 1.2558,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.1752659574468085,
|
|
"grad_norm": 4.548361778259277,
|
|
"learning_rate": 7.010638297872341e-06,
|
|
"loss": 1.2556,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.17553191489361702,
|
|
"grad_norm": 4.381852149963379,
|
|
"learning_rate": 7.021276595744682e-06,
|
|
"loss": 1.3609,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.17579787234042554,
|
|
"grad_norm": 4.388241767883301,
|
|
"learning_rate": 7.031914893617022e-06,
|
|
"loss": 1.2165,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.17606382978723403,
|
|
"grad_norm": 4.472124099731445,
|
|
"learning_rate": 7.042553191489362e-06,
|
|
"loss": 1.3372,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.17632978723404255,
|
|
"grad_norm": 4.284490585327148,
|
|
"learning_rate": 7.053191489361703e-06,
|
|
"loss": 1.1206,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.17659574468085107,
|
|
"grad_norm": 4.448127269744873,
|
|
"learning_rate": 7.0638297872340434e-06,
|
|
"loss": 1.3206,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.1768617021276596,
|
|
"grad_norm": 4.701923847198486,
|
|
"learning_rate": 7.074468085106384e-06,
|
|
"loss": 1.1289,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.17712765957446808,
|
|
"grad_norm": 4.249335289001465,
|
|
"learning_rate": 7.0851063829787235e-06,
|
|
"loss": 1.136,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.1773936170212766,
|
|
"grad_norm": 4.292792320251465,
|
|
"learning_rate": 7.095744680851065e-06,
|
|
"loss": 1.1827,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.1776595744680851,
|
|
"grad_norm": 4.595381736755371,
|
|
"learning_rate": 7.106382978723405e-06,
|
|
"loss": 1.1449,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.1779255319148936,
|
|
"grad_norm": 4.856510162353516,
|
|
"learning_rate": 7.117021276595745e-06,
|
|
"loss": 1.2378,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.17819148936170212,
|
|
"grad_norm": 4.735593318939209,
|
|
"learning_rate": 7.127659574468085e-06,
|
|
"loss": 1.1641,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.17845744680851064,
|
|
"grad_norm": 4.771074295043945,
|
|
"learning_rate": 7.138297872340426e-06,
|
|
"loss": 1.33,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.17872340425531916,
|
|
"grad_norm": 4.873645782470703,
|
|
"learning_rate": 7.148936170212767e-06,
|
|
"loss": 1.3388,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.17898936170212765,
|
|
"grad_norm": 4.672497749328613,
|
|
"learning_rate": 7.159574468085107e-06,
|
|
"loss": 1.3479,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.17925531914893617,
|
|
"grad_norm": 4.454950332641602,
|
|
"learning_rate": 7.170212765957447e-06,
|
|
"loss": 1.3631,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.17952127659574468,
|
|
"grad_norm": 5.085921764373779,
|
|
"learning_rate": 7.1808510638297875e-06,
|
|
"loss": 1.4711,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.1797872340425532,
|
|
"grad_norm": 4.528400421142578,
|
|
"learning_rate": 7.191489361702129e-06,
|
|
"loss": 1.1868,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.1800531914893617,
|
|
"grad_norm": 4.722430229187012,
|
|
"learning_rate": 7.2021276595744684e-06,
|
|
"loss": 1.3842,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.1803191489361702,
|
|
"grad_norm": 4.894054889678955,
|
|
"learning_rate": 7.212765957446809e-06,
|
|
"loss": 1.4365,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.18058510638297873,
|
|
"grad_norm": 4.8365559577941895,
|
|
"learning_rate": 7.223404255319149e-06,
|
|
"loss": 1.4409,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.18085106382978725,
|
|
"grad_norm": 5.0071916580200195,
|
|
"learning_rate": 7.234042553191491e-06,
|
|
"loss": 1.214,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.18111702127659574,
|
|
"grad_norm": 4.514876365661621,
|
|
"learning_rate": 7.24468085106383e-06,
|
|
"loss": 1.1646,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.18138297872340425,
|
|
"grad_norm": 4.465925693511963,
|
|
"learning_rate": 7.255319148936171e-06,
|
|
"loss": 1.2662,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.18164893617021277,
|
|
"grad_norm": 4.698017120361328,
|
|
"learning_rate": 7.265957446808511e-06,
|
|
"loss": 1.3683,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.1819148936170213,
|
|
"grad_norm": 4.704659461975098,
|
|
"learning_rate": 7.2765957446808524e-06,
|
|
"loss": 1.2236,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.18218085106382978,
|
|
"grad_norm": 4.9184675216674805,
|
|
"learning_rate": 7.287234042553192e-06,
|
|
"loss": 1.1904,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.1824468085106383,
|
|
"grad_norm": 4.5409088134765625,
|
|
"learning_rate": 7.2978723404255325e-06,
|
|
"loss": 1.2257,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.18271276595744682,
|
|
"grad_norm": 4.9037556648254395,
|
|
"learning_rate": 7.308510638297873e-06,
|
|
"loss": 1.31,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.1829787234042553,
|
|
"grad_norm": 4.719064235687256,
|
|
"learning_rate": 7.3191489361702125e-06,
|
|
"loss": 1.2651,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.18324468085106382,
|
|
"grad_norm": 4.5164971351623535,
|
|
"learning_rate": 7.329787234042554e-06,
|
|
"loss": 1.306,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.18351063829787234,
|
|
"grad_norm": 4.281124591827393,
|
|
"learning_rate": 7.340425531914894e-06,
|
|
"loss": 1.1963,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.18377659574468086,
|
|
"grad_norm": 4.6168951988220215,
|
|
"learning_rate": 7.351063829787235e-06,
|
|
"loss": 1.2118,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.18404255319148935,
|
|
"grad_norm": 4.85908842086792,
|
|
"learning_rate": 7.361702127659574e-06,
|
|
"loss": 1.2587,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.18430851063829787,
|
|
"grad_norm": 4.3025336265563965,
|
|
"learning_rate": 7.372340425531916e-06,
|
|
"loss": 1.1239,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.18457446808510639,
|
|
"grad_norm": 4.3702311515808105,
|
|
"learning_rate": 7.382978723404256e-06,
|
|
"loss": 1.0654,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.1848404255319149,
|
|
"grad_norm": 4.243852615356445,
|
|
"learning_rate": 7.3936170212765965e-06,
|
|
"loss": 1.2725,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.1851063829787234,
|
|
"grad_norm": 4.241601467132568,
|
|
"learning_rate": 7.404255319148936e-06,
|
|
"loss": 1.1379,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.1853723404255319,
|
|
"grad_norm": 4.863661766052246,
|
|
"learning_rate": 7.4148936170212774e-06,
|
|
"loss": 1.2644,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.18563829787234043,
|
|
"grad_norm": 4.637073040008545,
|
|
"learning_rate": 7.425531914893618e-06,
|
|
"loss": 1.3296,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.18590425531914895,
|
|
"grad_norm": 4.703394889831543,
|
|
"learning_rate": 7.436170212765958e-06,
|
|
"loss": 1.3016,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.18617021276595744,
|
|
"grad_norm": 4.478874206542969,
|
|
"learning_rate": 7.446808510638298e-06,
|
|
"loss": 1.3163,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.18643617021276596,
|
|
"grad_norm": 4.600717067718506,
|
|
"learning_rate": 7.457446808510639e-06,
|
|
"loss": 1.3648,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.18670212765957447,
|
|
"grad_norm": 4.729065418243408,
|
|
"learning_rate": 7.46808510638298e-06,
|
|
"loss": 1.3604,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.186968085106383,
|
|
"grad_norm": 4.127298831939697,
|
|
"learning_rate": 7.47872340425532e-06,
|
|
"loss": 1.153,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.18723404255319148,
|
|
"grad_norm": 4.612214088439941,
|
|
"learning_rate": 7.48936170212766e-06,
|
|
"loss": 1.2951,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.1875,
|
|
"grad_norm": 5.011428356170654,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 1.4121,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.18776595744680852,
|
|
"grad_norm": 4.605989933013916,
|
|
"learning_rate": 7.5106382978723415e-06,
|
|
"loss": 1.262,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.188031914893617,
|
|
"grad_norm": 5.028648853302002,
|
|
"learning_rate": 7.521276595744681e-06,
|
|
"loss": 1.4181,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.18829787234042553,
|
|
"grad_norm": 4.571159839630127,
|
|
"learning_rate": 7.5319148936170215e-06,
|
|
"loss": 1.2364,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.18856382978723404,
|
|
"grad_norm": 4.608417510986328,
|
|
"learning_rate": 7.542553191489362e-06,
|
|
"loss": 1.3094,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.18882978723404256,
|
|
"grad_norm": 4.881725311279297,
|
|
"learning_rate": 7.553191489361703e-06,
|
|
"loss": 1.313,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.18909574468085105,
|
|
"grad_norm": 4.912058353424072,
|
|
"learning_rate": 7.563829787234043e-06,
|
|
"loss": 1.392,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.18936170212765957,
|
|
"grad_norm": 4.419525623321533,
|
|
"learning_rate": 7.574468085106383e-06,
|
|
"loss": 1.2366,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.1896276595744681,
|
|
"grad_norm": 4.507438659667969,
|
|
"learning_rate": 7.585106382978724e-06,
|
|
"loss": 1.2404,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.1898936170212766,
|
|
"grad_norm": 4.561898708343506,
|
|
"learning_rate": 7.595744680851065e-06,
|
|
"loss": 1.3596,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.1901595744680851,
|
|
"grad_norm": 4.635844707489014,
|
|
"learning_rate": 7.606382978723405e-06,
|
|
"loss": 1.2898,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.19042553191489361,
|
|
"grad_norm": 5.374488353729248,
|
|
"learning_rate": 7.617021276595745e-06,
|
|
"loss": 1.3445,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.19069148936170213,
|
|
"grad_norm": 4.574670314788818,
|
|
"learning_rate": 7.627659574468086e-06,
|
|
"loss": 1.2414,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.19095744680851065,
|
|
"grad_norm": 4.509703159332275,
|
|
"learning_rate": 7.638297872340426e-06,
|
|
"loss": 1.1649,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.19122340425531914,
|
|
"grad_norm": 4.2057929039001465,
|
|
"learning_rate": 7.648936170212766e-06,
|
|
"loss": 1.3734,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.19148936170212766,
|
|
"grad_norm": 4.571545124053955,
|
|
"learning_rate": 7.659574468085107e-06,
|
|
"loss": 1.2722,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.19175531914893618,
|
|
"grad_norm": 4.561543941497803,
|
|
"learning_rate": 7.670212765957448e-06,
|
|
"loss": 1.4057,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.1920212765957447,
|
|
"grad_norm": 4.365459442138672,
|
|
"learning_rate": 7.680851063829788e-06,
|
|
"loss": 1.2348,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.19228723404255318,
|
|
"grad_norm": 4.416993141174316,
|
|
"learning_rate": 7.691489361702127e-06,
|
|
"loss": 1.3065,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.1925531914893617,
|
|
"grad_norm": 4.762002944946289,
|
|
"learning_rate": 7.702127659574469e-06,
|
|
"loss": 1.3231,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.19281914893617022,
|
|
"grad_norm": 5.0312604904174805,
|
|
"learning_rate": 7.71276595744681e-06,
|
|
"loss": 1.3851,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.1930851063829787,
|
|
"grad_norm": 4.8303046226501465,
|
|
"learning_rate": 7.72340425531915e-06,
|
|
"loss": 1.3391,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.19335106382978723,
|
|
"grad_norm": 5.312425136566162,
|
|
"learning_rate": 7.73404255319149e-06,
|
|
"loss": 1.3422,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.19361702127659575,
|
|
"grad_norm": 4.574582576751709,
|
|
"learning_rate": 7.74468085106383e-06,
|
|
"loss": 1.2543,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.19388297872340426,
|
|
"grad_norm": 4.735869884490967,
|
|
"learning_rate": 7.755319148936172e-06,
|
|
"loss": 1.427,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.19414893617021275,
|
|
"grad_norm": 4.317601203918457,
|
|
"learning_rate": 7.765957446808511e-06,
|
|
"loss": 1.221,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.19441489361702127,
|
|
"grad_norm": 4.69275426864624,
|
|
"learning_rate": 7.776595744680851e-06,
|
|
"loss": 1.2186,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.1946808510638298,
|
|
"grad_norm": 4.865464210510254,
|
|
"learning_rate": 7.787234042553192e-06,
|
|
"loss": 1.3243,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.1949468085106383,
|
|
"grad_norm": 4.288273811340332,
|
|
"learning_rate": 7.797872340425534e-06,
|
|
"loss": 1.2224,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.1952127659574468,
|
|
"grad_norm": 4.230968475341797,
|
|
"learning_rate": 7.808510638297873e-06,
|
|
"loss": 1.1869,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.19547872340425532,
|
|
"grad_norm": 5.056215286254883,
|
|
"learning_rate": 7.819148936170213e-06,
|
|
"loss": 1.2755,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.19574468085106383,
|
|
"grad_norm": 4.373525142669678,
|
|
"learning_rate": 7.829787234042554e-06,
|
|
"loss": 1.2649,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.19601063829787235,
|
|
"grad_norm": 4.4216179847717285,
|
|
"learning_rate": 7.840425531914895e-06,
|
|
"loss": 1.2578,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.19627659574468084,
|
|
"grad_norm": 4.517039775848389,
|
|
"learning_rate": 7.851063829787235e-06,
|
|
"loss": 1.1759,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.19654255319148936,
|
|
"grad_norm": 4.973018169403076,
|
|
"learning_rate": 7.861702127659575e-06,
|
|
"loss": 1.2073,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.19680851063829788,
|
|
"grad_norm": 4.714282035827637,
|
|
"learning_rate": 7.872340425531916e-06,
|
|
"loss": 1.3551,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.1970744680851064,
|
|
"grad_norm": 4.824267387390137,
|
|
"learning_rate": 7.882978723404257e-06,
|
|
"loss": 1.287,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.1973404255319149,
|
|
"grad_norm": 4.343824863433838,
|
|
"learning_rate": 7.893617021276597e-06,
|
|
"loss": 1.1736,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.1976063829787234,
|
|
"grad_norm": 5.130711555480957,
|
|
"learning_rate": 7.904255319148936e-06,
|
|
"loss": 1.3622,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.19787234042553192,
|
|
"grad_norm": 4.943610191345215,
|
|
"learning_rate": 7.914893617021278e-06,
|
|
"loss": 1.2538,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.1981382978723404,
|
|
"grad_norm": 4.978169918060303,
|
|
"learning_rate": 7.925531914893617e-06,
|
|
"loss": 1.2547,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.19840425531914893,
|
|
"grad_norm": 4.933815956115723,
|
|
"learning_rate": 7.936170212765959e-06,
|
|
"loss": 1.3827,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.19867021276595745,
|
|
"grad_norm": 4.288017272949219,
|
|
"learning_rate": 7.946808510638298e-06,
|
|
"loss": 1.2695,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.19893617021276597,
|
|
"grad_norm": 4.4305267333984375,
|
|
"learning_rate": 7.95744680851064e-06,
|
|
"loss": 1.1459,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.19920212765957446,
|
|
"grad_norm": 4.959934711456299,
|
|
"learning_rate": 7.968085106382979e-06,
|
|
"loss": 1.1793,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.19946808510638298,
|
|
"grad_norm": 4.623016834259033,
|
|
"learning_rate": 7.97872340425532e-06,
|
|
"loss": 1.2508,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.1997340425531915,
|
|
"grad_norm": 4.426565170288086,
|
|
"learning_rate": 7.98936170212766e-06,
|
|
"loss": 1.2464,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"grad_norm": 4.914389610290527,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 1.2941,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.2002659574468085,
|
|
"grad_norm": 4.474592685699463,
|
|
"learning_rate": 8.010638297872341e-06,
|
|
"loss": 1.2285,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.20053191489361702,
|
|
"grad_norm": 4.237037181854248,
|
|
"learning_rate": 8.021276595744682e-06,
|
|
"loss": 1.3422,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.20079787234042554,
|
|
"grad_norm": 4.545922756195068,
|
|
"learning_rate": 8.031914893617022e-06,
|
|
"loss": 1.2456,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.20106382978723406,
|
|
"grad_norm": 4.951487064361572,
|
|
"learning_rate": 8.042553191489363e-06,
|
|
"loss": 1.3001,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.20132978723404255,
|
|
"grad_norm": 5.056552886962891,
|
|
"learning_rate": 8.053191489361703e-06,
|
|
"loss": 1.3875,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.20159574468085106,
|
|
"grad_norm": 4.5373101234436035,
|
|
"learning_rate": 8.063829787234044e-06,
|
|
"loss": 1.2855,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.20186170212765958,
|
|
"grad_norm": 4.698331832885742,
|
|
"learning_rate": 8.074468085106384e-06,
|
|
"loss": 1.1841,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.20212765957446807,
|
|
"grad_norm": 4.885603904724121,
|
|
"learning_rate": 8.085106382978723e-06,
|
|
"loss": 1.2843,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.2023936170212766,
|
|
"grad_norm": 4.819825172424316,
|
|
"learning_rate": 8.095744680851065e-06,
|
|
"loss": 1.2908,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.2026595744680851,
|
|
"grad_norm": 4.332822799682617,
|
|
"learning_rate": 8.106382978723404e-06,
|
|
"loss": 1.1986,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.20292553191489363,
|
|
"grad_norm": 4.102404594421387,
|
|
"learning_rate": 8.117021276595745e-06,
|
|
"loss": 1.3478,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.20319148936170212,
|
|
"grad_norm": 4.496637344360352,
|
|
"learning_rate": 8.127659574468085e-06,
|
|
"loss": 1.265,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.20345744680851063,
|
|
"grad_norm": 4.544750690460205,
|
|
"learning_rate": 8.138297872340426e-06,
|
|
"loss": 1.2299,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.20372340425531915,
|
|
"grad_norm": 4.774095058441162,
|
|
"learning_rate": 8.148936170212766e-06,
|
|
"loss": 1.3596,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.20398936170212767,
|
|
"grad_norm": 4.508190155029297,
|
|
"learning_rate": 8.159574468085107e-06,
|
|
"loss": 1.3143,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.20425531914893616,
|
|
"grad_norm": 4.832380771636963,
|
|
"learning_rate": 8.170212765957447e-06,
|
|
"loss": 1.2449,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.20452127659574468,
|
|
"grad_norm": 4.282026290893555,
|
|
"learning_rate": 8.180851063829788e-06,
|
|
"loss": 1.199,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.2047872340425532,
|
|
"grad_norm": 4.594806671142578,
|
|
"learning_rate": 8.191489361702128e-06,
|
|
"loss": 1.2466,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.2050531914893617,
|
|
"grad_norm": 4.925674915313721,
|
|
"learning_rate": 8.202127659574469e-06,
|
|
"loss": 1.2771,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.2053191489361702,
|
|
"grad_norm": 4.634965419769287,
|
|
"learning_rate": 8.212765957446809e-06,
|
|
"loss": 1.2511,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.20558510638297872,
|
|
"grad_norm": 4.774378776550293,
|
|
"learning_rate": 8.22340425531915e-06,
|
|
"loss": 1.1902,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.20585106382978724,
|
|
"grad_norm": 4.943484783172607,
|
|
"learning_rate": 8.23404255319149e-06,
|
|
"loss": 1.454,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.20611702127659576,
|
|
"grad_norm": 4.800187587738037,
|
|
"learning_rate": 8.24468085106383e-06,
|
|
"loss": 1.3709,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.20638297872340425,
|
|
"grad_norm": 5.566744327545166,
|
|
"learning_rate": 8.25531914893617e-06,
|
|
"loss": 1.3158,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.20664893617021277,
|
|
"grad_norm": 4.241647720336914,
|
|
"learning_rate": 8.265957446808512e-06,
|
|
"loss": 1.3173,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.20691489361702128,
|
|
"grad_norm": 4.561349868774414,
|
|
"learning_rate": 8.276595744680851e-06,
|
|
"loss": 1.1971,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.20718085106382977,
|
|
"grad_norm": 4.4153828620910645,
|
|
"learning_rate": 8.287234042553191e-06,
|
|
"loss": 1.2479,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.2074468085106383,
|
|
"grad_norm": 4.6610107421875,
|
|
"learning_rate": 8.297872340425532e-06,
|
|
"loss": 1.5759,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.2077127659574468,
|
|
"grad_norm": 5.142064094543457,
|
|
"learning_rate": 8.308510638297874e-06,
|
|
"loss": 1.3802,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.20797872340425533,
|
|
"grad_norm": 4.54619026184082,
|
|
"learning_rate": 8.319148936170213e-06,
|
|
"loss": 1.3185,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.20824468085106382,
|
|
"grad_norm": 4.640912055969238,
|
|
"learning_rate": 8.329787234042553e-06,
|
|
"loss": 1.2491,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.20851063829787234,
|
|
"grad_norm": 4.866705894470215,
|
|
"learning_rate": 8.340425531914894e-06,
|
|
"loss": 1.28,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.20877659574468085,
|
|
"grad_norm": 4.362489700317383,
|
|
"learning_rate": 8.351063829787235e-06,
|
|
"loss": 1.3603,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.20904255319148937,
|
|
"grad_norm": 4.756308078765869,
|
|
"learning_rate": 8.361702127659575e-06,
|
|
"loss": 1.4108,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.20930851063829786,
|
|
"grad_norm": 4.564047813415527,
|
|
"learning_rate": 8.372340425531915e-06,
|
|
"loss": 1.3404,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.20957446808510638,
|
|
"grad_norm": 4.4327921867370605,
|
|
"learning_rate": 8.382978723404256e-06,
|
|
"loss": 1.2675,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.2098404255319149,
|
|
"grad_norm": 4.656761646270752,
|
|
"learning_rate": 8.393617021276597e-06,
|
|
"loss": 1.2601,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.21010638297872342,
|
|
"grad_norm": 4.353705883026123,
|
|
"learning_rate": 8.404255319148937e-06,
|
|
"loss": 1.2144,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.2103723404255319,
|
|
"grad_norm": 4.420286655426025,
|
|
"learning_rate": 8.414893617021276e-06,
|
|
"loss": 1.249,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.21063829787234042,
|
|
"grad_norm": 4.781008243560791,
|
|
"learning_rate": 8.425531914893618e-06,
|
|
"loss": 1.3132,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.21090425531914894,
|
|
"grad_norm": 5.137455463409424,
|
|
"learning_rate": 8.436170212765959e-06,
|
|
"loss": 1.2915,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.21117021276595746,
|
|
"grad_norm": 4.893155097961426,
|
|
"learning_rate": 8.446808510638299e-06,
|
|
"loss": 1.3679,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.21143617021276595,
|
|
"grad_norm": 4.635669708251953,
|
|
"learning_rate": 8.457446808510638e-06,
|
|
"loss": 1.3222,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.21170212765957447,
|
|
"grad_norm": 4.853140354156494,
|
|
"learning_rate": 8.46808510638298e-06,
|
|
"loss": 1.2849,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.211968085106383,
|
|
"grad_norm": 4.836693286895752,
|
|
"learning_rate": 8.47872340425532e-06,
|
|
"loss": 1.395,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.21223404255319148,
|
|
"grad_norm": 4.493725299835205,
|
|
"learning_rate": 8.48936170212766e-06,
|
|
"loss": 1.3197,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.2125,
|
|
"grad_norm": 5.088167190551758,
|
|
"learning_rate": 8.5e-06,
|
|
"loss": 1.4093,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.2127659574468085,
|
|
"grad_norm": 4.372249603271484,
|
|
"learning_rate": 8.510638297872341e-06,
|
|
"loss": 1.3612,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.21303191489361703,
|
|
"grad_norm": 4.2862420082092285,
|
|
"learning_rate": 8.521276595744683e-06,
|
|
"loss": 1.2227,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.21329787234042552,
|
|
"grad_norm": 4.741192817687988,
|
|
"learning_rate": 8.531914893617022e-06,
|
|
"loss": 1.2799,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.21356382978723404,
|
|
"grad_norm": 5.022809982299805,
|
|
"learning_rate": 8.542553191489362e-06,
|
|
"loss": 1.407,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.21382978723404256,
|
|
"grad_norm": 4.443842887878418,
|
|
"learning_rate": 8.553191489361703e-06,
|
|
"loss": 1.3346,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.21409574468085107,
|
|
"grad_norm": 4.133638858795166,
|
|
"learning_rate": 8.563829787234044e-06,
|
|
"loss": 1.2443,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.21436170212765956,
|
|
"grad_norm": 4.916075706481934,
|
|
"learning_rate": 8.574468085106384e-06,
|
|
"loss": 1.3503,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.21462765957446808,
|
|
"grad_norm": 4.634794235229492,
|
|
"learning_rate": 8.585106382978724e-06,
|
|
"loss": 1.4072,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.2148936170212766,
|
|
"grad_norm": 4.912757396697998,
|
|
"learning_rate": 8.595744680851065e-06,
|
|
"loss": 1.3311,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.21515957446808512,
|
|
"grad_norm": 5.202310085296631,
|
|
"learning_rate": 8.606382978723406e-06,
|
|
"loss": 1.3224,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.2154255319148936,
|
|
"grad_norm": 4.477729320526123,
|
|
"learning_rate": 8.617021276595746e-06,
|
|
"loss": 1.2806,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.21569148936170213,
|
|
"grad_norm": 4.493345260620117,
|
|
"learning_rate": 8.627659574468085e-06,
|
|
"loss": 1.0227,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.21595744680851064,
|
|
"grad_norm": 5.053197383880615,
|
|
"learning_rate": 8.638297872340427e-06,
|
|
"loss": 1.2941,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.21622340425531916,
|
|
"grad_norm": 4.492358684539795,
|
|
"learning_rate": 8.648936170212768e-06,
|
|
"loss": 1.2651,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.21648936170212765,
|
|
"grad_norm": 4.270611763000488,
|
|
"learning_rate": 8.659574468085108e-06,
|
|
"loss": 1.2417,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.21675531914893617,
|
|
"grad_norm": 4.236185073852539,
|
|
"learning_rate": 8.670212765957447e-06,
|
|
"loss": 1.1717,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.2170212765957447,
|
|
"grad_norm": 4.765509128570557,
|
|
"learning_rate": 8.680851063829788e-06,
|
|
"loss": 1.3134,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.21728723404255318,
|
|
"grad_norm": 5.146259784698486,
|
|
"learning_rate": 8.691489361702128e-06,
|
|
"loss": 1.4561,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.2175531914893617,
|
|
"grad_norm": 4.461063385009766,
|
|
"learning_rate": 8.70212765957447e-06,
|
|
"loss": 1.2138,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.21781914893617021,
|
|
"grad_norm": 4.676782608032227,
|
|
"learning_rate": 8.712765957446809e-06,
|
|
"loss": 1.2614,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.21808510638297873,
|
|
"grad_norm": 4.411204814910889,
|
|
"learning_rate": 8.72340425531915e-06,
|
|
"loss": 1.3142,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.21835106382978722,
|
|
"grad_norm": 4.208769798278809,
|
|
"learning_rate": 8.73404255319149e-06,
|
|
"loss": 1.4278,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.21861702127659574,
|
|
"grad_norm": 4.132145404815674,
|
|
"learning_rate": 8.744680851063831e-06,
|
|
"loss": 1.214,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.21888297872340426,
|
|
"grad_norm": 4.246182441711426,
|
|
"learning_rate": 8.75531914893617e-06,
|
|
"loss": 1.4079,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.21914893617021278,
|
|
"grad_norm": 4.859819412231445,
|
|
"learning_rate": 8.765957446808512e-06,
|
|
"loss": 1.2343,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.21941489361702127,
|
|
"grad_norm": 4.722071170806885,
|
|
"learning_rate": 8.776595744680852e-06,
|
|
"loss": 1.276,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.21968085106382979,
|
|
"grad_norm": 4.489323139190674,
|
|
"learning_rate": 8.787234042553193e-06,
|
|
"loss": 1.2388,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.2199468085106383,
|
|
"grad_norm": 4.459937572479248,
|
|
"learning_rate": 8.797872340425533e-06,
|
|
"loss": 1.1911,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.22021276595744682,
|
|
"grad_norm": 4.6483988761901855,
|
|
"learning_rate": 8.808510638297874e-06,
|
|
"loss": 1.5344,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.2204787234042553,
|
|
"grad_norm": 4.822110176086426,
|
|
"learning_rate": 8.819148936170213e-06,
|
|
"loss": 1.2885,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.22074468085106383,
|
|
"grad_norm": 4.722024917602539,
|
|
"learning_rate": 8.829787234042555e-06,
|
|
"loss": 1.2496,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.22101063829787235,
|
|
"grad_norm": 5.146275520324707,
|
|
"learning_rate": 8.840425531914894e-06,
|
|
"loss": 1.3017,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.22127659574468084,
|
|
"grad_norm": 4.489665508270264,
|
|
"learning_rate": 8.851063829787234e-06,
|
|
"loss": 1.1933,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.22154255319148936,
|
|
"grad_norm": 4.318885803222656,
|
|
"learning_rate": 8.861702127659575e-06,
|
|
"loss": 1.1849,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.22180851063829787,
|
|
"grad_norm": 4.603454113006592,
|
|
"learning_rate": 8.872340425531915e-06,
|
|
"loss": 1.3538,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.2220744680851064,
|
|
"grad_norm": 4.531906604766846,
|
|
"learning_rate": 8.882978723404256e-06,
|
|
"loss": 1.3913,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.22234042553191488,
|
|
"grad_norm": 4.391329288482666,
|
|
"learning_rate": 8.893617021276596e-06,
|
|
"loss": 1.289,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.2226063829787234,
|
|
"grad_norm": 5.546546459197998,
|
|
"learning_rate": 8.904255319148937e-06,
|
|
"loss": 1.2507,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.22287234042553192,
|
|
"grad_norm": 4.61740779876709,
|
|
"learning_rate": 8.914893617021277e-06,
|
|
"loss": 1.3726,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.22313829787234044,
|
|
"grad_norm": 4.953794479370117,
|
|
"learning_rate": 8.925531914893618e-06,
|
|
"loss": 1.2434,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.22340425531914893,
|
|
"grad_norm": 4.278190612792969,
|
|
"learning_rate": 8.936170212765958e-06,
|
|
"loss": 1.2559,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.22367021276595744,
|
|
"grad_norm": 4.941532135009766,
|
|
"learning_rate": 8.946808510638299e-06,
|
|
"loss": 1.3278,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.22393617021276596,
|
|
"grad_norm": 4.883002758026123,
|
|
"learning_rate": 8.957446808510638e-06,
|
|
"loss": 1.2537,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.22420212765957448,
|
|
"grad_norm": 4.7191619873046875,
|
|
"learning_rate": 8.96808510638298e-06,
|
|
"loss": 1.2726,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.22446808510638297,
|
|
"grad_norm": 4.509050369262695,
|
|
"learning_rate": 8.97872340425532e-06,
|
|
"loss": 1.2025,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.2247340425531915,
|
|
"grad_norm": 3.9332523345947266,
|
|
"learning_rate": 8.98936170212766e-06,
|
|
"loss": 1.1207,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.225,
|
|
"grad_norm": 4.3128204345703125,
|
|
"learning_rate": 9e-06,
|
|
"loss": 1.2433,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.22526595744680852,
|
|
"grad_norm": 4.253404140472412,
|
|
"learning_rate": 9.010638297872342e-06,
|
|
"loss": 1.2193,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.225531914893617,
|
|
"grad_norm": 4.779951572418213,
|
|
"learning_rate": 9.021276595744681e-06,
|
|
"loss": 1.2158,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.22579787234042553,
|
|
"grad_norm": 4.481555461883545,
|
|
"learning_rate": 9.031914893617022e-06,
|
|
"loss": 1.4551,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.22606382978723405,
|
|
"grad_norm": 4.955724239349365,
|
|
"learning_rate": 9.042553191489362e-06,
|
|
"loss": 1.4291,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.22632978723404254,
|
|
"grad_norm": 4.106208801269531,
|
|
"learning_rate": 9.053191489361702e-06,
|
|
"loss": 1.3655,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.22659574468085106,
|
|
"grad_norm": 4.6892499923706055,
|
|
"learning_rate": 9.063829787234043e-06,
|
|
"loss": 1.2516,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.22686170212765958,
|
|
"grad_norm": 4.553836822509766,
|
|
"learning_rate": 9.074468085106384e-06,
|
|
"loss": 1.2107,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.2271276595744681,
|
|
"grad_norm": 5.072434902191162,
|
|
"learning_rate": 9.085106382978724e-06,
|
|
"loss": 1.3445,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.22739361702127658,
|
|
"grad_norm": 4.725018501281738,
|
|
"learning_rate": 9.095744680851063e-06,
|
|
"loss": 1.2701,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.2276595744680851,
|
|
"grad_norm": 4.630471706390381,
|
|
"learning_rate": 9.106382978723405e-06,
|
|
"loss": 1.3229,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.22792553191489362,
|
|
"grad_norm": 4.0610880851745605,
|
|
"learning_rate": 9.117021276595746e-06,
|
|
"loss": 1.0857,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.22819148936170214,
|
|
"grad_norm": 4.523334503173828,
|
|
"learning_rate": 9.127659574468086e-06,
|
|
"loss": 1.446,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.22845744680851063,
|
|
"grad_norm": 5.042343616485596,
|
|
"learning_rate": 9.138297872340425e-06,
|
|
"loss": 1.3728,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.22872340425531915,
|
|
"grad_norm": 4.5774664878845215,
|
|
"learning_rate": 9.148936170212767e-06,
|
|
"loss": 1.3178,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.22898936170212766,
|
|
"grad_norm": 4.425473213195801,
|
|
"learning_rate": 9.159574468085108e-06,
|
|
"loss": 1.3412,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.22925531914893618,
|
|
"grad_norm": 4.738778114318848,
|
|
"learning_rate": 9.170212765957447e-06,
|
|
"loss": 1.3676,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.22952127659574467,
|
|
"grad_norm": 4.462982654571533,
|
|
"learning_rate": 9.180851063829787e-06,
|
|
"loss": 1.2755,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.2297872340425532,
|
|
"grad_norm": 4.682027816772461,
|
|
"learning_rate": 9.191489361702128e-06,
|
|
"loss": 1.2625,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.2300531914893617,
|
|
"grad_norm": 4.37489652633667,
|
|
"learning_rate": 9.20212765957447e-06,
|
|
"loss": 1.291,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.23031914893617023,
|
|
"grad_norm": 4.652685642242432,
|
|
"learning_rate": 9.21276595744681e-06,
|
|
"loss": 1.1782,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.23058510638297872,
|
|
"grad_norm": 4.401131629943848,
|
|
"learning_rate": 9.223404255319149e-06,
|
|
"loss": 1.2626,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.23085106382978723,
|
|
"grad_norm": 4.712587356567383,
|
|
"learning_rate": 9.23404255319149e-06,
|
|
"loss": 1.2888,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.23111702127659575,
|
|
"grad_norm": 4.425190448760986,
|
|
"learning_rate": 9.244680851063831e-06,
|
|
"loss": 1.2566,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.23138297872340424,
|
|
"grad_norm": 5.040404319763184,
|
|
"learning_rate": 9.255319148936171e-06,
|
|
"loss": 1.1856,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.23164893617021276,
|
|
"grad_norm": 4.372191905975342,
|
|
"learning_rate": 9.26595744680851e-06,
|
|
"loss": 1.3153,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.23191489361702128,
|
|
"grad_norm": 4.518852233886719,
|
|
"learning_rate": 9.276595744680852e-06,
|
|
"loss": 1.2652,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.2321808510638298,
|
|
"grad_norm": 5.675739288330078,
|
|
"learning_rate": 9.287234042553193e-06,
|
|
"loss": 1.2654,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.2324468085106383,
|
|
"grad_norm": 4.503605842590332,
|
|
"learning_rate": 9.297872340425533e-06,
|
|
"loss": 1.2693,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.2327127659574468,
|
|
"grad_norm": 4.573145866394043,
|
|
"learning_rate": 9.308510638297872e-06,
|
|
"loss": 1.3126,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.23297872340425532,
|
|
"grad_norm": 4.833911418914795,
|
|
"learning_rate": 9.319148936170214e-06,
|
|
"loss": 1.3583,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.23324468085106384,
|
|
"grad_norm": 4.768589496612549,
|
|
"learning_rate": 9.329787234042555e-06,
|
|
"loss": 1.273,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.23351063829787233,
|
|
"grad_norm": 4.1959638595581055,
|
|
"learning_rate": 9.340425531914895e-06,
|
|
"loss": 1.1774,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.23377659574468085,
|
|
"grad_norm": 4.231587886810303,
|
|
"learning_rate": 9.351063829787234e-06,
|
|
"loss": 1.3215,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.23404255319148937,
|
|
"grad_norm": 4.725379943847656,
|
|
"learning_rate": 9.361702127659576e-06,
|
|
"loss": 1.3458,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.23430851063829788,
|
|
"grad_norm": 4.831368446350098,
|
|
"learning_rate": 9.372340425531917e-06,
|
|
"loss": 1.3499,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.23457446808510637,
|
|
"grad_norm": 4.571084499359131,
|
|
"learning_rate": 9.382978723404256e-06,
|
|
"loss": 1.2071,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.2348404255319149,
|
|
"grad_norm": 4.676523208618164,
|
|
"learning_rate": 9.393617021276596e-06,
|
|
"loss": 1.3009,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.2351063829787234,
|
|
"grad_norm": 4.406195640563965,
|
|
"learning_rate": 9.404255319148937e-06,
|
|
"loss": 1.3127,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.23537234042553193,
|
|
"grad_norm": 4.958892822265625,
|
|
"learning_rate": 9.414893617021279e-06,
|
|
"loss": 1.3724,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.23563829787234042,
|
|
"grad_norm": 4.296865463256836,
|
|
"learning_rate": 9.425531914893618e-06,
|
|
"loss": 1.2535,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.23590425531914894,
|
|
"grad_norm": 4.650951862335205,
|
|
"learning_rate": 9.436170212765958e-06,
|
|
"loss": 1.2432,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.23617021276595745,
|
|
"grad_norm": 4.3874831199646,
|
|
"learning_rate": 9.446808510638299e-06,
|
|
"loss": 1.4075,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.23643617021276594,
|
|
"grad_norm": 4.246219158172607,
|
|
"learning_rate": 9.457446808510639e-06,
|
|
"loss": 1.2787,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.23670212765957446,
|
|
"grad_norm": 4.379426956176758,
|
|
"learning_rate": 9.46808510638298e-06,
|
|
"loss": 1.2586,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.23696808510638298,
|
|
"grad_norm": 4.164050102233887,
|
|
"learning_rate": 9.47872340425532e-06,
|
|
"loss": 1.3071,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.2372340425531915,
|
|
"grad_norm": 4.572608947753906,
|
|
"learning_rate": 9.489361702127661e-06,
|
|
"loss": 1.3735,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.2375,
|
|
"grad_norm": 4.812750339508057,
|
|
"learning_rate": 9.5e-06,
|
|
"loss": 1.3627,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.2377659574468085,
|
|
"grad_norm": 4.5463056564331055,
|
|
"learning_rate": 9.510638297872342e-06,
|
|
"loss": 1.2688,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.23803191489361702,
|
|
"grad_norm": 4.700718402862549,
|
|
"learning_rate": 9.521276595744681e-06,
|
|
"loss": 1.3242,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.23829787234042554,
|
|
"grad_norm": 4.626996040344238,
|
|
"learning_rate": 9.531914893617023e-06,
|
|
"loss": 1.3346,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.23856382978723403,
|
|
"grad_norm": 4.4340643882751465,
|
|
"learning_rate": 9.542553191489362e-06,
|
|
"loss": 1.266,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.23882978723404255,
|
|
"grad_norm": 4.288296222686768,
|
|
"learning_rate": 9.553191489361704e-06,
|
|
"loss": 1.3097,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.23909574468085107,
|
|
"grad_norm": 4.531320571899414,
|
|
"learning_rate": 9.563829787234043e-06,
|
|
"loss": 1.2607,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.2393617021276596,
|
|
"grad_norm": 4.4416985511779785,
|
|
"learning_rate": 9.574468085106385e-06,
|
|
"loss": 1.2443,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.23962765957446808,
|
|
"grad_norm": 4.752575397491455,
|
|
"learning_rate": 9.585106382978724e-06,
|
|
"loss": 1.263,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.2398936170212766,
|
|
"grad_norm": 4.418696403503418,
|
|
"learning_rate": 9.595744680851065e-06,
|
|
"loss": 1.4263,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.2401595744680851,
|
|
"grad_norm": 4.149245262145996,
|
|
"learning_rate": 9.606382978723405e-06,
|
|
"loss": 1.2097,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.2404255319148936,
|
|
"grad_norm": 4.261038303375244,
|
|
"learning_rate": 9.617021276595745e-06,
|
|
"loss": 1.284,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.24069148936170212,
|
|
"grad_norm": 4.526815414428711,
|
|
"learning_rate": 9.627659574468086e-06,
|
|
"loss": 1.2036,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.24095744680851064,
|
|
"grad_norm": 4.194947719573975,
|
|
"learning_rate": 9.638297872340426e-06,
|
|
"loss": 1.3215,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.24122340425531916,
|
|
"grad_norm": 4.903501987457275,
|
|
"learning_rate": 9.648936170212767e-06,
|
|
"loss": 1.2824,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.24148936170212765,
|
|
"grad_norm": 4.600060939788818,
|
|
"learning_rate": 9.659574468085106e-06,
|
|
"loss": 1.3283,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.24175531914893617,
|
|
"grad_norm": 4.43640661239624,
|
|
"learning_rate": 9.670212765957448e-06,
|
|
"loss": 1.2952,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.24202127659574468,
|
|
"grad_norm": 4.518085479736328,
|
|
"learning_rate": 9.680851063829787e-06,
|
|
"loss": 1.2436,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.2422872340425532,
|
|
"grad_norm": 4.508195877075195,
|
|
"learning_rate": 9.691489361702129e-06,
|
|
"loss": 1.448,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.2425531914893617,
|
|
"grad_norm": 4.132392406463623,
|
|
"learning_rate": 9.702127659574468e-06,
|
|
"loss": 1.2467,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.2428191489361702,
|
|
"grad_norm": 4.272422790527344,
|
|
"learning_rate": 9.71276595744681e-06,
|
|
"loss": 1.1718,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.24308510638297873,
|
|
"grad_norm": 3.7474145889282227,
|
|
"learning_rate": 9.723404255319149e-06,
|
|
"loss": 1.2312,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.24335106382978725,
|
|
"grad_norm": 4.318002700805664,
|
|
"learning_rate": 9.73404255319149e-06,
|
|
"loss": 1.2954,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.24361702127659574,
|
|
"grad_norm": 4.300724506378174,
|
|
"learning_rate": 9.74468085106383e-06,
|
|
"loss": 1.324,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.24388297872340425,
|
|
"grad_norm": 4.362585067749023,
|
|
"learning_rate": 9.755319148936171e-06,
|
|
"loss": 1.2939,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.24414893617021277,
|
|
"grad_norm": 4.705591678619385,
|
|
"learning_rate": 9.765957446808511e-06,
|
|
"loss": 1.3472,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.2444148936170213,
|
|
"grad_norm": 4.612809658050537,
|
|
"learning_rate": 9.776595744680852e-06,
|
|
"loss": 1.323,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.24468085106382978,
|
|
"grad_norm": 4.289991855621338,
|
|
"learning_rate": 9.787234042553192e-06,
|
|
"loss": 1.3352,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.2449468085106383,
|
|
"grad_norm": 4.43556022644043,
|
|
"learning_rate": 9.797872340425533e-06,
|
|
"loss": 1.2358,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.24521276595744682,
|
|
"grad_norm": 4.365429878234863,
|
|
"learning_rate": 9.808510638297873e-06,
|
|
"loss": 1.3711,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.2454787234042553,
|
|
"grad_norm": 4.680497646331787,
|
|
"learning_rate": 9.819148936170212e-06,
|
|
"loss": 1.3057,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.24574468085106382,
|
|
"grad_norm": 4.54257869720459,
|
|
"learning_rate": 9.829787234042554e-06,
|
|
"loss": 1.4173,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.24601063829787234,
|
|
"grad_norm": 4.676888465881348,
|
|
"learning_rate": 9.840425531914895e-06,
|
|
"loss": 1.386,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.24627659574468086,
|
|
"grad_norm": 4.417918682098389,
|
|
"learning_rate": 9.851063829787235e-06,
|
|
"loss": 1.4044,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.24654255319148935,
|
|
"grad_norm": 4.195037841796875,
|
|
"learning_rate": 9.861702127659574e-06,
|
|
"loss": 1.2735,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.24680851063829787,
|
|
"grad_norm": 4.587873935699463,
|
|
"learning_rate": 9.872340425531915e-06,
|
|
"loss": 1.2647,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.24707446808510639,
|
|
"grad_norm": 4.467301845550537,
|
|
"learning_rate": 9.882978723404257e-06,
|
|
"loss": 1.387,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.2473404255319149,
|
|
"grad_norm": 4.606912136077881,
|
|
"learning_rate": 9.893617021276596e-06,
|
|
"loss": 1.3188,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.2476063829787234,
|
|
"grad_norm": 4.470932483673096,
|
|
"learning_rate": 9.904255319148936e-06,
|
|
"loss": 1.3166,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.2478723404255319,
|
|
"grad_norm": 4.317614555358887,
|
|
"learning_rate": 9.914893617021277e-06,
|
|
"loss": 1.3514,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.24813829787234043,
|
|
"grad_norm": 4.443989276885986,
|
|
"learning_rate": 9.925531914893619e-06,
|
|
"loss": 1.2636,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.24840425531914895,
|
|
"grad_norm": 4.796088218688965,
|
|
"learning_rate": 9.936170212765958e-06,
|
|
"loss": 1.2652,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.24867021276595744,
|
|
"grad_norm": 4.967231750488281,
|
|
"learning_rate": 9.946808510638298e-06,
|
|
"loss": 1.4264,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.24893617021276596,
|
|
"grad_norm": 4.075037002563477,
|
|
"learning_rate": 9.957446808510639e-06,
|
|
"loss": 1.1912,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.24920212765957447,
|
|
"grad_norm": 4.505919933319092,
|
|
"learning_rate": 9.96808510638298e-06,
|
|
"loss": 1.3069,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.249468085106383,
|
|
"grad_norm": 4.194151878356934,
|
|
"learning_rate": 9.97872340425532e-06,
|
|
"loss": 1.3177,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.24973404255319148,
|
|
"grad_norm": 4.591639518737793,
|
|
"learning_rate": 9.98936170212766e-06,
|
|
"loss": 1.3742,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 4.259275913238525,
|
|
"learning_rate": 1e-05,
|
|
"loss": 1.2802,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.2502659574468085,
|
|
"grad_norm": 5.042564392089844,
|
|
"learning_rate": 9.999999922647056e-06,
|
|
"loss": 1.3329,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.25053191489361704,
|
|
"grad_norm": 4.728914737701416,
|
|
"learning_rate": 9.999999690588228e-06,
|
|
"loss": 1.2498,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.25079787234042555,
|
|
"grad_norm": 4.191166877746582,
|
|
"learning_rate": 9.999999303823525e-06,
|
|
"loss": 1.3322,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.251063829787234,
|
|
"grad_norm": 4.627315044403076,
|
|
"learning_rate": 9.999998762352953e-06,
|
|
"loss": 1.4223,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.25132978723404253,
|
|
"grad_norm": 4.210728168487549,
|
|
"learning_rate": 9.999998066176536e-06,
|
|
"loss": 1.2534,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.25159574468085105,
|
|
"grad_norm": 4.210343837738037,
|
|
"learning_rate": 9.99999721529429e-06,
|
|
"loss": 1.2587,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.25186170212765957,
|
|
"grad_norm": 4.43513298034668,
|
|
"learning_rate": 9.999996209706243e-06,
|
|
"loss": 1.2222,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.2521276595744681,
|
|
"grad_norm": 4.577609539031982,
|
|
"learning_rate": 9.999995049412428e-06,
|
|
"loss": 1.3063,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.2523936170212766,
|
|
"grad_norm": 4.520708084106445,
|
|
"learning_rate": 9.99999373441288e-06,
|
|
"loss": 1.2357,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.2526595744680851,
|
|
"grad_norm": 4.051931858062744,
|
|
"learning_rate": 9.999992264707636e-06,
|
|
"loss": 1.265,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.25292553191489364,
|
|
"grad_norm": 4.30267333984375,
|
|
"learning_rate": 9.999990640296747e-06,
|
|
"loss": 1.1791,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.2531914893617021,
|
|
"grad_norm": 4.397022724151611,
|
|
"learning_rate": 9.99998886118026e-06,
|
|
"loss": 1.2239,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.2534574468085106,
|
|
"grad_norm": 4.552164077758789,
|
|
"learning_rate": 9.999986927358231e-06,
|
|
"loss": 1.3983,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.25372340425531914,
|
|
"grad_norm": 4.569587707519531,
|
|
"learning_rate": 9.999984838830721e-06,
|
|
"loss": 1.3307,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.25398936170212766,
|
|
"grad_norm": 4.352025985717773,
|
|
"learning_rate": 9.999982595597793e-06,
|
|
"loss": 1.3996,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.2542553191489362,
|
|
"grad_norm": 4.358248710632324,
|
|
"learning_rate": 9.999980197659515e-06,
|
|
"loss": 1.4166,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.2545212765957447,
|
|
"grad_norm": 4.449854373931885,
|
|
"learning_rate": 9.999977645015963e-06,
|
|
"loss": 1.2414,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.2547872340425532,
|
|
"grad_norm": 4.66248083114624,
|
|
"learning_rate": 9.999974937667217e-06,
|
|
"loss": 1.2852,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.2550531914893617,
|
|
"grad_norm": 4.217624187469482,
|
|
"learning_rate": 9.99997207561336e-06,
|
|
"loss": 1.2624,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.2553191489361702,
|
|
"grad_norm": 4.449913501739502,
|
|
"learning_rate": 9.99996905885448e-06,
|
|
"loss": 1.2733,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.2555851063829787,
|
|
"grad_norm": 3.9325287342071533,
|
|
"learning_rate": 9.99996588739067e-06,
|
|
"loss": 1.2253,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.25585106382978723,
|
|
"grad_norm": 4.425497531890869,
|
|
"learning_rate": 9.99996256122203e-06,
|
|
"loss": 1.1233,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.25611702127659575,
|
|
"grad_norm": 3.946796178817749,
|
|
"learning_rate": 9.99995908034866e-06,
|
|
"loss": 1.2961,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.25638297872340426,
|
|
"grad_norm": 4.145402431488037,
|
|
"learning_rate": 9.999955444770671e-06,
|
|
"loss": 1.3856,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.2566489361702128,
|
|
"grad_norm": 4.4032206535339355,
|
|
"learning_rate": 9.99995165448817e-06,
|
|
"loss": 1.3649,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.2569148936170213,
|
|
"grad_norm": 4.492345333099365,
|
|
"learning_rate": 9.999947709501282e-06,
|
|
"loss": 1.2992,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.25718085106382976,
|
|
"grad_norm": 4.298032760620117,
|
|
"learning_rate": 9.999943609810125e-06,
|
|
"loss": 1.3756,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.2574468085106383,
|
|
"grad_norm": 3.9896862506866455,
|
|
"learning_rate": 9.999939355414825e-06,
|
|
"loss": 1.2034,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.2577127659574468,
|
|
"grad_norm": 4.537227630615234,
|
|
"learning_rate": 9.999934946315516e-06,
|
|
"loss": 1.2959,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.2579787234042553,
|
|
"grad_norm": 4.087522029876709,
|
|
"learning_rate": 9.999930382512331e-06,
|
|
"loss": 1.2928,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.25824468085106383,
|
|
"grad_norm": 4.388976573944092,
|
|
"learning_rate": 9.999925664005415e-06,
|
|
"loss": 1.2452,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.25851063829787235,
|
|
"grad_norm": 4.264836311340332,
|
|
"learning_rate": 9.99992079079491e-06,
|
|
"loss": 1.3477,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.25877659574468087,
|
|
"grad_norm": 4.548455715179443,
|
|
"learning_rate": 9.999915762880971e-06,
|
|
"loss": 1.2818,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.2590425531914894,
|
|
"grad_norm": 4.096053600311279,
|
|
"learning_rate": 9.99991058026375e-06,
|
|
"loss": 1.1407,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.25930851063829785,
|
|
"grad_norm": 4.8142571449279785,
|
|
"learning_rate": 9.99990524294341e-06,
|
|
"loss": 1.5322,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.25957446808510637,
|
|
"grad_norm": 4.194404602050781,
|
|
"learning_rate": 9.999899750920115e-06,
|
|
"loss": 1.2874,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.2598404255319149,
|
|
"grad_norm": 3.905287504196167,
|
|
"learning_rate": 9.999894104194037e-06,
|
|
"loss": 1.1986,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.2601063829787234,
|
|
"grad_norm": 4.401111602783203,
|
|
"learning_rate": 9.999888302765347e-06,
|
|
"loss": 1.2148,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.2603723404255319,
|
|
"grad_norm": 4.558286666870117,
|
|
"learning_rate": 9.999882346634225e-06,
|
|
"loss": 1.247,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.26063829787234044,
|
|
"grad_norm": 3.902086019515991,
|
|
"learning_rate": 9.999876235800859e-06,
|
|
"loss": 1.3395,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.26090425531914896,
|
|
"grad_norm": 4.327469825744629,
|
|
"learning_rate": 9.999869970265434e-06,
|
|
"loss": 1.301,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.2611702127659574,
|
|
"grad_norm": 4.4269609451293945,
|
|
"learning_rate": 9.999863550028147e-06,
|
|
"loss": 1.3436,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.26143617021276594,
|
|
"grad_norm": 4.277595520019531,
|
|
"learning_rate": 9.999856975089193e-06,
|
|
"loss": 1.3487,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.26170212765957446,
|
|
"grad_norm": 5.5637311935424805,
|
|
"learning_rate": 9.99985024544878e-06,
|
|
"loss": 1.3848,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.261968085106383,
|
|
"grad_norm": 4.938830852508545,
|
|
"learning_rate": 9.999843361107111e-06,
|
|
"loss": 1.2637,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.2622340425531915,
|
|
"grad_norm": 4.1854376792907715,
|
|
"learning_rate": 9.999836322064404e-06,
|
|
"loss": 1.2802,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.2625,
|
|
"grad_norm": 4.120711803436279,
|
|
"learning_rate": 9.999829128320873e-06,
|
|
"loss": 1.2468,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.26276595744680853,
|
|
"grad_norm": 4.207146167755127,
|
|
"learning_rate": 9.999821779876744e-06,
|
|
"loss": 1.2662,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.26303191489361705,
|
|
"grad_norm": 4.666594505310059,
|
|
"learning_rate": 9.999814276732242e-06,
|
|
"loss": 1.3755,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.2632978723404255,
|
|
"grad_norm": 4.344621181488037,
|
|
"learning_rate": 9.9998066188876e-06,
|
|
"loss": 1.3096,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.263563829787234,
|
|
"grad_norm": 4.433095455169678,
|
|
"learning_rate": 9.999798806343055e-06,
|
|
"loss": 1.3499,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.26382978723404255,
|
|
"grad_norm": 4.92564058303833,
|
|
"learning_rate": 9.999790839098847e-06,
|
|
"loss": 1.281,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.26409574468085106,
|
|
"grad_norm": 4.6375603675842285,
|
|
"learning_rate": 9.999782717155225e-06,
|
|
"loss": 1.3261,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.2643617021276596,
|
|
"grad_norm": 4.372560024261475,
|
|
"learning_rate": 9.999774440512438e-06,
|
|
"loss": 1.186,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.2646276595744681,
|
|
"grad_norm": 4.910377502441406,
|
|
"learning_rate": 9.999766009170743e-06,
|
|
"loss": 1.4187,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.2648936170212766,
|
|
"grad_norm": 4.599401473999023,
|
|
"learning_rate": 9.999757423130402e-06,
|
|
"loss": 1.4278,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.2651595744680851,
|
|
"grad_norm": 4.204658508300781,
|
|
"learning_rate": 9.999748682391682e-06,
|
|
"loss": 1.3376,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.2654255319148936,
|
|
"grad_norm": 4.476613998413086,
|
|
"learning_rate": 9.999739786954849e-06,
|
|
"loss": 1.1909,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.2656914893617021,
|
|
"grad_norm": 4.173623561859131,
|
|
"learning_rate": 9.999730736820182e-06,
|
|
"loss": 1.2678,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.26595744680851063,
|
|
"grad_norm": 4.294970989227295,
|
|
"learning_rate": 9.999721531987958e-06,
|
|
"loss": 1.224,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.26595744680851063,
|
|
"eval_loss": 1.3182601928710938,
|
|
"eval_runtime": 12.5838,
|
|
"eval_samples_per_second": 31.787,
|
|
"eval_steps_per_second": 3.973,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.26622340425531915,
|
|
"grad_norm": 4.1402411460876465,
|
|
"learning_rate": 9.999712172458462e-06,
|
|
"loss": 1.1836,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.26648936170212767,
|
|
"grad_norm": 5.045607566833496,
|
|
"learning_rate": 9.999702658231987e-06,
|
|
"loss": 1.2545,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.2667553191489362,
|
|
"grad_norm": 4.2975921630859375,
|
|
"learning_rate": 9.999692989308827e-06,
|
|
"loss": 1.4903,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.2670212765957447,
|
|
"grad_norm": 4.366122245788574,
|
|
"learning_rate": 9.999683165689277e-06,
|
|
"loss": 1.3197,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.26728723404255317,
|
|
"grad_norm": 4.20319938659668,
|
|
"learning_rate": 9.999673187373644e-06,
|
|
"loss": 1.5023,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.2675531914893617,
|
|
"grad_norm": 4.779364109039307,
|
|
"learning_rate": 9.999663054362236e-06,
|
|
"loss": 1.4043,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.2678191489361702,
|
|
"grad_norm": 4.18774938583374,
|
|
"learning_rate": 9.999652766655367e-06,
|
|
"loss": 1.2043,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.2680851063829787,
|
|
"grad_norm": 4.277698040008545,
|
|
"learning_rate": 9.999642324253357e-06,
|
|
"loss": 1.3012,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.26835106382978724,
|
|
"grad_norm": 4.673196315765381,
|
|
"learning_rate": 9.999631727156523e-06,
|
|
"loss": 1.4028,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.26861702127659576,
|
|
"grad_norm": 3.9610633850097656,
|
|
"learning_rate": 9.9996209753652e-06,
|
|
"loss": 1.2564,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.2688829787234043,
|
|
"grad_norm": 4.724634170532227,
|
|
"learning_rate": 9.999610068879717e-06,
|
|
"loss": 1.2371,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.2691489361702128,
|
|
"grad_norm": 4.770898342132568,
|
|
"learning_rate": 9.999599007700411e-06,
|
|
"loss": 1.3291,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.26941489361702126,
|
|
"grad_norm": 4.2460551261901855,
|
|
"learning_rate": 9.999587791827627e-06,
|
|
"loss": 1.321,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.2696808510638298,
|
|
"grad_norm": 4.29102897644043,
|
|
"learning_rate": 9.99957642126171e-06,
|
|
"loss": 1.2469,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.2699468085106383,
|
|
"grad_norm": 4.516227722167969,
|
|
"learning_rate": 9.999564896003013e-06,
|
|
"loss": 1.2158,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.2702127659574468,
|
|
"grad_norm": 4.530557632446289,
|
|
"learning_rate": 9.999553216051892e-06,
|
|
"loss": 1.3454,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.27047872340425533,
|
|
"grad_norm": 4.2970290184021,
|
|
"learning_rate": 9.999541381408706e-06,
|
|
"loss": 1.3784,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.27074468085106385,
|
|
"grad_norm": 4.136434078216553,
|
|
"learning_rate": 9.999529392073825e-06,
|
|
"loss": 1.2268,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.27101063829787236,
|
|
"grad_norm": 4.108096122741699,
|
|
"learning_rate": 9.999517248047618e-06,
|
|
"loss": 1.2798,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.2712765957446808,
|
|
"grad_norm": 4.367121696472168,
|
|
"learning_rate": 9.99950494933046e-06,
|
|
"loss": 1.2629,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.27154255319148934,
|
|
"grad_norm": 4.400355815887451,
|
|
"learning_rate": 9.999492495922735e-06,
|
|
"loss": 1.3386,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.27180851063829786,
|
|
"grad_norm": 4.384739875793457,
|
|
"learning_rate": 9.999479887824826e-06,
|
|
"loss": 1.2904,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.2720744680851064,
|
|
"grad_norm": 4.273925304412842,
|
|
"learning_rate": 9.999467125037121e-06,
|
|
"loss": 1.268,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.2723404255319149,
|
|
"grad_norm": 4.222406387329102,
|
|
"learning_rate": 9.999454207560019e-06,
|
|
"loss": 1.2875,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.2726063829787234,
|
|
"grad_norm": 4.79681396484375,
|
|
"learning_rate": 9.999441135393917e-06,
|
|
"loss": 1.3315,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.27287234042553193,
|
|
"grad_norm": 4.473938941955566,
|
|
"learning_rate": 9.99942790853922e-06,
|
|
"loss": 1.4033,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.27313829787234045,
|
|
"grad_norm": 4.128412246704102,
|
|
"learning_rate": 9.999414526996337e-06,
|
|
"loss": 1.1818,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.2734042553191489,
|
|
"grad_norm": 4.2525739669799805,
|
|
"learning_rate": 9.999400990765683e-06,
|
|
"loss": 1.2004,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.27367021276595743,
|
|
"grad_norm": 4.565985202789307,
|
|
"learning_rate": 9.999387299847677e-06,
|
|
"loss": 1.3035,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.27393617021276595,
|
|
"grad_norm": 4.308706283569336,
|
|
"learning_rate": 9.99937345424274e-06,
|
|
"loss": 1.2976,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.27420212765957447,
|
|
"grad_norm": 4.31046724319458,
|
|
"learning_rate": 9.999359453951303e-06,
|
|
"loss": 1.3213,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.274468085106383,
|
|
"grad_norm": 4.618355751037598,
|
|
"learning_rate": 9.9993452989738e-06,
|
|
"loss": 1.3231,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.2747340425531915,
|
|
"grad_norm": 4.580687999725342,
|
|
"learning_rate": 9.999330989310665e-06,
|
|
"loss": 1.3654,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.275,
|
|
"grad_norm": 4.229262351989746,
|
|
"learning_rate": 9.999316524962347e-06,
|
|
"loss": 1.2944,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.2752659574468085,
|
|
"grad_norm": 3.708747148513794,
|
|
"learning_rate": 9.999301905929286e-06,
|
|
"loss": 1.154,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.275531914893617,
|
|
"grad_norm": 4.275104999542236,
|
|
"learning_rate": 9.999287132211938e-06,
|
|
"loss": 1.2148,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.2757978723404255,
|
|
"grad_norm": 4.225863456726074,
|
|
"learning_rate": 9.999272203810763e-06,
|
|
"loss": 1.4705,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.27606382978723404,
|
|
"grad_norm": 4.132633209228516,
|
|
"learning_rate": 9.999257120726219e-06,
|
|
"loss": 1.2538,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.27632978723404256,
|
|
"grad_norm": 5.643379211425781,
|
|
"learning_rate": 9.999241882958772e-06,
|
|
"loss": 1.2564,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.2765957446808511,
|
|
"grad_norm": 4.306319713592529,
|
|
"learning_rate": 9.999226490508897e-06,
|
|
"loss": 1.4085,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.2768617021276596,
|
|
"grad_norm": 4.2022247314453125,
|
|
"learning_rate": 9.99921094337707e-06,
|
|
"loss": 1.3632,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.2771276595744681,
|
|
"grad_norm": 4.866800785064697,
|
|
"learning_rate": 9.999195241563768e-06,
|
|
"loss": 1.3262,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.2773936170212766,
|
|
"grad_norm": 4.111828327178955,
|
|
"learning_rate": 9.99917938506948e-06,
|
|
"loss": 1.3087,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.2776595744680851,
|
|
"grad_norm": 4.37149715423584,
|
|
"learning_rate": 9.999163373894696e-06,
|
|
"loss": 1.2089,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.2779255319148936,
|
|
"grad_norm": 4.524958610534668,
|
|
"learning_rate": 9.999147208039912e-06,
|
|
"loss": 1.1935,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.2781914893617021,
|
|
"grad_norm": 4.5271406173706055,
|
|
"learning_rate": 9.999130887505627e-06,
|
|
"loss": 1.3111,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.27845744680851064,
|
|
"grad_norm": 4.4966301918029785,
|
|
"learning_rate": 9.999114412292347e-06,
|
|
"loss": 1.3695,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.27872340425531916,
|
|
"grad_norm": 4.8100714683532715,
|
|
"learning_rate": 9.999097782400582e-06,
|
|
"loss": 1.3152,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.2789893617021277,
|
|
"grad_norm": 4.238595962524414,
|
|
"learning_rate": 9.999080997830845e-06,
|
|
"loss": 1.2533,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.27925531914893614,
|
|
"grad_norm": 4.036017417907715,
|
|
"learning_rate": 9.999064058583657e-06,
|
|
"loss": 1.1984,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.27952127659574466,
|
|
"grad_norm": 4.587932586669922,
|
|
"learning_rate": 9.99904696465954e-06,
|
|
"loss": 1.2216,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.2797872340425532,
|
|
"grad_norm": 5.027749538421631,
|
|
"learning_rate": 9.999029716059026e-06,
|
|
"loss": 1.4618,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.2800531914893617,
|
|
"grad_norm": 4.331791400909424,
|
|
"learning_rate": 9.999012312782645e-06,
|
|
"loss": 1.2566,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.2803191489361702,
|
|
"grad_norm": 4.737422943115234,
|
|
"learning_rate": 9.99899475483094e-06,
|
|
"loss": 1.2935,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.28058510638297873,
|
|
"grad_norm": 4.8805832862854,
|
|
"learning_rate": 9.998977042204449e-06,
|
|
"loss": 1.3277,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.28085106382978725,
|
|
"grad_norm": 4.296173095703125,
|
|
"learning_rate": 9.998959174903725e-06,
|
|
"loss": 1.341,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.28111702127659577,
|
|
"grad_norm": 4.3713788986206055,
|
|
"learning_rate": 9.998941152929316e-06,
|
|
"loss": 1.308,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.28138297872340423,
|
|
"grad_norm": 4.576108932495117,
|
|
"learning_rate": 9.998922976281785e-06,
|
|
"loss": 1.2585,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.28164893617021275,
|
|
"grad_norm": 4.187806129455566,
|
|
"learning_rate": 9.998904644961689e-06,
|
|
"loss": 1.393,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.28191489361702127,
|
|
"grad_norm": 4.360199928283691,
|
|
"learning_rate": 9.9988861589696e-06,
|
|
"loss": 1.4,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.2821808510638298,
|
|
"grad_norm": 4.283745288848877,
|
|
"learning_rate": 9.998867518306087e-06,
|
|
"loss": 1.2823,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.2824468085106383,
|
|
"grad_norm": 3.8223369121551514,
|
|
"learning_rate": 9.998848722971727e-06,
|
|
"loss": 1.3144,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.2827127659574468,
|
|
"grad_norm": 4.405114650726318,
|
|
"learning_rate": 9.998829772967103e-06,
|
|
"loss": 1.4051,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.28297872340425534,
|
|
"grad_norm": 4.547544479370117,
|
|
"learning_rate": 9.9988106682928e-06,
|
|
"loss": 1.2622,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.28324468085106386,
|
|
"grad_norm": 3.850954055786133,
|
|
"learning_rate": 9.998791408949408e-06,
|
|
"loss": 1.197,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.2835106382978723,
|
|
"grad_norm": 3.994758367538452,
|
|
"learning_rate": 9.998771994937528e-06,
|
|
"loss": 1.1907,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.28377659574468084,
|
|
"grad_norm": 4.24208927154541,
|
|
"learning_rate": 9.998752426257754e-06,
|
|
"loss": 1.4078,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.28404255319148936,
|
|
"grad_norm": 4.435787200927734,
|
|
"learning_rate": 9.998732702910697e-06,
|
|
"loss": 1.2044,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.2843085106382979,
|
|
"grad_norm": 4.169311046600342,
|
|
"learning_rate": 9.998712824896963e-06,
|
|
"loss": 1.2126,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.2845744680851064,
|
|
"grad_norm": 4.478437900543213,
|
|
"learning_rate": 9.99869279221717e-06,
|
|
"loss": 1.3164,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.2848404255319149,
|
|
"grad_norm": 4.775943756103516,
|
|
"learning_rate": 9.998672604871936e-06,
|
|
"loss": 1.3169,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.2851063829787234,
|
|
"grad_norm": 4.637179374694824,
|
|
"learning_rate": 9.998652262861888e-06,
|
|
"loss": 1.2441,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.2853723404255319,
|
|
"grad_norm": 4.511475086212158,
|
|
"learning_rate": 9.998631766187651e-06,
|
|
"loss": 1.3766,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.2856382978723404,
|
|
"grad_norm": 4.503199100494385,
|
|
"learning_rate": 9.998611114849866e-06,
|
|
"loss": 1.1787,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.2859042553191489,
|
|
"grad_norm": 4.549198627471924,
|
|
"learning_rate": 9.998590308849164e-06,
|
|
"loss": 1.3229,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.28617021276595744,
|
|
"grad_norm": 4.182891368865967,
|
|
"learning_rate": 9.998569348186194e-06,
|
|
"loss": 1.2659,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.28643617021276596,
|
|
"grad_norm": 4.964444160461426,
|
|
"learning_rate": 9.998548232861604e-06,
|
|
"loss": 1.4196,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.2867021276595745,
|
|
"grad_norm": 4.905456066131592,
|
|
"learning_rate": 9.998526962876047e-06,
|
|
"loss": 1.3089,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.286968085106383,
|
|
"grad_norm": 4.207391738891602,
|
|
"learning_rate": 9.998505538230179e-06,
|
|
"loss": 1.3231,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.2872340425531915,
|
|
"grad_norm": 4.414906024932861,
|
|
"learning_rate": 9.998483958924666e-06,
|
|
"loss": 1.229,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.2875,
|
|
"grad_norm": 4.2714667320251465,
|
|
"learning_rate": 9.998462224960176e-06,
|
|
"loss": 1.4204,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.2877659574468085,
|
|
"grad_norm": 4.423734188079834,
|
|
"learning_rate": 9.998440336337376e-06,
|
|
"loss": 1.3774,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.288031914893617,
|
|
"grad_norm": 4.450468063354492,
|
|
"learning_rate": 9.998418293056949e-06,
|
|
"loss": 1.2639,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.28829787234042553,
|
|
"grad_norm": 4.328600883483887,
|
|
"learning_rate": 9.998396095119575e-06,
|
|
"loss": 1.3594,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.28856382978723405,
|
|
"grad_norm": 4.951174259185791,
|
|
"learning_rate": 9.998373742525941e-06,
|
|
"loss": 1.4862,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.28882978723404257,
|
|
"grad_norm": 4.484705924987793,
|
|
"learning_rate": 9.998351235276738e-06,
|
|
"loss": 1.3577,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.2890957446808511,
|
|
"grad_norm": 4.428178310394287,
|
|
"learning_rate": 9.998328573372664e-06,
|
|
"loss": 1.2438,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.28936170212765955,
|
|
"grad_norm": 4.682640552520752,
|
|
"learning_rate": 9.998305756814419e-06,
|
|
"loss": 1.3493,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.28962765957446807,
|
|
"grad_norm": 4.30879020690918,
|
|
"learning_rate": 9.998282785602709e-06,
|
|
"loss": 1.253,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.2898936170212766,
|
|
"grad_norm": 4.327608108520508,
|
|
"learning_rate": 9.998259659738243e-06,
|
|
"loss": 1.3574,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.2901595744680851,
|
|
"grad_norm": 3.996189594268799,
|
|
"learning_rate": 9.998236379221742e-06,
|
|
"loss": 1.1811,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.2904255319148936,
|
|
"grad_norm": 4.262546062469482,
|
|
"learning_rate": 9.99821294405392e-06,
|
|
"loss": 1.1899,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.29069148936170214,
|
|
"grad_norm": 3.7779383659362793,
|
|
"learning_rate": 9.998189354235506e-06,
|
|
"loss": 1.3034,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.29095744680851066,
|
|
"grad_norm": 4.748449325561523,
|
|
"learning_rate": 9.998165609767228e-06,
|
|
"loss": 1.1943,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.2912234042553192,
|
|
"grad_norm": 4.325401782989502,
|
|
"learning_rate": 9.998141710649822e-06,
|
|
"loss": 1.2955,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.29148936170212764,
|
|
"grad_norm": 4.276817321777344,
|
|
"learning_rate": 9.998117656884025e-06,
|
|
"loss": 1.2853,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.29175531914893615,
|
|
"grad_norm": 4.66014289855957,
|
|
"learning_rate": 9.998093448470585e-06,
|
|
"loss": 1.2643,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.29202127659574467,
|
|
"grad_norm": 3.963014602661133,
|
|
"learning_rate": 9.998069085410249e-06,
|
|
"loss": 1.2145,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.2922872340425532,
|
|
"grad_norm": 4.040323734283447,
|
|
"learning_rate": 9.99804456770377e-06,
|
|
"loss": 1.3845,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.2925531914893617,
|
|
"grad_norm": 3.8575801849365234,
|
|
"learning_rate": 9.99801989535191e-06,
|
|
"loss": 1.131,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.2928191489361702,
|
|
"grad_norm": 4.067200183868408,
|
|
"learning_rate": 9.997995068355428e-06,
|
|
"loss": 1.352,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.29308510638297874,
|
|
"grad_norm": 4.207942962646484,
|
|
"learning_rate": 9.997970086715096e-06,
|
|
"loss": 1.2372,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.2933510638297872,
|
|
"grad_norm": 4.058019638061523,
|
|
"learning_rate": 9.997944950431684e-06,
|
|
"loss": 1.203,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.2936170212765957,
|
|
"grad_norm": 4.622230052947998,
|
|
"learning_rate": 9.99791965950597e-06,
|
|
"loss": 1.3916,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.29388297872340424,
|
|
"grad_norm": 4.3508076667785645,
|
|
"learning_rate": 9.997894213938738e-06,
|
|
"loss": 1.3344,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.29414893617021276,
|
|
"grad_norm": 3.9889092445373535,
|
|
"learning_rate": 9.997868613730775e-06,
|
|
"loss": 1.1658,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.2944148936170213,
|
|
"grad_norm": 4.091287136077881,
|
|
"learning_rate": 9.997842858882873e-06,
|
|
"loss": 1.3258,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.2946808510638298,
|
|
"grad_norm": 4.280172824859619,
|
|
"learning_rate": 9.997816949395828e-06,
|
|
"loss": 1.3231,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.2949468085106383,
|
|
"grad_norm": 4.268125057220459,
|
|
"learning_rate": 9.997790885270444e-06,
|
|
"loss": 1.1984,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.29521276595744683,
|
|
"grad_norm": 4.030393600463867,
|
|
"learning_rate": 9.997764666507523e-06,
|
|
"loss": 1.3441,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.2954787234042553,
|
|
"grad_norm": 4.591287136077881,
|
|
"learning_rate": 9.997738293107882e-06,
|
|
"loss": 1.3059,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.2957446808510638,
|
|
"grad_norm": 5.225955486297607,
|
|
"learning_rate": 9.997711765072333e-06,
|
|
"loss": 1.3236,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.29601063829787233,
|
|
"grad_norm": 4.161701679229736,
|
|
"learning_rate": 9.997685082401698e-06,
|
|
"loss": 1.2,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.29627659574468085,
|
|
"grad_norm": 4.316693305969238,
|
|
"learning_rate": 9.997658245096802e-06,
|
|
"loss": 1.2758,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.29654255319148937,
|
|
"grad_norm": 4.311786651611328,
|
|
"learning_rate": 9.997631253158477e-06,
|
|
"loss": 1.1873,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.2968085106382979,
|
|
"grad_norm": 4.271190643310547,
|
|
"learning_rate": 9.997604106587555e-06,
|
|
"loss": 1.1661,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.2970744680851064,
|
|
"grad_norm": 4.620399475097656,
|
|
"learning_rate": 9.99757680538488e-06,
|
|
"loss": 1.3542,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.2973404255319149,
|
|
"grad_norm": 4.287705421447754,
|
|
"learning_rate": 9.997549349551295e-06,
|
|
"loss": 1.3467,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.2976063829787234,
|
|
"grad_norm": 4.158224105834961,
|
|
"learning_rate": 9.997521739087647e-06,
|
|
"loss": 1.229,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.2978723404255319,
|
|
"grad_norm": 4.308200836181641,
|
|
"learning_rate": 9.997493973994793e-06,
|
|
"loss": 1.3478,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.2981382978723404,
|
|
"grad_norm": 4.467398643493652,
|
|
"learning_rate": 9.997466054273593e-06,
|
|
"loss": 1.2729,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.29840425531914894,
|
|
"grad_norm": 4.264455318450928,
|
|
"learning_rate": 9.997437979924908e-06,
|
|
"loss": 1.234,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.29867021276595745,
|
|
"grad_norm": 4.258848190307617,
|
|
"learning_rate": 9.99740975094961e-06,
|
|
"loss": 1.1682,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.298936170212766,
|
|
"grad_norm": 4.3061089515686035,
|
|
"learning_rate": 9.99738136734857e-06,
|
|
"loss": 1.3241,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.2992021276595745,
|
|
"grad_norm": 4.324080467224121,
|
|
"learning_rate": 9.997352829122667e-06,
|
|
"loss": 1.254,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.29946808510638295,
|
|
"grad_norm": 4.312755584716797,
|
|
"learning_rate": 9.997324136272784e-06,
|
|
"loss": 1.309,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.29973404255319147,
|
|
"grad_norm": 4.023726463317871,
|
|
"learning_rate": 9.997295288799806e-06,
|
|
"loss": 1.238,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"grad_norm": 4.355762004852295,
|
|
"learning_rate": 9.99726628670463e-06,
|
|
"loss": 1.2271,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.3002659574468085,
|
|
"grad_norm": 4.85224723815918,
|
|
"learning_rate": 9.997237129988154e-06,
|
|
"loss": 1.2849,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.300531914893617,
|
|
"grad_norm": 4.464909553527832,
|
|
"learning_rate": 9.997207818651273e-06,
|
|
"loss": 1.2992,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.30079787234042554,
|
|
"grad_norm": 3.7525863647460938,
|
|
"learning_rate": 9.997178352694902e-06,
|
|
"loss": 1.1764,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.30106382978723406,
|
|
"grad_norm": 4.892136096954346,
|
|
"learning_rate": 9.997148732119947e-06,
|
|
"loss": 1.4041,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.3013297872340426,
|
|
"grad_norm": 3.8774726390838623,
|
|
"learning_rate": 9.99711895692733e-06,
|
|
"loss": 1.1936,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.30159574468085104,
|
|
"grad_norm": 4.585043907165527,
|
|
"learning_rate": 9.997089027117966e-06,
|
|
"loss": 1.2402,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.30186170212765956,
|
|
"grad_norm": 4.731383800506592,
|
|
"learning_rate": 9.997058942692786e-06,
|
|
"loss": 1.3886,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.3021276595744681,
|
|
"grad_norm": 4.4259033203125,
|
|
"learning_rate": 9.997028703652718e-06,
|
|
"loss": 1.4784,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.3023936170212766,
|
|
"grad_norm": 4.584959030151367,
|
|
"learning_rate": 9.996998309998699e-06,
|
|
"loss": 1.1575,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.3026595744680851,
|
|
"grad_norm": 4.300727844238281,
|
|
"learning_rate": 9.996967761731668e-06,
|
|
"loss": 1.3999,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.30292553191489363,
|
|
"grad_norm": 4.30328893661499,
|
|
"learning_rate": 9.996937058852575e-06,
|
|
"loss": 1.3061,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.30319148936170215,
|
|
"grad_norm": 4.1981964111328125,
|
|
"learning_rate": 9.996906201362361e-06,
|
|
"loss": 1.3078,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.3034574468085106,
|
|
"grad_norm": 4.507598876953125,
|
|
"learning_rate": 9.99687518926199e-06,
|
|
"loss": 1.3732,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.30372340425531913,
|
|
"grad_norm": 4.559037685394287,
|
|
"learning_rate": 9.996844022552416e-06,
|
|
"loss": 1.3447,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.30398936170212765,
|
|
"grad_norm": 4.10542106628418,
|
|
"learning_rate": 9.996812701234604e-06,
|
|
"loss": 1.2118,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.30425531914893617,
|
|
"grad_norm": 4.441193103790283,
|
|
"learning_rate": 9.996781225309526e-06,
|
|
"loss": 1.3549,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.3045212765957447,
|
|
"grad_norm": 4.166191577911377,
|
|
"learning_rate": 9.996749594778153e-06,
|
|
"loss": 1.3067,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.3047872340425532,
|
|
"grad_norm": 4.284362316131592,
|
|
"learning_rate": 9.996717809641464e-06,
|
|
"loss": 1.31,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.3050531914893617,
|
|
"grad_norm": 4.457339286804199,
|
|
"learning_rate": 9.996685869900444e-06,
|
|
"loss": 1.2858,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.30531914893617024,
|
|
"grad_norm": 5.572897434234619,
|
|
"learning_rate": 9.99665377555608e-06,
|
|
"loss": 1.3094,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.3055851063829787,
|
|
"grad_norm": 3.9291319847106934,
|
|
"learning_rate": 9.996621526609364e-06,
|
|
"loss": 1.1499,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.3058510638297872,
|
|
"grad_norm": 4.23716926574707,
|
|
"learning_rate": 9.996589123061297e-06,
|
|
"loss": 1.1395,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.30611702127659574,
|
|
"grad_norm": 4.1819047927856445,
|
|
"learning_rate": 9.99655656491288e-06,
|
|
"loss": 1.2152,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.30638297872340425,
|
|
"grad_norm": 4.467685222625732,
|
|
"learning_rate": 9.99652385216512e-06,
|
|
"loss": 1.38,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.30664893617021277,
|
|
"grad_norm": 3.723454236984253,
|
|
"learning_rate": 9.996490984819027e-06,
|
|
"loss": 1.1745,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.3069148936170213,
|
|
"grad_norm": 4.097151756286621,
|
|
"learning_rate": 9.996457962875623e-06,
|
|
"loss": 1.3743,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.3071808510638298,
|
|
"grad_norm": 4.7414326667785645,
|
|
"learning_rate": 9.996424786335925e-06,
|
|
"loss": 1.4252,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.3074468085106383,
|
|
"grad_norm": 3.7857699394226074,
|
|
"learning_rate": 9.996391455200963e-06,
|
|
"loss": 1.2984,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.3077127659574468,
|
|
"grad_norm": 4.953484535217285,
|
|
"learning_rate": 9.996357969471767e-06,
|
|
"loss": 1.3539,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.3079787234042553,
|
|
"grad_norm": 4.564802646636963,
|
|
"learning_rate": 9.996324329149372e-06,
|
|
"loss": 1.2833,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.3082446808510638,
|
|
"grad_norm": 4.2867045402526855,
|
|
"learning_rate": 9.99629053423482e-06,
|
|
"loss": 1.2933,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.30851063829787234,
|
|
"grad_norm": 4.2070817947387695,
|
|
"learning_rate": 9.996256584729157e-06,
|
|
"loss": 1.163,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.30877659574468086,
|
|
"grad_norm": 4.603311061859131,
|
|
"learning_rate": 9.996222480633433e-06,
|
|
"loss": 1.2404,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.3090425531914894,
|
|
"grad_norm": 4.443660736083984,
|
|
"learning_rate": 9.996188221948702e-06,
|
|
"loss": 1.3518,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.3093085106382979,
|
|
"grad_norm": 4.2897443771362305,
|
|
"learning_rate": 9.996153808676025e-06,
|
|
"loss": 1.2786,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.30957446808510636,
|
|
"grad_norm": 4.69590425491333,
|
|
"learning_rate": 9.996119240816469e-06,
|
|
"loss": 1.3259,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.3098404255319149,
|
|
"grad_norm": 4.064958095550537,
|
|
"learning_rate": 9.996084518371101e-06,
|
|
"loss": 1.2768,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.3101063829787234,
|
|
"grad_norm": 4.3534626960754395,
|
|
"learning_rate": 9.996049641340994e-06,
|
|
"loss": 1.3245,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.3103723404255319,
|
|
"grad_norm": 4.278623580932617,
|
|
"learning_rate": 9.996014609727232e-06,
|
|
"loss": 1.405,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.31063829787234043,
|
|
"grad_norm": 4.835923671722412,
|
|
"learning_rate": 9.995979423530893e-06,
|
|
"loss": 1.2416,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.31090425531914895,
|
|
"grad_norm": 4.191746711730957,
|
|
"learning_rate": 9.99594408275307e-06,
|
|
"loss": 1.154,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.31117021276595747,
|
|
"grad_norm": 3.9082558155059814,
|
|
"learning_rate": 9.995908587394854e-06,
|
|
"loss": 1.2412,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.311436170212766,
|
|
"grad_norm": 4.342267036437988,
|
|
"learning_rate": 9.995872937457345e-06,
|
|
"loss": 1.2312,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.31170212765957445,
|
|
"grad_norm": 4.569537162780762,
|
|
"learning_rate": 9.995837132941646e-06,
|
|
"loss": 1.3551,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.31196808510638296,
|
|
"grad_norm": 4.246980667114258,
|
|
"learning_rate": 9.995801173848863e-06,
|
|
"loss": 1.2517,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.3122340425531915,
|
|
"grad_norm": 4.276669025421143,
|
|
"learning_rate": 9.995765060180111e-06,
|
|
"loss": 1.2417,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 4.076509952545166,
|
|
"learning_rate": 9.995728791936505e-06,
|
|
"loss": 1.2837,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.3127659574468085,
|
|
"grad_norm": 4.078117370605469,
|
|
"learning_rate": 9.99569236911917e-06,
|
|
"loss": 1.1589,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.31303191489361704,
|
|
"grad_norm": 4.253208637237549,
|
|
"learning_rate": 9.995655791729231e-06,
|
|
"loss": 1.4023,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.31329787234042555,
|
|
"grad_norm": 4.0782790184021,
|
|
"learning_rate": 9.99561905976782e-06,
|
|
"loss": 1.2094,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.313563829787234,
|
|
"grad_norm": 4.714814186096191,
|
|
"learning_rate": 9.995582173236073e-06,
|
|
"loss": 1.2883,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.31382978723404253,
|
|
"grad_norm": 4.640500068664551,
|
|
"learning_rate": 9.995545132135133e-06,
|
|
"loss": 1.3784,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.31409574468085105,
|
|
"grad_norm": 4.722717761993408,
|
|
"learning_rate": 9.995507936466144e-06,
|
|
"loss": 1.2644,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.31436170212765957,
|
|
"grad_norm": 4.296687602996826,
|
|
"learning_rate": 9.99547058623026e-06,
|
|
"loss": 1.2238,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.3146276595744681,
|
|
"grad_norm": 4.157870769500732,
|
|
"learning_rate": 9.995433081428631e-06,
|
|
"loss": 1.2275,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.3148936170212766,
|
|
"grad_norm": 4.162895202636719,
|
|
"learning_rate": 9.995395422062424e-06,
|
|
"loss": 1.2697,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.3151595744680851,
|
|
"grad_norm": 4.142743110656738,
|
|
"learning_rate": 9.9953576081328e-06,
|
|
"loss": 1.2514,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.31542553191489364,
|
|
"grad_norm": 4.504545211791992,
|
|
"learning_rate": 9.995319639640932e-06,
|
|
"loss": 1.1996,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.3156914893617021,
|
|
"grad_norm": 4.5642523765563965,
|
|
"learning_rate": 9.995281516587992e-06,
|
|
"loss": 1.4783,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.3159574468085106,
|
|
"grad_norm": 4.14572286605835,
|
|
"learning_rate": 9.99524323897516e-06,
|
|
"loss": 1.3261,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.31622340425531914,
|
|
"grad_norm": 4.159525394439697,
|
|
"learning_rate": 9.995204806803622e-06,
|
|
"loss": 1.3492,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.31648936170212766,
|
|
"grad_norm": 3.9404852390289307,
|
|
"learning_rate": 9.995166220074566e-06,
|
|
"loss": 1.2726,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.3167553191489362,
|
|
"grad_norm": 4.158994197845459,
|
|
"learning_rate": 9.995127478789186e-06,
|
|
"loss": 1.2472,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.3170212765957447,
|
|
"grad_norm": 4.277184009552002,
|
|
"learning_rate": 9.995088582948682e-06,
|
|
"loss": 1.3549,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.3172872340425532,
|
|
"grad_norm": 4.210202217102051,
|
|
"learning_rate": 9.995049532554253e-06,
|
|
"loss": 1.313,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.3175531914893617,
|
|
"grad_norm": 4.146048545837402,
|
|
"learning_rate": 9.995010327607113e-06,
|
|
"loss": 1.3272,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.3178191489361702,
|
|
"grad_norm": 4.287917137145996,
|
|
"learning_rate": 9.994970968108473e-06,
|
|
"loss": 1.4158,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.3180851063829787,
|
|
"grad_norm": 3.8834691047668457,
|
|
"learning_rate": 9.99493145405955e-06,
|
|
"loss": 1.1957,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.31835106382978723,
|
|
"grad_norm": 4.134634494781494,
|
|
"learning_rate": 9.994891785461565e-06,
|
|
"loss": 1.3806,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.31861702127659575,
|
|
"grad_norm": 4.137069225311279,
|
|
"learning_rate": 9.99485196231575e-06,
|
|
"loss": 1.2337,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.31888297872340426,
|
|
"grad_norm": 3.9084503650665283,
|
|
"learning_rate": 9.994811984623332e-06,
|
|
"loss": 1.1263,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.3191489361702128,
|
|
"grad_norm": 4.515985012054443,
|
|
"learning_rate": 9.994771852385552e-06,
|
|
"loss": 1.3851,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3194148936170213,
|
|
"grad_norm": 4.150672912597656,
|
|
"learning_rate": 9.994731565603651e-06,
|
|
"loss": 1.2034,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.31968085106382976,
|
|
"grad_norm": 4.727832317352295,
|
|
"learning_rate": 9.994691124278874e-06,
|
|
"loss": 1.3987,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.3199468085106383,
|
|
"grad_norm": 4.292087554931641,
|
|
"learning_rate": 9.994650528412472e-06,
|
|
"loss": 1.3757,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.3202127659574468,
|
|
"grad_norm": 4.135016918182373,
|
|
"learning_rate": 9.994609778005704e-06,
|
|
"loss": 1.3413,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.3204787234042553,
|
|
"grad_norm": 4.273712635040283,
|
|
"learning_rate": 9.994568873059829e-06,
|
|
"loss": 1.2102,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.32074468085106383,
|
|
"grad_norm": 4.216573715209961,
|
|
"learning_rate": 9.994527813576111e-06,
|
|
"loss": 1.3998,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.32101063829787235,
|
|
"grad_norm": 3.847257375717163,
|
|
"learning_rate": 9.994486599555823e-06,
|
|
"loss": 1.1265,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.32127659574468087,
|
|
"grad_norm": 4.784033298492432,
|
|
"learning_rate": 9.99444523100024e-06,
|
|
"loss": 1.3363,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.3215425531914894,
|
|
"grad_norm": 4.474783897399902,
|
|
"learning_rate": 9.994403707910642e-06,
|
|
"loss": 1.2317,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.32180851063829785,
|
|
"grad_norm": 4.004277229309082,
|
|
"learning_rate": 9.994362030288312e-06,
|
|
"loss": 1.2477,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.32207446808510637,
|
|
"grad_norm": 3.9819071292877197,
|
|
"learning_rate": 9.99432019813454e-06,
|
|
"loss": 1.1898,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.3223404255319149,
|
|
"grad_norm": 3.8308217525482178,
|
|
"learning_rate": 9.994278211450622e-06,
|
|
"loss": 1.287,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.3226063829787234,
|
|
"grad_norm": 4.272090435028076,
|
|
"learning_rate": 9.994236070237854e-06,
|
|
"loss": 1.3905,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.3228723404255319,
|
|
"grad_norm": 4.1817169189453125,
|
|
"learning_rate": 9.994193774497544e-06,
|
|
"loss": 1.2512,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.32313829787234044,
|
|
"grad_norm": 3.9769554138183594,
|
|
"learning_rate": 9.994151324231e-06,
|
|
"loss": 1.2287,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.32340425531914896,
|
|
"grad_norm": 4.290254592895508,
|
|
"learning_rate": 9.994108719439533e-06,
|
|
"loss": 1.2741,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.3236702127659574,
|
|
"grad_norm": 4.185919284820557,
|
|
"learning_rate": 9.994065960124462e-06,
|
|
"loss": 1.3203,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.32393617021276594,
|
|
"grad_norm": 4.25853967666626,
|
|
"learning_rate": 9.994023046287109e-06,
|
|
"loss": 1.3062,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.32420212765957446,
|
|
"grad_norm": 3.9912209510803223,
|
|
"learning_rate": 9.993979977928805e-06,
|
|
"loss": 1.1988,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.324468085106383,
|
|
"grad_norm": 3.865492343902588,
|
|
"learning_rate": 9.993936755050881e-06,
|
|
"loss": 1.1626,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.3247340425531915,
|
|
"grad_norm": 4.017344951629639,
|
|
"learning_rate": 9.993893377654673e-06,
|
|
"loss": 1.3626,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.325,
|
|
"grad_norm": 3.9618587493896484,
|
|
"learning_rate": 9.993849845741525e-06,
|
|
"loss": 1.361,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.32526595744680853,
|
|
"grad_norm": 4.2321648597717285,
|
|
"learning_rate": 9.993806159312783e-06,
|
|
"loss": 1.3773,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.32553191489361705,
|
|
"grad_norm": 4.570196151733398,
|
|
"learning_rate": 9.9937623183698e-06,
|
|
"loss": 1.3895,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.3257978723404255,
|
|
"grad_norm": 3.9867353439331055,
|
|
"learning_rate": 9.99371832291393e-06,
|
|
"loss": 1.1623,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.326063829787234,
|
|
"grad_norm": 5.1412200927734375,
|
|
"learning_rate": 9.993674172946536e-06,
|
|
"loss": 1.3987,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.32632978723404255,
|
|
"grad_norm": 4.0850605964660645,
|
|
"learning_rate": 9.993629868468984e-06,
|
|
"loss": 1.2399,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.32659574468085106,
|
|
"grad_norm": 5.263411521911621,
|
|
"learning_rate": 9.993585409482645e-06,
|
|
"loss": 1.311,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.3268617021276596,
|
|
"grad_norm": 3.8653786182403564,
|
|
"learning_rate": 9.993540795988895e-06,
|
|
"loss": 1.1391,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.3271276595744681,
|
|
"grad_norm": 4.475793838500977,
|
|
"learning_rate": 9.993496027989112e-06,
|
|
"loss": 1.2644,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.3273936170212766,
|
|
"grad_norm": 4.395388603210449,
|
|
"learning_rate": 9.993451105484682e-06,
|
|
"loss": 1.342,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.3276595744680851,
|
|
"grad_norm": 4.290927410125732,
|
|
"learning_rate": 9.993406028476997e-06,
|
|
"loss": 1.3893,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.3279255319148936,
|
|
"grad_norm": 4.348012924194336,
|
|
"learning_rate": 9.993360796967451e-06,
|
|
"loss": 1.2903,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.3281914893617021,
|
|
"grad_norm": 4.174604415893555,
|
|
"learning_rate": 9.993315410957442e-06,
|
|
"loss": 1.2951,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.32845744680851063,
|
|
"grad_norm": 4.359421253204346,
|
|
"learning_rate": 9.993269870448375e-06,
|
|
"loss": 1.4433,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.32872340425531915,
|
|
"grad_norm": 4.25851583480835,
|
|
"learning_rate": 9.99322417544166e-06,
|
|
"loss": 1.2445,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.32898936170212767,
|
|
"grad_norm": 4.110776901245117,
|
|
"learning_rate": 9.993178325938711e-06,
|
|
"loss": 1.3569,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.3292553191489362,
|
|
"grad_norm": 4.008944988250732,
|
|
"learning_rate": 9.993132321940947e-06,
|
|
"loss": 1.2227,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.3295212765957447,
|
|
"grad_norm": 4.228448390960693,
|
|
"learning_rate": 9.993086163449787e-06,
|
|
"loss": 1.2388,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.32978723404255317,
|
|
"grad_norm": 4.701793193817139,
|
|
"learning_rate": 9.993039850466664e-06,
|
|
"loss": 1.5212,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.3300531914893617,
|
|
"grad_norm": 4.4202094078063965,
|
|
"learning_rate": 9.99299338299301e-06,
|
|
"loss": 1.2413,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.3303191489361702,
|
|
"grad_norm": 4.218541622161865,
|
|
"learning_rate": 9.992946761030261e-06,
|
|
"loss": 1.2663,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.3305851063829787,
|
|
"grad_norm": 4.355581283569336,
|
|
"learning_rate": 9.99289998457986e-06,
|
|
"loss": 1.3233,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.33085106382978724,
|
|
"grad_norm": 4.184298992156982,
|
|
"learning_rate": 9.992853053643257e-06,
|
|
"loss": 1.3291,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.33111702127659576,
|
|
"grad_norm": 4.030219078063965,
|
|
"learning_rate": 9.992805968221902e-06,
|
|
"loss": 1.3502,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.3313829787234043,
|
|
"grad_norm": 4.068756103515625,
|
|
"learning_rate": 9.992758728317252e-06,
|
|
"loss": 1.1977,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.3316489361702128,
|
|
"grad_norm": 4.332919120788574,
|
|
"learning_rate": 9.99271133393077e-06,
|
|
"loss": 1.2899,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.33191489361702126,
|
|
"grad_norm": 3.9694416522979736,
|
|
"learning_rate": 9.992663785063919e-06,
|
|
"loss": 1.3366,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.3321808510638298,
|
|
"grad_norm": 3.924436569213867,
|
|
"learning_rate": 9.992616081718171e-06,
|
|
"loss": 1.2552,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.3324468085106383,
|
|
"grad_norm": 4.128008842468262,
|
|
"learning_rate": 9.992568223895007e-06,
|
|
"loss": 1.2872,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.3327127659574468,
|
|
"grad_norm": 4.744760036468506,
|
|
"learning_rate": 9.992520211595902e-06,
|
|
"loss": 1.2885,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.33297872340425533,
|
|
"grad_norm": 3.722013235092163,
|
|
"learning_rate": 9.992472044822344e-06,
|
|
"loss": 1.1684,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.33324468085106385,
|
|
"grad_norm": 4.375733852386475,
|
|
"learning_rate": 9.992423723575822e-06,
|
|
"loss": 1.4177,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.33351063829787236,
|
|
"grad_norm": 4.03129243850708,
|
|
"learning_rate": 9.992375247857833e-06,
|
|
"loss": 1.3669,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.3337765957446808,
|
|
"grad_norm": 3.828651189804077,
|
|
"learning_rate": 9.992326617669876e-06,
|
|
"loss": 1.3573,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.33404255319148934,
|
|
"grad_norm": 4.016900062561035,
|
|
"learning_rate": 9.992277833013457e-06,
|
|
"loss": 1.2265,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.33430851063829786,
|
|
"grad_norm": 4.38175630569458,
|
|
"learning_rate": 9.992228893890084e-06,
|
|
"loss": 1.3774,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.3345744680851064,
|
|
"grad_norm": 4.081117153167725,
|
|
"learning_rate": 9.992179800301269e-06,
|
|
"loss": 1.2978,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.3348404255319149,
|
|
"grad_norm": 4.280460834503174,
|
|
"learning_rate": 9.992130552248535e-06,
|
|
"loss": 1.1316,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.3351063829787234,
|
|
"grad_norm": 4.5057268142700195,
|
|
"learning_rate": 9.992081149733404e-06,
|
|
"loss": 1.3776,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.33537234042553193,
|
|
"grad_norm": 3.8671257495880127,
|
|
"learning_rate": 9.992031592757405e-06,
|
|
"loss": 1.3541,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.33563829787234045,
|
|
"grad_norm": 4.478667736053467,
|
|
"learning_rate": 9.991981881322072e-06,
|
|
"loss": 1.3155,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.3359042553191489,
|
|
"grad_norm": 5.32509183883667,
|
|
"learning_rate": 9.991932015428941e-06,
|
|
"loss": 1.3662,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.33617021276595743,
|
|
"grad_norm": 4.138638973236084,
|
|
"learning_rate": 9.991881995079558e-06,
|
|
"loss": 1.3641,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.33643617021276595,
|
|
"grad_norm": 4.780951499938965,
|
|
"learning_rate": 9.991831820275466e-06,
|
|
"loss": 1.4626,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.33670212765957447,
|
|
"grad_norm": 3.6165192127227783,
|
|
"learning_rate": 9.991781491018223e-06,
|
|
"loss": 1.2914,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.336968085106383,
|
|
"grad_norm": 4.3747992515563965,
|
|
"learning_rate": 9.991731007309382e-06,
|
|
"loss": 1.2756,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.3372340425531915,
|
|
"grad_norm": 5.0972580909729,
|
|
"learning_rate": 9.991680369150507e-06,
|
|
"loss": 1.4694,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.3375,
|
|
"grad_norm": 3.841791868209839,
|
|
"learning_rate": 9.991629576543164e-06,
|
|
"loss": 1.1905,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.3377659574468085,
|
|
"grad_norm": 4.1475324630737305,
|
|
"learning_rate": 9.991578629488926e-06,
|
|
"loss": 1.3379,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.338031914893617,
|
|
"grad_norm": 4.152446269989014,
|
|
"learning_rate": 9.991527527989366e-06,
|
|
"loss": 1.1402,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.3382978723404255,
|
|
"grad_norm": 4.5577006340026855,
|
|
"learning_rate": 9.99147627204607e-06,
|
|
"loss": 1.3844,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.33856382978723404,
|
|
"grad_norm": 4.605076313018799,
|
|
"learning_rate": 9.991424861660621e-06,
|
|
"loss": 1.4557,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.33882978723404256,
|
|
"grad_norm": 4.045496940612793,
|
|
"learning_rate": 9.99137329683461e-06,
|
|
"loss": 1.2976,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.3390957446808511,
|
|
"grad_norm": 4.148492336273193,
|
|
"learning_rate": 9.991321577569632e-06,
|
|
"loss": 1.4065,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.3393617021276596,
|
|
"grad_norm": 4.128026485443115,
|
|
"learning_rate": 9.991269703867288e-06,
|
|
"loss": 1.3056,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.3396276595744681,
|
|
"grad_norm": 4.140103340148926,
|
|
"learning_rate": 9.991217675729184e-06,
|
|
"loss": 1.3136,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.3398936170212766,
|
|
"grad_norm": 4.122238636016846,
|
|
"learning_rate": 9.991165493156927e-06,
|
|
"loss": 1.2575,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.3401595744680851,
|
|
"grad_norm": 4.590948104858398,
|
|
"learning_rate": 9.991113156152134e-06,
|
|
"loss": 1.2896,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.3404255319148936,
|
|
"grad_norm": 4.469196796417236,
|
|
"learning_rate": 9.991060664716423e-06,
|
|
"loss": 1.4088,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.3406914893617021,
|
|
"grad_norm": 4.643316268920898,
|
|
"learning_rate": 9.99100801885142e-06,
|
|
"loss": 1.4124,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.34095744680851064,
|
|
"grad_norm": 4.106162071228027,
|
|
"learning_rate": 9.990955218558751e-06,
|
|
"loss": 1.3555,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.34122340425531916,
|
|
"grad_norm": 4.337850093841553,
|
|
"learning_rate": 9.990902263840053e-06,
|
|
"loss": 1.1865,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.3414893617021277,
|
|
"grad_norm": 3.8557538986206055,
|
|
"learning_rate": 9.990849154696963e-06,
|
|
"loss": 1.2002,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.34175531914893614,
|
|
"grad_norm": 4.412120342254639,
|
|
"learning_rate": 9.990795891131125e-06,
|
|
"loss": 1.3584,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.34202127659574466,
|
|
"grad_norm": 5.199094772338867,
|
|
"learning_rate": 9.990742473144184e-06,
|
|
"loss": 1.3745,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.3422872340425532,
|
|
"grad_norm": 3.8888189792633057,
|
|
"learning_rate": 9.990688900737795e-06,
|
|
"loss": 1.2443,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.3425531914893617,
|
|
"grad_norm": 3.81540846824646,
|
|
"learning_rate": 9.990635173913616e-06,
|
|
"loss": 1.347,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.3428191489361702,
|
|
"grad_norm": 4.090488910675049,
|
|
"learning_rate": 9.990581292673309e-06,
|
|
"loss": 1.283,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.34308510638297873,
|
|
"grad_norm": 4.115976333618164,
|
|
"learning_rate": 9.990527257018544e-06,
|
|
"loss": 1.2893,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.34335106382978725,
|
|
"grad_norm": 3.9170165061950684,
|
|
"learning_rate": 9.990473066950987e-06,
|
|
"loss": 1.2133,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.34361702127659577,
|
|
"grad_norm": 3.8994202613830566,
|
|
"learning_rate": 9.990418722472317e-06,
|
|
"loss": 1.1986,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.34388297872340423,
|
|
"grad_norm": 3.8675310611724854,
|
|
"learning_rate": 9.990364223584218e-06,
|
|
"loss": 1.16,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.34414893617021275,
|
|
"grad_norm": 4.010871410369873,
|
|
"learning_rate": 9.990309570288374e-06,
|
|
"loss": 1.2748,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.34441489361702127,
|
|
"grad_norm": 4.264376163482666,
|
|
"learning_rate": 9.990254762586477e-06,
|
|
"loss": 1.167,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.3446808510638298,
|
|
"grad_norm": 4.201075553894043,
|
|
"learning_rate": 9.990199800480222e-06,
|
|
"loss": 1.2061,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.3449468085106383,
|
|
"grad_norm": 4.1181535720825195,
|
|
"learning_rate": 9.99014468397131e-06,
|
|
"loss": 1.188,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.3452127659574468,
|
|
"grad_norm": 3.747342824935913,
|
|
"learning_rate": 9.990089413061445e-06,
|
|
"loss": 1.1944,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.34547872340425534,
|
|
"grad_norm": 4.067655086517334,
|
|
"learning_rate": 9.990033987752341e-06,
|
|
"loss": 1.1876,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.34574468085106386,
|
|
"grad_norm": 4.090482234954834,
|
|
"learning_rate": 9.989978408045709e-06,
|
|
"loss": 1.2122,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.3460106382978723,
|
|
"grad_norm": 3.879619598388672,
|
|
"learning_rate": 9.989922673943271e-06,
|
|
"loss": 1.2099,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.34627659574468084,
|
|
"grad_norm": 4.814892768859863,
|
|
"learning_rate": 9.98986678544675e-06,
|
|
"loss": 1.3879,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.34654255319148936,
|
|
"grad_norm": 4.234111309051514,
|
|
"learning_rate": 9.989810742557875e-06,
|
|
"loss": 1.5134,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.3468085106382979,
|
|
"grad_norm": 4.2561469078063965,
|
|
"learning_rate": 9.989754545278381e-06,
|
|
"loss": 1.3591,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.3470744680851064,
|
|
"grad_norm": 4.519184112548828,
|
|
"learning_rate": 9.989698193610007e-06,
|
|
"loss": 1.1676,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.3473404255319149,
|
|
"grad_norm": 4.09921407699585,
|
|
"learning_rate": 9.989641687554496e-06,
|
|
"loss": 1.238,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.3476063829787234,
|
|
"grad_norm": 3.9749245643615723,
|
|
"learning_rate": 9.989585027113598e-06,
|
|
"loss": 1.2444,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.3478723404255319,
|
|
"grad_norm": 4.225282192230225,
|
|
"learning_rate": 9.989528212289064e-06,
|
|
"loss": 1.1724,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.3481382978723404,
|
|
"grad_norm": 4.391535758972168,
|
|
"learning_rate": 9.98947124308265e-06,
|
|
"loss": 1.4058,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.3484042553191489,
|
|
"grad_norm": 3.8815417289733887,
|
|
"learning_rate": 9.989414119496126e-06,
|
|
"loss": 1.2464,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.34867021276595744,
|
|
"grad_norm": 4.186168193817139,
|
|
"learning_rate": 9.989356841531252e-06,
|
|
"loss": 1.2393,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.34893617021276596,
|
|
"grad_norm": 3.9777474403381348,
|
|
"learning_rate": 9.989299409189802e-06,
|
|
"loss": 1.1674,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.3492021276595745,
|
|
"grad_norm": 4.088747978210449,
|
|
"learning_rate": 9.989241822473557e-06,
|
|
"loss": 1.2024,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.349468085106383,
|
|
"grad_norm": 4.297309398651123,
|
|
"learning_rate": 9.989184081384295e-06,
|
|
"loss": 1.384,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.3497340425531915,
|
|
"grad_norm": 3.6362228393554688,
|
|
"learning_rate": 9.989126185923803e-06,
|
|
"loss": 1.266,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"grad_norm": 4.015252113342285,
|
|
"learning_rate": 9.989068136093873e-06,
|
|
"loss": 1.2447,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.3502659574468085,
|
|
"grad_norm": 3.9256210327148438,
|
|
"learning_rate": 9.989009931896302e-06,
|
|
"loss": 1.2674,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.350531914893617,
|
|
"grad_norm": 4.108496189117432,
|
|
"learning_rate": 9.988951573332888e-06,
|
|
"loss": 1.232,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.35079787234042553,
|
|
"grad_norm": 4.183421611785889,
|
|
"learning_rate": 9.98889306040544e-06,
|
|
"loss": 1.2652,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.35106382978723405,
|
|
"grad_norm": 4.556921482086182,
|
|
"learning_rate": 9.988834393115768e-06,
|
|
"loss": 1.3536,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.35132978723404257,
|
|
"grad_norm": 4.081547737121582,
|
|
"learning_rate": 9.988775571465684e-06,
|
|
"loss": 1.3168,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.3515957446808511,
|
|
"grad_norm": 4.136814594268799,
|
|
"learning_rate": 9.988716595457011e-06,
|
|
"loss": 1.3124,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.35186170212765955,
|
|
"grad_norm": 4.485897064208984,
|
|
"learning_rate": 9.988657465091572e-06,
|
|
"loss": 1.3164,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.35212765957446807,
|
|
"grad_norm": 4.273427963256836,
|
|
"learning_rate": 9.988598180371198e-06,
|
|
"loss": 1.2051,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.3523936170212766,
|
|
"grad_norm": 3.715895175933838,
|
|
"learning_rate": 9.988538741297724e-06,
|
|
"loss": 1.0755,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.3526595744680851,
|
|
"grad_norm": 3.932218551635742,
|
|
"learning_rate": 9.98847914787299e-06,
|
|
"loss": 1.4028,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.3529255319148936,
|
|
"grad_norm": 4.555146217346191,
|
|
"learning_rate": 9.988419400098834e-06,
|
|
"loss": 1.2805,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.35319148936170214,
|
|
"grad_norm": 4.291238784790039,
|
|
"learning_rate": 9.98835949797711e-06,
|
|
"loss": 1.3683,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.35345744680851066,
|
|
"grad_norm": 4.525993824005127,
|
|
"learning_rate": 9.98829944150967e-06,
|
|
"loss": 1.2788,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.3537234042553192,
|
|
"grad_norm": 3.771448850631714,
|
|
"learning_rate": 9.988239230698373e-06,
|
|
"loss": 1.3256,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.35398936170212764,
|
|
"grad_norm": 4.0126633644104,
|
|
"learning_rate": 9.988178865545081e-06,
|
|
"loss": 1.2984,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.35425531914893615,
|
|
"grad_norm": 3.521714210510254,
|
|
"learning_rate": 9.988118346051663e-06,
|
|
"loss": 1.192,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.35452127659574467,
|
|
"grad_norm": 4.065241813659668,
|
|
"learning_rate": 9.98805767221999e-06,
|
|
"loss": 1.383,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.3547872340425532,
|
|
"grad_norm": 4.3708720207214355,
|
|
"learning_rate": 9.987996844051939e-06,
|
|
"loss": 1.3586,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.3550531914893617,
|
|
"grad_norm": 4.104064464569092,
|
|
"learning_rate": 9.987935861549393e-06,
|
|
"loss": 1.2583,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.3553191489361702,
|
|
"grad_norm": 4.293087959289551,
|
|
"learning_rate": 9.98787472471424e-06,
|
|
"loss": 1.3606,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.35558510638297874,
|
|
"grad_norm": 3.906818151473999,
|
|
"learning_rate": 9.98781343354837e-06,
|
|
"loss": 1.2305,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.3558510638297872,
|
|
"grad_norm": 4.049057960510254,
|
|
"learning_rate": 9.98775198805368e-06,
|
|
"loss": 1.1915,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.3561170212765957,
|
|
"grad_norm": 4.160476207733154,
|
|
"learning_rate": 9.987690388232071e-06,
|
|
"loss": 1.3273,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.35638297872340424,
|
|
"grad_norm": 4.2301344871521,
|
|
"learning_rate": 9.98762863408545e-06,
|
|
"loss": 1.242,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.35664893617021276,
|
|
"grad_norm": 4.272438049316406,
|
|
"learning_rate": 9.987566725615725e-06,
|
|
"loss": 1.3378,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.3569148936170213,
|
|
"grad_norm": 4.048627853393555,
|
|
"learning_rate": 9.987504662824814e-06,
|
|
"loss": 1.2938,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.3571808510638298,
|
|
"grad_norm": 4.272396087646484,
|
|
"learning_rate": 9.987442445714637e-06,
|
|
"loss": 1.363,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.3574468085106383,
|
|
"grad_norm": 4.04710578918457,
|
|
"learning_rate": 9.98738007428712e-06,
|
|
"loss": 1.3823,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.35771276595744683,
|
|
"grad_norm": 4.724300384521484,
|
|
"learning_rate": 9.98731754854419e-06,
|
|
"loss": 1.4429,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.3579787234042553,
|
|
"grad_norm": 4.071347713470459,
|
|
"learning_rate": 9.987254868487783e-06,
|
|
"loss": 1.2203,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.3582446808510638,
|
|
"grad_norm": 3.8509132862091064,
|
|
"learning_rate": 9.987192034119839e-06,
|
|
"loss": 1.2774,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.35851063829787233,
|
|
"grad_norm": 3.7690467834472656,
|
|
"learning_rate": 9.987129045442304e-06,
|
|
"loss": 1.1786,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.35877659574468085,
|
|
"grad_norm": 4.102452754974365,
|
|
"learning_rate": 9.987065902457122e-06,
|
|
"loss": 1.232,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.35904255319148937,
|
|
"grad_norm": 4.353301048278809,
|
|
"learning_rate": 9.98700260516625e-06,
|
|
"loss": 1.204,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.3593085106382979,
|
|
"grad_norm": 4.020050048828125,
|
|
"learning_rate": 9.986939153571647e-06,
|
|
"loss": 1.2681,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.3595744680851064,
|
|
"grad_norm": 4.041562080383301,
|
|
"learning_rate": 9.986875547675274e-06,
|
|
"loss": 1.2093,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.3598404255319149,
|
|
"grad_norm": 3.9428937435150146,
|
|
"learning_rate": 9.9868117874791e-06,
|
|
"loss": 1.4088,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.3601063829787234,
|
|
"grad_norm": 3.8776018619537354,
|
|
"learning_rate": 9.986747872985099e-06,
|
|
"loss": 1.2944,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.3603723404255319,
|
|
"grad_norm": 4.4396796226501465,
|
|
"learning_rate": 9.986683804195248e-06,
|
|
"loss": 1.2328,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.3606382978723404,
|
|
"grad_norm": 6.8338093757629395,
|
|
"learning_rate": 9.986619581111528e-06,
|
|
"loss": 1.2865,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.36090425531914894,
|
|
"grad_norm": 3.8783535957336426,
|
|
"learning_rate": 9.986555203735926e-06,
|
|
"loss": 1.2004,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.36117021276595745,
|
|
"grad_norm": 4.063074111938477,
|
|
"learning_rate": 9.986490672070438e-06,
|
|
"loss": 1.2033,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.361436170212766,
|
|
"grad_norm": 5.602739334106445,
|
|
"learning_rate": 9.986425986117055e-06,
|
|
"loss": 1.2993,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.3617021276595745,
|
|
"grad_norm": 3.687655448913574,
|
|
"learning_rate": 9.986361145877783e-06,
|
|
"loss": 1.1984,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.36196808510638295,
|
|
"grad_norm": 4.312001705169678,
|
|
"learning_rate": 9.986296151354625e-06,
|
|
"loss": 1.2943,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.36223404255319147,
|
|
"grad_norm": 4.478762149810791,
|
|
"learning_rate": 9.986231002549594e-06,
|
|
"loss": 1.294,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.3625,
|
|
"grad_norm": 4.86306095123291,
|
|
"learning_rate": 9.986165699464706e-06,
|
|
"loss": 1.5325,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.3627659574468085,
|
|
"grad_norm": 4.426929950714111,
|
|
"learning_rate": 9.986100242101982e-06,
|
|
"loss": 1.3561,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.363031914893617,
|
|
"grad_norm": 4.546680450439453,
|
|
"learning_rate": 9.986034630463443e-06,
|
|
"loss": 1.3143,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.36329787234042554,
|
|
"grad_norm": 4.5038957595825195,
|
|
"learning_rate": 9.985968864551123e-06,
|
|
"loss": 1.2948,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.36356382978723406,
|
|
"grad_norm": 4.967344284057617,
|
|
"learning_rate": 9.985902944367058e-06,
|
|
"loss": 1.2844,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.3638297872340426,
|
|
"grad_norm": 3.8887312412261963,
|
|
"learning_rate": 9.985836869913283e-06,
|
|
"loss": 1.2737,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.36409574468085104,
|
|
"grad_norm": 4.1144795417785645,
|
|
"learning_rate": 9.985770641191847e-06,
|
|
"loss": 1.3379,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.36436170212765956,
|
|
"grad_norm": 4.12211275100708,
|
|
"learning_rate": 9.985704258204798e-06,
|
|
"loss": 1.3465,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.3646276595744681,
|
|
"grad_norm": 4.424558162689209,
|
|
"learning_rate": 9.985637720954188e-06,
|
|
"loss": 1.0785,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.3648936170212766,
|
|
"grad_norm": 4.308188438415527,
|
|
"learning_rate": 9.985571029442078e-06,
|
|
"loss": 1.4829,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.3651595744680851,
|
|
"grad_norm": 3.587887763977051,
|
|
"learning_rate": 9.98550418367053e-06,
|
|
"loss": 1.2684,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.36542553191489363,
|
|
"grad_norm": 4.300267696380615,
|
|
"learning_rate": 9.985437183641612e-06,
|
|
"loss": 1.305,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.36569148936170215,
|
|
"grad_norm": 4.035099506378174,
|
|
"learning_rate": 9.985370029357399e-06,
|
|
"loss": 1.2249,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.3659574468085106,
|
|
"grad_norm": 3.958627939224243,
|
|
"learning_rate": 9.985302720819967e-06,
|
|
"loss": 1.2176,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.36622340425531913,
|
|
"grad_norm": 4.257254600524902,
|
|
"learning_rate": 9.9852352580314e-06,
|
|
"loss": 1.2714,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.36648936170212765,
|
|
"grad_norm": 4.782037258148193,
|
|
"learning_rate": 9.985167640993784e-06,
|
|
"loss": 1.4979,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.36675531914893617,
|
|
"grad_norm": 4.400300025939941,
|
|
"learning_rate": 9.985099869709213e-06,
|
|
"loss": 1.3505,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.3670212765957447,
|
|
"grad_norm": 4.289068698883057,
|
|
"learning_rate": 9.985031944179781e-06,
|
|
"loss": 1.2113,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.3672872340425532,
|
|
"grad_norm": 4.770625591278076,
|
|
"learning_rate": 9.984963864407593e-06,
|
|
"loss": 1.4373,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.3675531914893617,
|
|
"grad_norm": 4.392122268676758,
|
|
"learning_rate": 9.984895630394755e-06,
|
|
"loss": 1.3069,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.36781914893617024,
|
|
"grad_norm": 3.9814369678497314,
|
|
"learning_rate": 9.984827242143376e-06,
|
|
"loss": 1.281,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.3680851063829787,
|
|
"grad_norm": 3.9791054725646973,
|
|
"learning_rate": 9.984758699655572e-06,
|
|
"loss": 1.1758,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.3683510638297872,
|
|
"grad_norm": 4.434001922607422,
|
|
"learning_rate": 9.984690002933465e-06,
|
|
"loss": 1.3586,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.36861702127659574,
|
|
"grad_norm": 4.445183753967285,
|
|
"learning_rate": 9.984621151979183e-06,
|
|
"loss": 1.367,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.36888297872340425,
|
|
"grad_norm": 3.8560211658477783,
|
|
"learning_rate": 9.984552146794853e-06,
|
|
"loss": 1.2933,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.36914893617021277,
|
|
"grad_norm": 4.20532751083374,
|
|
"learning_rate": 9.984482987382612e-06,
|
|
"loss": 1.3036,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.3694148936170213,
|
|
"grad_norm": 4.1775898933410645,
|
|
"learning_rate": 9.984413673744597e-06,
|
|
"loss": 1.1862,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.3696808510638298,
|
|
"grad_norm": 4.668176651000977,
|
|
"learning_rate": 9.984344205882954e-06,
|
|
"loss": 1.3125,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.3699468085106383,
|
|
"grad_norm": 4.170348644256592,
|
|
"learning_rate": 9.984274583799833e-06,
|
|
"loss": 1.1855,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.3702127659574468,
|
|
"grad_norm": 3.893609046936035,
|
|
"learning_rate": 9.98420480749739e-06,
|
|
"loss": 1.3567,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.3704787234042553,
|
|
"grad_norm": 3.791059970855713,
|
|
"learning_rate": 9.98413487697778e-06,
|
|
"loss": 1.2596,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.3707446808510638,
|
|
"grad_norm": 3.89493465423584,
|
|
"learning_rate": 9.984064792243171e-06,
|
|
"loss": 1.1468,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.37101063829787234,
|
|
"grad_norm": 3.932354211807251,
|
|
"learning_rate": 9.983994553295728e-06,
|
|
"loss": 1.2274,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.37127659574468086,
|
|
"grad_norm": 3.772759199142456,
|
|
"learning_rate": 9.983924160137627e-06,
|
|
"loss": 1.1687,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.3715425531914894,
|
|
"grad_norm": 4.090175628662109,
|
|
"learning_rate": 9.983853612771043e-06,
|
|
"loss": 1.1627,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.3718085106382979,
|
|
"grad_norm": 5.041259288787842,
|
|
"learning_rate": 9.983782911198161e-06,
|
|
"loss": 1.2878,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.37207446808510636,
|
|
"grad_norm": 4.565484523773193,
|
|
"learning_rate": 9.98371205542117e-06,
|
|
"loss": 1.2838,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.3723404255319149,
|
|
"grad_norm": 3.94577956199646,
|
|
"learning_rate": 9.983641045442256e-06,
|
|
"loss": 1.3253,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3726063829787234,
|
|
"grad_norm": 3.559597969055176,
|
|
"learning_rate": 9.983569881263625e-06,
|
|
"loss": 1.0896,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.3728723404255319,
|
|
"grad_norm": 4.101516246795654,
|
|
"learning_rate": 9.983498562887471e-06,
|
|
"loss": 1.4844,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.37313829787234043,
|
|
"grad_norm": 4.680913925170898,
|
|
"learning_rate": 9.983427090316005e-06,
|
|
"loss": 1.3343,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.37340425531914895,
|
|
"grad_norm": 5.2188286781311035,
|
|
"learning_rate": 9.983355463551439e-06,
|
|
"loss": 1.3206,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.37367021276595747,
|
|
"grad_norm": 4.363986968994141,
|
|
"learning_rate": 9.983283682595986e-06,
|
|
"loss": 1.5722,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.373936170212766,
|
|
"grad_norm": 4.405764579772949,
|
|
"learning_rate": 9.98321174745187e-06,
|
|
"loss": 1.3106,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.37420212765957445,
|
|
"grad_norm": 3.671576738357544,
|
|
"learning_rate": 9.983139658121316e-06,
|
|
"loss": 1.1663,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.37446808510638296,
|
|
"grad_norm": 4.068467140197754,
|
|
"learning_rate": 9.983067414606553e-06,
|
|
"loss": 1.3443,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.3747340425531915,
|
|
"grad_norm": 4.050812244415283,
|
|
"learning_rate": 9.982995016909817e-06,
|
|
"loss": 1.2671,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.375,
|
|
"grad_norm": 4.016097545623779,
|
|
"learning_rate": 9.98292246503335e-06,
|
|
"loss": 1.2389,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.3752659574468085,
|
|
"grad_norm": 4.278280258178711,
|
|
"learning_rate": 9.982849758979394e-06,
|
|
"loss": 1.3095,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.37553191489361704,
|
|
"grad_norm": 3.826686143875122,
|
|
"learning_rate": 9.9827768987502e-06,
|
|
"loss": 1.0923,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.37579787234042555,
|
|
"grad_norm": 3.954808473587036,
|
|
"learning_rate": 9.982703884348023e-06,
|
|
"loss": 1.3359,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.376063829787234,
|
|
"grad_norm": 3.8342320919036865,
|
|
"learning_rate": 9.982630715775121e-06,
|
|
"loss": 1.287,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.37632978723404253,
|
|
"grad_norm": 4.190742492675781,
|
|
"learning_rate": 9.982557393033758e-06,
|
|
"loss": 1.2957,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.37659574468085105,
|
|
"grad_norm": 4.030623435974121,
|
|
"learning_rate": 9.982483916126204e-06,
|
|
"loss": 1.2992,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.37686170212765957,
|
|
"grad_norm": 4.164768695831299,
|
|
"learning_rate": 9.98241028505473e-06,
|
|
"loss": 1.5608,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.3771276595744681,
|
|
"grad_norm": 4.243110656738281,
|
|
"learning_rate": 9.982336499821617e-06,
|
|
"loss": 1.3214,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.3773936170212766,
|
|
"grad_norm": 3.969595193862915,
|
|
"learning_rate": 9.982262560429147e-06,
|
|
"loss": 1.3743,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.3776595744680851,
|
|
"grad_norm": 4.253571033477783,
|
|
"learning_rate": 9.982188466879607e-06,
|
|
"loss": 1.329,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.37792553191489364,
|
|
"grad_norm": 4.254541397094727,
|
|
"learning_rate": 9.98211421917529e-06,
|
|
"loss": 1.3093,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.3781914893617021,
|
|
"grad_norm": 4.365729808807373,
|
|
"learning_rate": 9.982039817318491e-06,
|
|
"loss": 1.3744,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.3784574468085106,
|
|
"grad_norm": 4.0368499755859375,
|
|
"learning_rate": 9.981965261311519e-06,
|
|
"loss": 1.1517,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.37872340425531914,
|
|
"grad_norm": 4.165602207183838,
|
|
"learning_rate": 9.981890551156673e-06,
|
|
"loss": 1.2983,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.37898936170212766,
|
|
"grad_norm": 4.241005897521973,
|
|
"learning_rate": 9.981815686856268e-06,
|
|
"loss": 1.2491,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.3792553191489362,
|
|
"grad_norm": 3.9506289958953857,
|
|
"learning_rate": 9.981740668412622e-06,
|
|
"loss": 1.175,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.3795212765957447,
|
|
"grad_norm": 4.209918022155762,
|
|
"learning_rate": 9.981665495828053e-06,
|
|
"loss": 1.379,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.3797872340425532,
|
|
"grad_norm": 4.048032283782959,
|
|
"learning_rate": 9.981590169104889e-06,
|
|
"loss": 1.4339,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.3800531914893617,
|
|
"grad_norm": 3.9107158184051514,
|
|
"learning_rate": 9.98151468824546e-06,
|
|
"loss": 1.4468,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.3803191489361702,
|
|
"grad_norm": 3.8230321407318115,
|
|
"learning_rate": 9.981439053252102e-06,
|
|
"loss": 1.2942,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.3805851063829787,
|
|
"grad_norm": 3.772338390350342,
|
|
"learning_rate": 9.981363264127154e-06,
|
|
"loss": 1.3236,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.38085106382978723,
|
|
"grad_norm": 4.234860897064209,
|
|
"learning_rate": 9.981287320872962e-06,
|
|
"loss": 1.3763,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.38111702127659575,
|
|
"grad_norm": 3.8890817165374756,
|
|
"learning_rate": 9.981211223491876e-06,
|
|
"loss": 1.3667,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.38138297872340426,
|
|
"grad_norm": 3.8217055797576904,
|
|
"learning_rate": 9.98113497198625e-06,
|
|
"loss": 1.1392,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.3816489361702128,
|
|
"grad_norm": 3.9971745014190674,
|
|
"learning_rate": 9.981058566358443e-06,
|
|
"loss": 1.1892,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.3819148936170213,
|
|
"grad_norm": 4.417277812957764,
|
|
"learning_rate": 9.98098200661082e-06,
|
|
"loss": 1.3306,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.38218085106382976,
|
|
"grad_norm": 4.433936595916748,
|
|
"learning_rate": 9.980905292745749e-06,
|
|
"loss": 1.2253,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.3824468085106383,
|
|
"grad_norm": 3.668414831161499,
|
|
"learning_rate": 9.980828424765603e-06,
|
|
"loss": 1.3243,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.3827127659574468,
|
|
"grad_norm": 4.062864303588867,
|
|
"learning_rate": 9.980751402672762e-06,
|
|
"loss": 1.2416,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.3829787234042553,
|
|
"grad_norm": 4.28949499130249,
|
|
"learning_rate": 9.980674226469608e-06,
|
|
"loss": 1.3018,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.38324468085106383,
|
|
"grad_norm": 3.598482847213745,
|
|
"learning_rate": 9.980596896158532e-06,
|
|
"loss": 1.1174,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.38351063829787235,
|
|
"grad_norm": 4.300634384155273,
|
|
"learning_rate": 9.980519411741922e-06,
|
|
"loss": 1.3079,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.38377659574468087,
|
|
"grad_norm": 4.2363128662109375,
|
|
"learning_rate": 9.980441773222178e-06,
|
|
"loss": 1.3546,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.3840425531914894,
|
|
"grad_norm": 4.521866321563721,
|
|
"learning_rate": 9.980363980601702e-06,
|
|
"loss": 1.2007,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.38430851063829785,
|
|
"grad_norm": 3.9129135608673096,
|
|
"learning_rate": 9.9802860338829e-06,
|
|
"loss": 1.3101,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.38457446808510637,
|
|
"grad_norm": 4.559953689575195,
|
|
"learning_rate": 9.980207933068185e-06,
|
|
"loss": 1.3183,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.3848404255319149,
|
|
"grad_norm": 4.102110385894775,
|
|
"learning_rate": 9.980129678159974e-06,
|
|
"loss": 1.2549,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.3851063829787234,
|
|
"grad_norm": 4.215007781982422,
|
|
"learning_rate": 9.980051269160686e-06,
|
|
"loss": 1.3281,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.3853723404255319,
|
|
"grad_norm": 4.188117980957031,
|
|
"learning_rate": 9.97997270607275e-06,
|
|
"loss": 1.267,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.38563829787234044,
|
|
"grad_norm": 3.9828150272369385,
|
|
"learning_rate": 9.979893988898592e-06,
|
|
"loss": 1.2967,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.38590425531914896,
|
|
"grad_norm": 3.9680116176605225,
|
|
"learning_rate": 9.979815117640654e-06,
|
|
"loss": 1.2711,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.3861702127659574,
|
|
"grad_norm": 3.9651451110839844,
|
|
"learning_rate": 9.979736092301374e-06,
|
|
"loss": 1.2298,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.38643617021276594,
|
|
"grad_norm": 3.7032337188720703,
|
|
"learning_rate": 9.979656912883193e-06,
|
|
"loss": 1.1644,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.38670212765957446,
|
|
"grad_norm": 4.174644470214844,
|
|
"learning_rate": 9.979577579388566e-06,
|
|
"loss": 1.1941,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.386968085106383,
|
|
"grad_norm": 3.9499082565307617,
|
|
"learning_rate": 9.979498091819946e-06,
|
|
"loss": 1.2205,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.3872340425531915,
|
|
"grad_norm": 4.005082130432129,
|
|
"learning_rate": 9.979418450179792e-06,
|
|
"loss": 1.2983,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.3875,
|
|
"grad_norm": 4.425258159637451,
|
|
"learning_rate": 9.97933865447057e-06,
|
|
"loss": 1.3444,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.38776595744680853,
|
|
"grad_norm": 4.169209003448486,
|
|
"learning_rate": 9.979258704694747e-06,
|
|
"loss": 1.3914,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.38803191489361705,
|
|
"grad_norm": 3.7960317134857178,
|
|
"learning_rate": 9.979178600854797e-06,
|
|
"loss": 1.2186,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.3882978723404255,
|
|
"grad_norm": 3.9216535091400146,
|
|
"learning_rate": 9.979098342953198e-06,
|
|
"loss": 1.0839,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.388563829787234,
|
|
"grad_norm": 4.077401638031006,
|
|
"learning_rate": 9.979017930992436e-06,
|
|
"loss": 1.225,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.38882978723404255,
|
|
"grad_norm": 3.871135950088501,
|
|
"learning_rate": 9.978937364974996e-06,
|
|
"loss": 1.2545,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.38909574468085106,
|
|
"grad_norm": 4.12876558303833,
|
|
"learning_rate": 9.978856644903373e-06,
|
|
"loss": 1.3806,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.3893617021276596,
|
|
"grad_norm": 4.172638416290283,
|
|
"learning_rate": 9.978775770780061e-06,
|
|
"loss": 1.3444,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.3896276595744681,
|
|
"grad_norm": 4.253303050994873,
|
|
"learning_rate": 9.978694742607566e-06,
|
|
"loss": 1.3015,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.3898936170212766,
|
|
"grad_norm": 3.937948226928711,
|
|
"learning_rate": 9.978613560388396e-06,
|
|
"loss": 1.4014,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.3901595744680851,
|
|
"grad_norm": 3.959920644760132,
|
|
"learning_rate": 9.978532224125059e-06,
|
|
"loss": 1.2797,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.3904255319148936,
|
|
"grad_norm": 4.240394592285156,
|
|
"learning_rate": 9.978450733820073e-06,
|
|
"loss": 1.3541,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.3906914893617021,
|
|
"grad_norm": 4.060705661773682,
|
|
"learning_rate": 9.97836908947596e-06,
|
|
"loss": 1.2997,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.39095744680851063,
|
|
"grad_norm": 4.276419162750244,
|
|
"learning_rate": 9.978287291095248e-06,
|
|
"loss": 1.4451,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.39122340425531915,
|
|
"grad_norm": 3.961526393890381,
|
|
"learning_rate": 9.978205338680465e-06,
|
|
"loss": 1.3248,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.39148936170212767,
|
|
"grad_norm": 4.002696514129639,
|
|
"learning_rate": 9.978123232234147e-06,
|
|
"loss": 1.3274,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.3917553191489362,
|
|
"grad_norm": 3.857750654220581,
|
|
"learning_rate": 9.978040971758836e-06,
|
|
"loss": 1.2552,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.3920212765957447,
|
|
"grad_norm": 3.973501682281494,
|
|
"learning_rate": 9.977958557257077e-06,
|
|
"loss": 1.3911,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.39228723404255317,
|
|
"grad_norm": 4.301419258117676,
|
|
"learning_rate": 9.977875988731418e-06,
|
|
"loss": 1.2423,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.3925531914893617,
|
|
"grad_norm": 3.7840960025787354,
|
|
"learning_rate": 9.977793266184416e-06,
|
|
"loss": 1.1739,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.3928191489361702,
|
|
"grad_norm": 3.6807820796966553,
|
|
"learning_rate": 9.977710389618628e-06,
|
|
"loss": 1.1685,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.3930851063829787,
|
|
"grad_norm": 3.942674398422241,
|
|
"learning_rate": 9.977627359036624e-06,
|
|
"loss": 1.2033,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.39335106382978724,
|
|
"grad_norm": 4.07774543762207,
|
|
"learning_rate": 9.977544174440965e-06,
|
|
"loss": 1.2707,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.39361702127659576,
|
|
"grad_norm": 4.302217483520508,
|
|
"learning_rate": 9.977460835834231e-06,
|
|
"loss": 1.3944,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.3938829787234043,
|
|
"grad_norm": 4.006019592285156,
|
|
"learning_rate": 9.977377343218998e-06,
|
|
"loss": 1.3301,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.3941489361702128,
|
|
"grad_norm": 4.067336082458496,
|
|
"learning_rate": 9.977293696597849e-06,
|
|
"loss": 1.3282,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.39441489361702126,
|
|
"grad_norm": 4.4912004470825195,
|
|
"learning_rate": 9.977209895973374e-06,
|
|
"loss": 1.374,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.3946808510638298,
|
|
"grad_norm": 3.933626651763916,
|
|
"learning_rate": 9.977125941348165e-06,
|
|
"loss": 1.1584,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.3949468085106383,
|
|
"grad_norm": 4.08411169052124,
|
|
"learning_rate": 9.97704183272482e-06,
|
|
"loss": 1.3587,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.3952127659574468,
|
|
"grad_norm": 4.316272735595703,
|
|
"learning_rate": 9.976957570105939e-06,
|
|
"loss": 1.2544,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.39547872340425533,
|
|
"grad_norm": 4.05543851852417,
|
|
"learning_rate": 9.976873153494132e-06,
|
|
"loss": 1.1699,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.39574468085106385,
|
|
"grad_norm": 4.137149810791016,
|
|
"learning_rate": 9.976788582892012e-06,
|
|
"loss": 1.3501,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.39601063829787236,
|
|
"grad_norm": 3.830085515975952,
|
|
"learning_rate": 9.976703858302192e-06,
|
|
"loss": 1.2818,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.3962765957446808,
|
|
"grad_norm": 4.138214588165283,
|
|
"learning_rate": 9.976618979727295e-06,
|
|
"loss": 1.2769,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.39654255319148934,
|
|
"grad_norm": 4.205438137054443,
|
|
"learning_rate": 9.976533947169948e-06,
|
|
"loss": 1.4103,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.39680851063829786,
|
|
"grad_norm": 4.104953289031982,
|
|
"learning_rate": 9.976448760632782e-06,
|
|
"loss": 1.3701,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.3970744680851064,
|
|
"grad_norm": 3.725175619125366,
|
|
"learning_rate": 9.976363420118432e-06,
|
|
"loss": 1.2986,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.3973404255319149,
|
|
"grad_norm": 4.973143577575684,
|
|
"learning_rate": 9.97627792562954e-06,
|
|
"loss": 1.3123,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.3976063829787234,
|
|
"grad_norm": 3.5973260402679443,
|
|
"learning_rate": 9.976192277168748e-06,
|
|
"loss": 1.1878,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.39787234042553193,
|
|
"grad_norm": 3.9308860301971436,
|
|
"learning_rate": 9.97610647473871e-06,
|
|
"loss": 1.3139,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.39813829787234045,
|
|
"grad_norm": 3.831552028656006,
|
|
"learning_rate": 9.976020518342078e-06,
|
|
"loss": 1.249,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.3984042553191489,
|
|
"grad_norm": 3.8937809467315674,
|
|
"learning_rate": 9.975934407981512e-06,
|
|
"loss": 1.2361,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.39867021276595743,
|
|
"grad_norm": 4.4092512130737305,
|
|
"learning_rate": 9.97584814365968e-06,
|
|
"loss": 1.424,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.39893617021276595,
|
|
"grad_norm": 4.096745491027832,
|
|
"learning_rate": 9.975761725379243e-06,
|
|
"loss": 1.3488,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.39893617021276595,
|
|
"eval_loss": 1.3084138631820679,
|
|
"eval_runtime": 12.5754,
|
|
"eval_samples_per_second": 31.808,
|
|
"eval_steps_per_second": 3.976,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.39920212765957447,
|
|
"grad_norm": 5.023965835571289,
|
|
"learning_rate": 9.975675153142884e-06,
|
|
"loss": 1.3409,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.399468085106383,
|
|
"grad_norm": 4.182278156280518,
|
|
"learning_rate": 9.975588426953276e-06,
|
|
"loss": 1.2497,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.3997340425531915,
|
|
"grad_norm": 3.872786283493042,
|
|
"learning_rate": 9.975501546813104e-06,
|
|
"loss": 1.29,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 3.9527881145477295,
|
|
"learning_rate": 9.975414512725058e-06,
|
|
"loss": 1.3427,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.4002659574468085,
|
|
"grad_norm": 3.563168525695801,
|
|
"learning_rate": 9.975327324691828e-06,
|
|
"loss": 1.2509,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.400531914893617,
|
|
"grad_norm": 3.8460729122161865,
|
|
"learning_rate": 9.975239982716113e-06,
|
|
"loss": 1.214,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.4007978723404255,
|
|
"grad_norm": 4.321569442749023,
|
|
"learning_rate": 9.975152486800615e-06,
|
|
"loss": 1.1959,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.40106382978723404,
|
|
"grad_norm": 4.102901935577393,
|
|
"learning_rate": 9.975064836948041e-06,
|
|
"loss": 1.2786,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.40132978723404256,
|
|
"grad_norm": 3.8385143280029297,
|
|
"learning_rate": 9.974977033161103e-06,
|
|
"loss": 1.3574,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.4015957446808511,
|
|
"grad_norm": 3.912363290786743,
|
|
"learning_rate": 9.97488907544252e-06,
|
|
"loss": 1.388,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.4018617021276596,
|
|
"grad_norm": 4.346206188201904,
|
|
"learning_rate": 9.974800963795012e-06,
|
|
"loss": 1.4532,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.4021276595744681,
|
|
"grad_norm": 4.346587657928467,
|
|
"learning_rate": 9.974712698221306e-06,
|
|
"loss": 1.2098,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.4023936170212766,
|
|
"grad_norm": 3.9622318744659424,
|
|
"learning_rate": 9.97462427872413e-06,
|
|
"loss": 1.1556,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.4026595744680851,
|
|
"grad_norm": 3.903508186340332,
|
|
"learning_rate": 9.974535705306222e-06,
|
|
"loss": 1.1644,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.4029255319148936,
|
|
"grad_norm": 4.4463605880737305,
|
|
"learning_rate": 9.974446977970322e-06,
|
|
"loss": 1.4892,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.4031914893617021,
|
|
"grad_norm": 3.8401832580566406,
|
|
"learning_rate": 9.974358096719178e-06,
|
|
"loss": 1.3681,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.40345744680851064,
|
|
"grad_norm": 4.009060382843018,
|
|
"learning_rate": 9.974269061555537e-06,
|
|
"loss": 1.2134,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.40372340425531916,
|
|
"grad_norm": 3.609969139099121,
|
|
"learning_rate": 9.974179872482153e-06,
|
|
"loss": 1.34,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.4039893617021277,
|
|
"grad_norm": 4.289672374725342,
|
|
"learning_rate": 9.97409052950179e-06,
|
|
"loss": 1.4246,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.40425531914893614,
|
|
"grad_norm": 3.6479434967041016,
|
|
"learning_rate": 9.974001032617208e-06,
|
|
"loss": 1.2366,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.40452127659574466,
|
|
"grad_norm": 4.251558780670166,
|
|
"learning_rate": 9.973911381831178e-06,
|
|
"loss": 1.3208,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.4047872340425532,
|
|
"grad_norm": 3.7560923099517822,
|
|
"learning_rate": 9.973821577146475e-06,
|
|
"loss": 1.2298,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.4050531914893617,
|
|
"grad_norm": 3.9338622093200684,
|
|
"learning_rate": 9.973731618565876e-06,
|
|
"loss": 1.34,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.4053191489361702,
|
|
"grad_norm": 3.8561365604400635,
|
|
"learning_rate": 9.973641506092165e-06,
|
|
"loss": 1.4198,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.40558510638297873,
|
|
"grad_norm": 3.7590527534484863,
|
|
"learning_rate": 9.973551239728129e-06,
|
|
"loss": 1.3644,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.40585106382978725,
|
|
"grad_norm": 4.470832824707031,
|
|
"learning_rate": 9.973460819476562e-06,
|
|
"loss": 1.3641,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.40611702127659577,
|
|
"grad_norm": 3.5494723320007324,
|
|
"learning_rate": 9.973370245340264e-06,
|
|
"loss": 1.2552,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.40638297872340423,
|
|
"grad_norm": 4.204685211181641,
|
|
"learning_rate": 9.973279517322033e-06,
|
|
"loss": 1.3577,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.40664893617021275,
|
|
"grad_norm": 4.775966167449951,
|
|
"learning_rate": 9.97318863542468e-06,
|
|
"loss": 1.4342,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.40691489361702127,
|
|
"grad_norm": 4.2795729637146,
|
|
"learning_rate": 9.973097599651013e-06,
|
|
"loss": 1.3033,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.4071808510638298,
|
|
"grad_norm": 4.110699653625488,
|
|
"learning_rate": 9.973006410003853e-06,
|
|
"loss": 1.3463,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.4074468085106383,
|
|
"grad_norm": 3.8819406032562256,
|
|
"learning_rate": 9.97291506648602e-06,
|
|
"loss": 1.1908,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.4077127659574468,
|
|
"grad_norm": 4.164956092834473,
|
|
"learning_rate": 9.972823569100338e-06,
|
|
"loss": 1.2573,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.40797872340425534,
|
|
"grad_norm": 3.9775986671447754,
|
|
"learning_rate": 9.97273191784964e-06,
|
|
"loss": 1.2141,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.40824468085106386,
|
|
"grad_norm": 4.500059604644775,
|
|
"learning_rate": 9.972640112736764e-06,
|
|
"loss": 1.3342,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.4085106382978723,
|
|
"grad_norm": 4.081606864929199,
|
|
"learning_rate": 9.972548153764547e-06,
|
|
"loss": 1.2027,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.40877659574468084,
|
|
"grad_norm": 4.272010803222656,
|
|
"learning_rate": 9.972456040935838e-06,
|
|
"loss": 1.2332,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.40904255319148936,
|
|
"grad_norm": 4.042487144470215,
|
|
"learning_rate": 9.972363774253481e-06,
|
|
"loss": 1.1932,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.4093085106382979,
|
|
"grad_norm": 3.9628350734710693,
|
|
"learning_rate": 9.972271353720337e-06,
|
|
"loss": 1.2636,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.4095744680851064,
|
|
"grad_norm": 4.018553256988525,
|
|
"learning_rate": 9.972178779339264e-06,
|
|
"loss": 1.2822,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.4098404255319149,
|
|
"grad_norm": 4.054775714874268,
|
|
"learning_rate": 9.972086051113123e-06,
|
|
"loss": 1.3419,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.4101063829787234,
|
|
"grad_norm": 4.035485744476318,
|
|
"learning_rate": 9.971993169044787e-06,
|
|
"loss": 1.2586,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.4103723404255319,
|
|
"grad_norm": 4.139084815979004,
|
|
"learning_rate": 9.971900133137128e-06,
|
|
"loss": 1.3533,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.4106382978723404,
|
|
"grad_norm": 3.9709324836730957,
|
|
"learning_rate": 9.971806943393026e-06,
|
|
"loss": 1.1807,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.4109042553191489,
|
|
"grad_norm": 3.836603879928589,
|
|
"learning_rate": 9.971713599815364e-06,
|
|
"loss": 1.2364,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.41117021276595744,
|
|
"grad_norm": 3.484250068664551,
|
|
"learning_rate": 9.97162010240703e-06,
|
|
"loss": 1.2536,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.41143617021276596,
|
|
"grad_norm": 4.203670978546143,
|
|
"learning_rate": 9.971526451170914e-06,
|
|
"loss": 1.2339,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.4117021276595745,
|
|
"grad_norm": 3.7969377040863037,
|
|
"learning_rate": 9.971432646109919e-06,
|
|
"loss": 1.4205,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.411968085106383,
|
|
"grad_norm": 3.9421546459198,
|
|
"learning_rate": 9.971338687226944e-06,
|
|
"loss": 1.2441,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.4122340425531915,
|
|
"grad_norm": 3.8566412925720215,
|
|
"learning_rate": 9.971244574524897e-06,
|
|
"loss": 1.3148,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.4125,
|
|
"grad_norm": 3.6699059009552,
|
|
"learning_rate": 9.971150308006689e-06,
|
|
"loss": 1.1396,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.4127659574468085,
|
|
"grad_norm": 4.328299522399902,
|
|
"learning_rate": 9.971055887675238e-06,
|
|
"loss": 1.4105,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.413031914893617,
|
|
"grad_norm": 3.6258397102355957,
|
|
"learning_rate": 9.970961313533465e-06,
|
|
"loss": 1.2399,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.41329787234042553,
|
|
"grad_norm": 4.217952251434326,
|
|
"learning_rate": 9.970866585584298e-06,
|
|
"loss": 1.2643,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.41356382978723405,
|
|
"grad_norm": 3.8410286903381348,
|
|
"learning_rate": 9.970771703830666e-06,
|
|
"loss": 1.3982,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.41382978723404257,
|
|
"grad_norm": 4.1184234619140625,
|
|
"learning_rate": 9.970676668275504e-06,
|
|
"loss": 1.3206,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.4140957446808511,
|
|
"grad_norm": 3.805264472961426,
|
|
"learning_rate": 9.970581478921755e-06,
|
|
"loss": 1.3301,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.41436170212765955,
|
|
"grad_norm": 3.7191929817199707,
|
|
"learning_rate": 9.970486135772362e-06,
|
|
"loss": 1.3443,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.41462765957446807,
|
|
"grad_norm": 3.7962100505828857,
|
|
"learning_rate": 9.970390638830275e-06,
|
|
"loss": 1.1145,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.4148936170212766,
|
|
"grad_norm": 3.8480000495910645,
|
|
"learning_rate": 9.970294988098452e-06,
|
|
"loss": 1.303,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.4151595744680851,
|
|
"grad_norm": 4.154008388519287,
|
|
"learning_rate": 9.970199183579847e-06,
|
|
"loss": 1.2505,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.4154255319148936,
|
|
"grad_norm": 3.6945624351501465,
|
|
"learning_rate": 9.97010322527743e-06,
|
|
"loss": 1.2318,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.41569148936170214,
|
|
"grad_norm": 4.145558834075928,
|
|
"learning_rate": 9.970007113194168e-06,
|
|
"loss": 1.2855,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.41595744680851066,
|
|
"grad_norm": 4.037220001220703,
|
|
"learning_rate": 9.969910847333032e-06,
|
|
"loss": 1.2599,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.4162234042553192,
|
|
"grad_norm": 4.070208549499512,
|
|
"learning_rate": 9.969814427697007e-06,
|
|
"loss": 1.3002,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.41648936170212764,
|
|
"grad_norm": 4.0794548988342285,
|
|
"learning_rate": 9.969717854289069e-06,
|
|
"loss": 1.3807,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.41675531914893615,
|
|
"grad_norm": 3.9017162322998047,
|
|
"learning_rate": 9.969621127112211e-06,
|
|
"loss": 1.1982,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.41702127659574467,
|
|
"grad_norm": 4.089752674102783,
|
|
"learning_rate": 9.969524246169424e-06,
|
|
"loss": 1.2734,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.4172872340425532,
|
|
"grad_norm": 3.7550644874572754,
|
|
"learning_rate": 9.969427211463705e-06,
|
|
"loss": 1.2207,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.4175531914893617,
|
|
"grad_norm": 3.9977076053619385,
|
|
"learning_rate": 9.969330022998057e-06,
|
|
"loss": 1.3695,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.4178191489361702,
|
|
"grad_norm": 4.422798156738281,
|
|
"learning_rate": 9.969232680775491e-06,
|
|
"loss": 1.3292,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 0.41808510638297874,
|
|
"grad_norm": 4.122771263122559,
|
|
"learning_rate": 9.969135184799013e-06,
|
|
"loss": 1.3753,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.4183510638297872,
|
|
"grad_norm": 3.827120542526245,
|
|
"learning_rate": 9.969037535071641e-06,
|
|
"loss": 1.2738,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 0.4186170212765957,
|
|
"grad_norm": 3.823761463165283,
|
|
"learning_rate": 9.968939731596399e-06,
|
|
"loss": 1.2201,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.41888297872340424,
|
|
"grad_norm": 4.0475616455078125,
|
|
"learning_rate": 9.96884177437631e-06,
|
|
"loss": 1.3511,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.41914893617021276,
|
|
"grad_norm": 4.167337894439697,
|
|
"learning_rate": 9.968743663414408e-06,
|
|
"loss": 1.3725,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.4194148936170213,
|
|
"grad_norm": 4.683474063873291,
|
|
"learning_rate": 9.968645398713726e-06,
|
|
"loss": 1.3719,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 0.4196808510638298,
|
|
"grad_norm": 4.450965881347656,
|
|
"learning_rate": 9.968546980277305e-06,
|
|
"loss": 1.2847,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.4199468085106383,
|
|
"grad_norm": 4.25331449508667,
|
|
"learning_rate": 9.968448408108191e-06,
|
|
"loss": 1.4151,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 0.42021276595744683,
|
|
"grad_norm": 4.090495586395264,
|
|
"learning_rate": 9.968349682209434e-06,
|
|
"loss": 1.2518,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.4204787234042553,
|
|
"grad_norm": 4.116806507110596,
|
|
"learning_rate": 9.96825080258409e-06,
|
|
"loss": 1.3986,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 0.4207446808510638,
|
|
"grad_norm": 4.016780376434326,
|
|
"learning_rate": 9.968151769235216e-06,
|
|
"loss": 1.2488,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.42101063829787233,
|
|
"grad_norm": 4.153627872467041,
|
|
"learning_rate": 9.968052582165874e-06,
|
|
"loss": 1.3459,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 0.42127659574468085,
|
|
"grad_norm": 4.0243048667907715,
|
|
"learning_rate": 9.96795324137914e-06,
|
|
"loss": 1.2554,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.42154255319148937,
|
|
"grad_norm": 4.162500381469727,
|
|
"learning_rate": 9.96785374687808e-06,
|
|
"loss": 1.3597,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.4218085106382979,
|
|
"grad_norm": 3.8271100521087646,
|
|
"learning_rate": 9.967754098665778e-06,
|
|
"loss": 1.2375,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.4220744680851064,
|
|
"grad_norm": 3.73313045501709,
|
|
"learning_rate": 9.967654296745317e-06,
|
|
"loss": 1.1394,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 0.4223404255319149,
|
|
"grad_norm": 4.17546272277832,
|
|
"learning_rate": 9.96755434111978e-06,
|
|
"loss": 1.3004,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.4226063829787234,
|
|
"grad_norm": 3.7987289428710938,
|
|
"learning_rate": 9.967454231792267e-06,
|
|
"loss": 1.2551,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 0.4228723404255319,
|
|
"grad_norm": 4.171220779418945,
|
|
"learning_rate": 9.967353968765868e-06,
|
|
"loss": 1.2722,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.4231382978723404,
|
|
"grad_norm": 4.090373516082764,
|
|
"learning_rate": 9.96725355204369e-06,
|
|
"loss": 1.2963,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 0.42340425531914894,
|
|
"grad_norm": 4.222188949584961,
|
|
"learning_rate": 9.967152981628841e-06,
|
|
"loss": 1.1075,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.42367021276595745,
|
|
"grad_norm": 3.9014172554016113,
|
|
"learning_rate": 9.967052257524428e-06,
|
|
"loss": 1.251,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 0.423936170212766,
|
|
"grad_norm": 4.0223870277404785,
|
|
"learning_rate": 9.966951379733572e-06,
|
|
"loss": 1.1924,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.4242021276595745,
|
|
"grad_norm": 3.724557876586914,
|
|
"learning_rate": 9.96685034825939e-06,
|
|
"loss": 1.206,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.42446808510638295,
|
|
"grad_norm": 4.103020191192627,
|
|
"learning_rate": 9.966749163105011e-06,
|
|
"loss": 1.374,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.42473404255319147,
|
|
"grad_norm": 3.997119188308716,
|
|
"learning_rate": 9.966647824273567e-06,
|
|
"loss": 1.2097,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 0.425,
|
|
"grad_norm": 4.226285934448242,
|
|
"learning_rate": 9.966546331768192e-06,
|
|
"loss": 1.3387,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.4252659574468085,
|
|
"grad_norm": 4.060708999633789,
|
|
"learning_rate": 9.966444685592025e-06,
|
|
"loss": 1.2762,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 0.425531914893617,
|
|
"grad_norm": 4.005706787109375,
|
|
"learning_rate": 9.966342885748212e-06,
|
|
"loss": 1.2845,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.42579787234042554,
|
|
"grad_norm": 4.201882839202881,
|
|
"learning_rate": 9.966240932239904e-06,
|
|
"loss": 1.2953,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 0.42606382978723406,
|
|
"grad_norm": 3.7558727264404297,
|
|
"learning_rate": 9.966138825070254e-06,
|
|
"loss": 1.2806,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.4263297872340426,
|
|
"grad_norm": 3.9751381874084473,
|
|
"learning_rate": 9.96603656424242e-06,
|
|
"loss": 1.2354,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 0.42659574468085104,
|
|
"grad_norm": 3.775033712387085,
|
|
"learning_rate": 9.96593414975957e-06,
|
|
"loss": 1.2592,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.42686170212765956,
|
|
"grad_norm": 4.114045143127441,
|
|
"learning_rate": 9.965831581624872e-06,
|
|
"loss": 1.1019,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.4271276595744681,
|
|
"grad_norm": 3.6853203773498535,
|
|
"learning_rate": 9.965728859841497e-06,
|
|
"loss": 1.356,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.4273936170212766,
|
|
"grad_norm": 3.8778109550476074,
|
|
"learning_rate": 9.965625984412623e-06,
|
|
"loss": 1.2266,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 0.4276595744680851,
|
|
"grad_norm": 3.860879421234131,
|
|
"learning_rate": 9.965522955341437e-06,
|
|
"loss": 1.2998,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.42792553191489363,
|
|
"grad_norm": 3.7324464321136475,
|
|
"learning_rate": 9.965419772631125e-06,
|
|
"loss": 1.3103,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 0.42819148936170215,
|
|
"grad_norm": 3.8030385971069336,
|
|
"learning_rate": 9.965316436284877e-06,
|
|
"loss": 1.2967,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.4284574468085106,
|
|
"grad_norm": 4.376537322998047,
|
|
"learning_rate": 9.965212946305893e-06,
|
|
"loss": 1.4258,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 0.42872340425531913,
|
|
"grad_norm": 4.365556716918945,
|
|
"learning_rate": 9.965109302697376e-06,
|
|
"loss": 1.3794,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.42898936170212765,
|
|
"grad_norm": 4.431367874145508,
|
|
"learning_rate": 9.96500550546253e-06,
|
|
"loss": 1.2973,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 0.42925531914893617,
|
|
"grad_norm": 4.084920406341553,
|
|
"learning_rate": 9.96490155460457e-06,
|
|
"loss": 1.2417,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.4295212765957447,
|
|
"grad_norm": 3.6877284049987793,
|
|
"learning_rate": 9.964797450126708e-06,
|
|
"loss": 1.2577,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.4297872340425532,
|
|
"grad_norm": 4.147090911865234,
|
|
"learning_rate": 9.964693192032168e-06,
|
|
"loss": 1.3127,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.4300531914893617,
|
|
"grad_norm": 3.9144530296325684,
|
|
"learning_rate": 9.964588780324176e-06,
|
|
"loss": 1.2333,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 0.43031914893617024,
|
|
"grad_norm": 3.9510538578033447,
|
|
"learning_rate": 9.964484215005963e-06,
|
|
"loss": 1.2541,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.4305851063829787,
|
|
"grad_norm": 4.1784892082214355,
|
|
"learning_rate": 9.964379496080763e-06,
|
|
"loss": 1.3247,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 0.4308510638297872,
|
|
"grad_norm": 3.9380571842193604,
|
|
"learning_rate": 9.964274623551814e-06,
|
|
"loss": 1.3042,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.43111702127659574,
|
|
"grad_norm": 3.6729469299316406,
|
|
"learning_rate": 9.964169597422367e-06,
|
|
"loss": 1.2064,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 0.43138297872340425,
|
|
"grad_norm": 4.168332576751709,
|
|
"learning_rate": 9.964064417695666e-06,
|
|
"loss": 1.2936,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.43164893617021277,
|
|
"grad_norm": 3.7848429679870605,
|
|
"learning_rate": 9.963959084374969e-06,
|
|
"loss": 1.3055,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 0.4319148936170213,
|
|
"grad_norm": 3.760188579559326,
|
|
"learning_rate": 9.963853597463533e-06,
|
|
"loss": 1.2085,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.4321808510638298,
|
|
"grad_norm": 3.734712839126587,
|
|
"learning_rate": 9.963747956964623e-06,
|
|
"loss": 1.1788,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.4324468085106383,
|
|
"grad_norm": 4.398496627807617,
|
|
"learning_rate": 9.963642162881506e-06,
|
|
"loss": 1.1853,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.4327127659574468,
|
|
"grad_norm": 4.267323970794678,
|
|
"learning_rate": 9.963536215217457e-06,
|
|
"loss": 1.2317,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 0.4329787234042553,
|
|
"grad_norm": 4.306065082550049,
|
|
"learning_rate": 9.963430113975753e-06,
|
|
"loss": 1.5309,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.4332446808510638,
|
|
"grad_norm": 3.862356424331665,
|
|
"learning_rate": 9.963323859159679e-06,
|
|
"loss": 1.2449,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 0.43351063829787234,
|
|
"grad_norm": 3.6479053497314453,
|
|
"learning_rate": 9.96321745077252e-06,
|
|
"loss": 1.1502,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.43377659574468086,
|
|
"grad_norm": 3.702998399734497,
|
|
"learning_rate": 9.963110888817569e-06,
|
|
"loss": 1.1776,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 0.4340425531914894,
|
|
"grad_norm": 4.183767795562744,
|
|
"learning_rate": 9.963004173298125e-06,
|
|
"loss": 1.2266,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.4343085106382979,
|
|
"grad_norm": 3.9834625720977783,
|
|
"learning_rate": 9.96289730421749e-06,
|
|
"loss": 1.222,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 0.43457446808510636,
|
|
"grad_norm": 3.971428871154785,
|
|
"learning_rate": 9.962790281578966e-06,
|
|
"loss": 1.3843,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.4348404255319149,
|
|
"grad_norm": 3.833468437194824,
|
|
"learning_rate": 9.96268310538587e-06,
|
|
"loss": 1.3268,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.4351063829787234,
|
|
"grad_norm": 3.7899720668792725,
|
|
"learning_rate": 9.962575775641516e-06,
|
|
"loss": 1.2939,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.4353723404255319,
|
|
"grad_norm": 3.8362271785736084,
|
|
"learning_rate": 9.962468292349223e-06,
|
|
"loss": 1.2681,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 0.43563829787234043,
|
|
"grad_norm": 3.884549140930176,
|
|
"learning_rate": 9.96236065551232e-06,
|
|
"loss": 1.267,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.43590425531914895,
|
|
"grad_norm": 3.975801944732666,
|
|
"learning_rate": 9.962252865134136e-06,
|
|
"loss": 1.3039,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 0.43617021276595747,
|
|
"grad_norm": 4.278522491455078,
|
|
"learning_rate": 9.962144921218005e-06,
|
|
"loss": 1.3885,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.436436170212766,
|
|
"grad_norm": 3.9850552082061768,
|
|
"learning_rate": 9.962036823767269e-06,
|
|
"loss": 1.2586,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 0.43670212765957445,
|
|
"grad_norm": 4.315723419189453,
|
|
"learning_rate": 9.961928572785272e-06,
|
|
"loss": 1.3281,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.43696808510638296,
|
|
"grad_norm": 3.7114546298980713,
|
|
"learning_rate": 9.96182016827536e-06,
|
|
"loss": 1.1813,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 0.4372340425531915,
|
|
"grad_norm": 4.079943656921387,
|
|
"learning_rate": 9.961711610240892e-06,
|
|
"loss": 1.2878,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.4375,
|
|
"grad_norm": 3.7427685260772705,
|
|
"learning_rate": 9.961602898685225e-06,
|
|
"loss": 1.3068,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.4377659574468085,
|
|
"grad_norm": 4.234682083129883,
|
|
"learning_rate": 9.961494033611726e-06,
|
|
"loss": 1.4143,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 0.43803191489361704,
|
|
"grad_norm": 3.7043113708496094,
|
|
"learning_rate": 9.961385015023755e-06,
|
|
"loss": 1.356,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 0.43829787234042555,
|
|
"grad_norm": 3.9575397968292236,
|
|
"learning_rate": 9.961275842924694e-06,
|
|
"loss": 1.3257,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 0.438563829787234,
|
|
"grad_norm": 4.285686016082764,
|
|
"learning_rate": 9.961166517317914e-06,
|
|
"loss": 1.2934,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 0.43882978723404253,
|
|
"grad_norm": 4.141624927520752,
|
|
"learning_rate": 9.961057038206804e-06,
|
|
"loss": 1.1941,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.43909574468085105,
|
|
"grad_norm": 3.7219042778015137,
|
|
"learning_rate": 9.960947405594747e-06,
|
|
"loss": 1.309,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 0.43936170212765957,
|
|
"grad_norm": 4.113218307495117,
|
|
"learning_rate": 9.960837619485136e-06,
|
|
"loss": 1.2331,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 0.4396276595744681,
|
|
"grad_norm": 4.069479465484619,
|
|
"learning_rate": 9.96072767988137e-06,
|
|
"loss": 1.1383,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 0.4398936170212766,
|
|
"grad_norm": 3.974097967147827,
|
|
"learning_rate": 9.960617586786847e-06,
|
|
"loss": 1.2015,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 0.4401595744680851,
|
|
"grad_norm": 3.991530656814575,
|
|
"learning_rate": 9.960507340204977e-06,
|
|
"loss": 1.254,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.44042553191489364,
|
|
"grad_norm": 4.121614933013916,
|
|
"learning_rate": 9.960396940139169e-06,
|
|
"loss": 1.4372,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 0.4406914893617021,
|
|
"grad_norm": 4.809171676635742,
|
|
"learning_rate": 9.960286386592839e-06,
|
|
"loss": 1.1771,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 0.4409574468085106,
|
|
"grad_norm": 3.7910423278808594,
|
|
"learning_rate": 9.960175679569409e-06,
|
|
"loss": 1.4103,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 0.44122340425531914,
|
|
"grad_norm": 3.5597236156463623,
|
|
"learning_rate": 9.960064819072305e-06,
|
|
"loss": 1.2461,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 0.44148936170212766,
|
|
"grad_norm": 4.393692493438721,
|
|
"learning_rate": 9.959953805104953e-06,
|
|
"loss": 1.3746,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.4417553191489362,
|
|
"grad_norm": 4.309146881103516,
|
|
"learning_rate": 9.959842637670791e-06,
|
|
"loss": 1.2619,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 0.4420212765957447,
|
|
"grad_norm": 4.537207126617432,
|
|
"learning_rate": 9.95973131677326e-06,
|
|
"loss": 1.2895,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 0.4422872340425532,
|
|
"grad_norm": 4.204534530639648,
|
|
"learning_rate": 9.959619842415802e-06,
|
|
"loss": 1.2458,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 0.4425531914893617,
|
|
"grad_norm": 3.859935998916626,
|
|
"learning_rate": 9.959508214601866e-06,
|
|
"loss": 1.2334,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 0.4428191489361702,
|
|
"grad_norm": 4.042413711547852,
|
|
"learning_rate": 9.959396433334907e-06,
|
|
"loss": 1.451,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.4430851063829787,
|
|
"grad_norm": 4.226952075958252,
|
|
"learning_rate": 9.959284498618385e-06,
|
|
"loss": 1.3204,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 0.44335106382978723,
|
|
"grad_norm": 4.049594402313232,
|
|
"learning_rate": 9.95917241045576e-06,
|
|
"loss": 1.3671,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 0.44361702127659575,
|
|
"grad_norm": 3.731627941131592,
|
|
"learning_rate": 9.959060168850504e-06,
|
|
"loss": 1.289,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 0.44388297872340426,
|
|
"grad_norm": 4.097120761871338,
|
|
"learning_rate": 9.958947773806084e-06,
|
|
"loss": 1.2126,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 0.4441489361702128,
|
|
"grad_norm": 4.148438930511475,
|
|
"learning_rate": 9.958835225325984e-06,
|
|
"loss": 1.1967,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.4444148936170213,
|
|
"grad_norm": 3.9843711853027344,
|
|
"learning_rate": 9.958722523413685e-06,
|
|
"loss": 1.3463,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 0.44468085106382976,
|
|
"grad_norm": 4.3066630363464355,
|
|
"learning_rate": 9.958609668072673e-06,
|
|
"loss": 1.4344,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 0.4449468085106383,
|
|
"grad_norm": 3.673088550567627,
|
|
"learning_rate": 9.958496659306436e-06,
|
|
"loss": 1.3849,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 0.4452127659574468,
|
|
"grad_norm": 4.2683210372924805,
|
|
"learning_rate": 9.958383497118478e-06,
|
|
"loss": 1.3148,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 0.4454787234042553,
|
|
"grad_norm": 3.677374839782715,
|
|
"learning_rate": 9.958270181512295e-06,
|
|
"loss": 1.1148,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.44574468085106383,
|
|
"grad_norm": 4.075168132781982,
|
|
"learning_rate": 9.958156712491396e-06,
|
|
"loss": 1.4016,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 0.44601063829787235,
|
|
"grad_norm": 4.137705326080322,
|
|
"learning_rate": 9.95804309005929e-06,
|
|
"loss": 1.3865,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 0.44627659574468087,
|
|
"grad_norm": 3.7367939949035645,
|
|
"learning_rate": 9.957929314219494e-06,
|
|
"loss": 1.3304,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 0.4465425531914894,
|
|
"grad_norm": 3.8000895977020264,
|
|
"learning_rate": 9.957815384975528e-06,
|
|
"loss": 1.4171,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 0.44680851063829785,
|
|
"grad_norm": 3.774846315383911,
|
|
"learning_rate": 9.957701302330915e-06,
|
|
"loss": 1.0019,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.44707446808510637,
|
|
"grad_norm": 3.7514147758483887,
|
|
"learning_rate": 9.957587066289189e-06,
|
|
"loss": 1.0711,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 0.4473404255319149,
|
|
"grad_norm": 4.298345565795898,
|
|
"learning_rate": 9.957472676853882e-06,
|
|
"loss": 1.2902,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 0.4476063829787234,
|
|
"grad_norm": 3.632465362548828,
|
|
"learning_rate": 9.957358134028535e-06,
|
|
"loss": 1.1969,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 0.4478723404255319,
|
|
"grad_norm": 3.680661201477051,
|
|
"learning_rate": 9.957243437816688e-06,
|
|
"loss": 1.2266,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 0.44813829787234044,
|
|
"grad_norm": 3.757211208343506,
|
|
"learning_rate": 9.957128588221895e-06,
|
|
"loss": 1.2374,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.44840425531914896,
|
|
"grad_norm": 3.93074107170105,
|
|
"learning_rate": 9.957013585247703e-06,
|
|
"loss": 1.2285,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 0.4486702127659574,
|
|
"grad_norm": 4.218538284301758,
|
|
"learning_rate": 9.95689842889768e-06,
|
|
"loss": 1.1887,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 0.44893617021276594,
|
|
"grad_norm": 4.04231595993042,
|
|
"learning_rate": 9.95678311917538e-06,
|
|
"loss": 1.3696,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 0.44920212765957446,
|
|
"grad_norm": 3.7490601539611816,
|
|
"learning_rate": 9.956667656084376e-06,
|
|
"loss": 1.2857,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 0.449468085106383,
|
|
"grad_norm": 3.642409324645996,
|
|
"learning_rate": 9.956552039628237e-06,
|
|
"loss": 1.1536,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.4497340425531915,
|
|
"grad_norm": 4.070724964141846,
|
|
"learning_rate": 9.956436269810543e-06,
|
|
"loss": 1.3129,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"grad_norm": 3.6677682399749756,
|
|
"learning_rate": 9.956320346634877e-06,
|
|
"loss": 1.2578,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 0.45026595744680853,
|
|
"grad_norm": 3.783087730407715,
|
|
"learning_rate": 9.956204270104823e-06,
|
|
"loss": 1.2943,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 0.45053191489361705,
|
|
"grad_norm": 4.206989765167236,
|
|
"learning_rate": 9.956088040223975e-06,
|
|
"loss": 1.4913,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 0.4507978723404255,
|
|
"grad_norm": 4.3370819091796875,
|
|
"learning_rate": 9.955971656995927e-06,
|
|
"loss": 1.1996,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.451063829787234,
|
|
"grad_norm": 3.9697062969207764,
|
|
"learning_rate": 9.95585512042428e-06,
|
|
"loss": 1.253,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 0.45132978723404255,
|
|
"grad_norm": 3.6939969062805176,
|
|
"learning_rate": 9.95573843051264e-06,
|
|
"loss": 1.1627,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 0.45159574468085106,
|
|
"grad_norm": 4.0041351318359375,
|
|
"learning_rate": 9.955621587264621e-06,
|
|
"loss": 1.2185,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 0.4518617021276596,
|
|
"grad_norm": 4.0276079177856445,
|
|
"learning_rate": 9.955504590683834e-06,
|
|
"loss": 1.2071,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 0.4521276595744681,
|
|
"grad_norm": 4.058544158935547,
|
|
"learning_rate": 9.955387440773902e-06,
|
|
"loss": 1.2284,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.4523936170212766,
|
|
"grad_norm": 3.8239941596984863,
|
|
"learning_rate": 9.955270137538446e-06,
|
|
"loss": 1.3371,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 0.4526595744680851,
|
|
"grad_norm": 4.147292613983154,
|
|
"learning_rate": 9.955152680981099e-06,
|
|
"loss": 1.3542,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 0.4529255319148936,
|
|
"grad_norm": 3.7271342277526855,
|
|
"learning_rate": 9.955035071105495e-06,
|
|
"loss": 1.0038,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 0.4531914893617021,
|
|
"grad_norm": 4.002806663513184,
|
|
"learning_rate": 9.954917307915272e-06,
|
|
"loss": 1.3361,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 0.45345744680851063,
|
|
"grad_norm": 3.8606765270233154,
|
|
"learning_rate": 9.954799391414073e-06,
|
|
"loss": 1.2703,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.45372340425531915,
|
|
"grad_norm": 4.117914199829102,
|
|
"learning_rate": 9.954681321605546e-06,
|
|
"loss": 1.4262,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 0.45398936170212767,
|
|
"grad_norm": 3.956178903579712,
|
|
"learning_rate": 9.954563098493349e-06,
|
|
"loss": 1.2889,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 0.4542553191489362,
|
|
"grad_norm": 3.8659157752990723,
|
|
"learning_rate": 9.954444722081133e-06,
|
|
"loss": 1.2892,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 0.4545212765957447,
|
|
"grad_norm": 3.936624765396118,
|
|
"learning_rate": 9.954326192372565e-06,
|
|
"loss": 1.5031,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 0.45478723404255317,
|
|
"grad_norm": 3.8671083450317383,
|
|
"learning_rate": 9.954207509371313e-06,
|
|
"loss": 1.3221,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.4550531914893617,
|
|
"grad_norm": 4.292788505554199,
|
|
"learning_rate": 9.954088673081048e-06,
|
|
"loss": 1.3216,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 0.4553191489361702,
|
|
"grad_norm": 3.8020899295806885,
|
|
"learning_rate": 9.953969683505444e-06,
|
|
"loss": 1.2248,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 0.4555851063829787,
|
|
"grad_norm": 4.227027893066406,
|
|
"learning_rate": 9.953850540648189e-06,
|
|
"loss": 1.2624,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 0.45585106382978724,
|
|
"grad_norm": 4.067933559417725,
|
|
"learning_rate": 9.953731244512963e-06,
|
|
"loss": 1.2756,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 0.45611702127659576,
|
|
"grad_norm": 3.9916749000549316,
|
|
"learning_rate": 9.953611795103462e-06,
|
|
"loss": 1.2651,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.4563829787234043,
|
|
"grad_norm": 4.110116004943848,
|
|
"learning_rate": 9.953492192423379e-06,
|
|
"loss": 1.3669,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 0.4566489361702128,
|
|
"grad_norm": 4.194306373596191,
|
|
"learning_rate": 9.953372436476414e-06,
|
|
"loss": 1.534,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 0.45691489361702126,
|
|
"grad_norm": 3.9467716217041016,
|
|
"learning_rate": 9.953252527266275e-06,
|
|
"loss": 1.2748,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 0.4571808510638298,
|
|
"grad_norm": 4.1253886222839355,
|
|
"learning_rate": 9.953132464796674e-06,
|
|
"loss": 1.2625,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 0.4574468085106383,
|
|
"grad_norm": 4.45941162109375,
|
|
"learning_rate": 9.95301224907132e-06,
|
|
"loss": 1.3565,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.4577127659574468,
|
|
"grad_norm": 4.033083915710449,
|
|
"learning_rate": 9.952891880093935e-06,
|
|
"loss": 1.2789,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 0.45797872340425533,
|
|
"grad_norm": 4.035634517669678,
|
|
"learning_rate": 9.952771357868245e-06,
|
|
"loss": 1.2641,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 0.45824468085106385,
|
|
"grad_norm": 3.722550630569458,
|
|
"learning_rate": 9.952650682397978e-06,
|
|
"loss": 1.3316,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 0.45851063829787236,
|
|
"grad_norm": 3.8771049976348877,
|
|
"learning_rate": 9.952529853686868e-06,
|
|
"loss": 1.3889,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 0.4587765957446808,
|
|
"grad_norm": 4.175072193145752,
|
|
"learning_rate": 9.952408871738652e-06,
|
|
"loss": 1.3766,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.45904255319148934,
|
|
"grad_norm": 3.859618902206421,
|
|
"learning_rate": 9.952287736557078e-06,
|
|
"loss": 1.1251,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 0.45930851063829786,
|
|
"grad_norm": 4.060375213623047,
|
|
"learning_rate": 9.952166448145887e-06,
|
|
"loss": 1.2308,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 0.4595744680851064,
|
|
"grad_norm": 3.9827208518981934,
|
|
"learning_rate": 9.952045006508839e-06,
|
|
"loss": 1.2434,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 0.4598404255319149,
|
|
"grad_norm": 3.8347811698913574,
|
|
"learning_rate": 9.951923411649686e-06,
|
|
"loss": 1.1165,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 0.4601063829787234,
|
|
"grad_norm": 3.8551104068756104,
|
|
"learning_rate": 9.951801663572194e-06,
|
|
"loss": 1.2536,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.46037234042553193,
|
|
"grad_norm": 4.300414562225342,
|
|
"learning_rate": 9.951679762280127e-06,
|
|
"loss": 1.3653,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 0.46063829787234045,
|
|
"grad_norm": 3.9349825382232666,
|
|
"learning_rate": 9.95155770777726e-06,
|
|
"loss": 1.1563,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 0.4609042553191489,
|
|
"grad_norm": 4.161105632781982,
|
|
"learning_rate": 9.951435500067366e-06,
|
|
"loss": 1.3807,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 0.46117021276595743,
|
|
"grad_norm": 4.0084686279296875,
|
|
"learning_rate": 9.95131313915423e-06,
|
|
"loss": 1.2486,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 0.46143617021276595,
|
|
"grad_norm": 3.6559159755706787,
|
|
"learning_rate": 9.951190625041634e-06,
|
|
"loss": 1.2063,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.46170212765957447,
|
|
"grad_norm": 3.99893856048584,
|
|
"learning_rate": 9.95106795773337e-06,
|
|
"loss": 1.2945,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 0.461968085106383,
|
|
"grad_norm": 4.061460018157959,
|
|
"learning_rate": 9.950945137233237e-06,
|
|
"loss": 1.3383,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 0.4622340425531915,
|
|
"grad_norm": 4.054213047027588,
|
|
"learning_rate": 9.950822163545032e-06,
|
|
"loss": 1.2836,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 0.4625,
|
|
"grad_norm": 3.9057390689849854,
|
|
"learning_rate": 9.95069903667256e-06,
|
|
"loss": 1.2157,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 0.4627659574468085,
|
|
"grad_norm": 3.977504014968872,
|
|
"learning_rate": 9.95057575661963e-06,
|
|
"loss": 1.322,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.463031914893617,
|
|
"grad_norm": 3.478853702545166,
|
|
"learning_rate": 9.950452323390058e-06,
|
|
"loss": 1.1772,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 0.4632978723404255,
|
|
"grad_norm": 3.8592848777770996,
|
|
"learning_rate": 9.950328736987664e-06,
|
|
"loss": 1.3234,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 0.46356382978723404,
|
|
"grad_norm": 3.858339309692383,
|
|
"learning_rate": 9.95020499741627e-06,
|
|
"loss": 1.3079,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 0.46382978723404256,
|
|
"grad_norm": 3.797468900680542,
|
|
"learning_rate": 9.950081104679704e-06,
|
|
"loss": 1.1611,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 0.4640957446808511,
|
|
"grad_norm": 3.9753012657165527,
|
|
"learning_rate": 9.949957058781802e-06,
|
|
"loss": 1.3449,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.4643617021276596,
|
|
"grad_norm": 4.22615385055542,
|
|
"learning_rate": 9.9498328597264e-06,
|
|
"loss": 1.1605,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 0.4646276595744681,
|
|
"grad_norm": 4.091019153594971,
|
|
"learning_rate": 9.949708507517342e-06,
|
|
"loss": 1.2877,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 0.4648936170212766,
|
|
"grad_norm": 4.121149063110352,
|
|
"learning_rate": 9.949584002158474e-06,
|
|
"loss": 1.2463,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 0.4651595744680851,
|
|
"grad_norm": 4.406885147094727,
|
|
"learning_rate": 9.949459343653652e-06,
|
|
"loss": 1.3303,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 0.4654255319148936,
|
|
"grad_norm": 4.5540666580200195,
|
|
"learning_rate": 9.94933453200673e-06,
|
|
"loss": 1.3149,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.4656914893617021,
|
|
"grad_norm": 3.9736440181732178,
|
|
"learning_rate": 9.949209567221569e-06,
|
|
"loss": 1.4947,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 0.46595744680851064,
|
|
"grad_norm": 4.265797138214111,
|
|
"learning_rate": 9.949084449302038e-06,
|
|
"loss": 1.2727,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 0.46622340425531916,
|
|
"grad_norm": 3.906663656234741,
|
|
"learning_rate": 9.948959178252007e-06,
|
|
"loss": 1.2346,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 0.4664893617021277,
|
|
"grad_norm": 3.8884990215301514,
|
|
"learning_rate": 9.948833754075351e-06,
|
|
"loss": 1.2997,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 0.46675531914893614,
|
|
"grad_norm": 3.943458080291748,
|
|
"learning_rate": 9.948708176775954e-06,
|
|
"loss": 1.2945,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.46702127659574466,
|
|
"grad_norm": 3.9176204204559326,
|
|
"learning_rate": 9.9485824463577e-06,
|
|
"loss": 1.2714,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 0.4672872340425532,
|
|
"grad_norm": 3.834636926651001,
|
|
"learning_rate": 9.948456562824478e-06,
|
|
"loss": 1.1341,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 0.4675531914893617,
|
|
"grad_norm": 3.8121955394744873,
|
|
"learning_rate": 9.948330526180183e-06,
|
|
"loss": 1.3064,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 0.4678191489361702,
|
|
"grad_norm": 4.121542930603027,
|
|
"learning_rate": 9.948204336428717e-06,
|
|
"loss": 1.2775,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 0.46808510638297873,
|
|
"grad_norm": 4.043048858642578,
|
|
"learning_rate": 9.948077993573983e-06,
|
|
"loss": 1.2601,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.46835106382978725,
|
|
"grad_norm": 3.7144079208374023,
|
|
"learning_rate": 9.94795149761989e-06,
|
|
"loss": 1.1136,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 0.46861702127659577,
|
|
"grad_norm": 4.818117141723633,
|
|
"learning_rate": 9.947824848570352e-06,
|
|
"loss": 1.4366,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 0.46888297872340423,
|
|
"grad_norm": 4.190409183502197,
|
|
"learning_rate": 9.947698046429287e-06,
|
|
"loss": 1.2308,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 0.46914893617021275,
|
|
"grad_norm": 4.0341267585754395,
|
|
"learning_rate": 9.94757109120062e-06,
|
|
"loss": 1.2466,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 0.46941489361702127,
|
|
"grad_norm": 3.9223225116729736,
|
|
"learning_rate": 9.947443982888279e-06,
|
|
"loss": 1.212,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.4696808510638298,
|
|
"grad_norm": 4.121956825256348,
|
|
"learning_rate": 9.947316721496196e-06,
|
|
"loss": 1.2635,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 0.4699468085106383,
|
|
"grad_norm": 3.9485208988189697,
|
|
"learning_rate": 9.947189307028308e-06,
|
|
"loss": 1.3579,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 0.4702127659574468,
|
|
"grad_norm": 4.009948253631592,
|
|
"learning_rate": 9.947061739488559e-06,
|
|
"loss": 1.4448,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 0.47047872340425534,
|
|
"grad_norm": 4.2954912185668945,
|
|
"learning_rate": 9.946934018880896e-06,
|
|
"loss": 1.1665,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 0.47074468085106386,
|
|
"grad_norm": 3.6225626468658447,
|
|
"learning_rate": 9.94680614520927e-06,
|
|
"loss": 1.2863,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.4710106382978723,
|
|
"grad_norm": 3.9409780502319336,
|
|
"learning_rate": 9.946678118477635e-06,
|
|
"loss": 1.1042,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 0.47127659574468084,
|
|
"grad_norm": 3.5868918895721436,
|
|
"learning_rate": 9.946549938689958e-06,
|
|
"loss": 1.1924,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 0.47154255319148936,
|
|
"grad_norm": 3.5596354007720947,
|
|
"learning_rate": 9.946421605850201e-06,
|
|
"loss": 1.1459,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 0.4718085106382979,
|
|
"grad_norm": 3.595719337463379,
|
|
"learning_rate": 9.946293119962336e-06,
|
|
"loss": 1.2274,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 0.4720744680851064,
|
|
"grad_norm": 4.341657638549805,
|
|
"learning_rate": 9.946164481030339e-06,
|
|
"loss": 1.433,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.4723404255319149,
|
|
"grad_norm": 4.137777328491211,
|
|
"learning_rate": 9.946035689058189e-06,
|
|
"loss": 1.3307,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 0.4726063829787234,
|
|
"grad_norm": 4.115199565887451,
|
|
"learning_rate": 9.94590674404987e-06,
|
|
"loss": 1.3575,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 0.4728723404255319,
|
|
"grad_norm": 3.9467270374298096,
|
|
"learning_rate": 9.945777646009375e-06,
|
|
"loss": 1.1772,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 0.4731382978723404,
|
|
"grad_norm": 3.986268997192383,
|
|
"learning_rate": 9.945648394940697e-06,
|
|
"loss": 1.3949,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 0.4734042553191489,
|
|
"grad_norm": 4.070546627044678,
|
|
"learning_rate": 9.945518990847835e-06,
|
|
"loss": 1.3664,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.47367021276595744,
|
|
"grad_norm": 4.0783233642578125,
|
|
"learning_rate": 9.94538943373479e-06,
|
|
"loss": 1.3199,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 0.47393617021276596,
|
|
"grad_norm": 4.331148147583008,
|
|
"learning_rate": 9.945259723605579e-06,
|
|
"loss": 1.3809,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 0.4742021276595745,
|
|
"grad_norm": 4.163266658782959,
|
|
"learning_rate": 9.945129860464205e-06,
|
|
"loss": 1.3325,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 0.474468085106383,
|
|
"grad_norm": 4.23274564743042,
|
|
"learning_rate": 9.944999844314693e-06,
|
|
"loss": 1.3793,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 0.4747340425531915,
|
|
"grad_norm": 4.219319820404053,
|
|
"learning_rate": 9.944869675161062e-06,
|
|
"loss": 1.3631,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.475,
|
|
"grad_norm": 4.5794830322265625,
|
|
"learning_rate": 9.944739353007344e-06,
|
|
"loss": 1.3941,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 0.4752659574468085,
|
|
"grad_norm": 3.806102752685547,
|
|
"learning_rate": 9.944608877857567e-06,
|
|
"loss": 1.2896,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 0.475531914893617,
|
|
"grad_norm": 3.927706241607666,
|
|
"learning_rate": 9.94447824971577e-06,
|
|
"loss": 1.4121,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 0.47579787234042553,
|
|
"grad_norm": 3.8713526725769043,
|
|
"learning_rate": 9.944347468585995e-06,
|
|
"loss": 1.3029,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 0.47606382978723405,
|
|
"grad_norm": 3.6732828617095947,
|
|
"learning_rate": 9.944216534472287e-06,
|
|
"loss": 1.2379,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.47632978723404257,
|
|
"grad_norm": 4.1793084144592285,
|
|
"learning_rate": 9.9440854473787e-06,
|
|
"loss": 1.391,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 0.4765957446808511,
|
|
"grad_norm": 4.131939888000488,
|
|
"learning_rate": 9.943954207309287e-06,
|
|
"loss": 1.2346,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 0.47686170212765955,
|
|
"grad_norm": 4.083577632904053,
|
|
"learning_rate": 9.94382281426811e-06,
|
|
"loss": 1.4478,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 0.47712765957446807,
|
|
"grad_norm": 3.640902280807495,
|
|
"learning_rate": 9.943691268259234e-06,
|
|
"loss": 1.2515,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 0.4773936170212766,
|
|
"grad_norm": 4.226308345794678,
|
|
"learning_rate": 9.943559569286731e-06,
|
|
"loss": 1.3599,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.4776595744680851,
|
|
"grad_norm": 4.301510810852051,
|
|
"learning_rate": 9.943427717354674e-06,
|
|
"loss": 1.2623,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 0.4779255319148936,
|
|
"grad_norm": 3.6332836151123047,
|
|
"learning_rate": 9.943295712467145e-06,
|
|
"loss": 1.2776,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 0.47819148936170214,
|
|
"grad_norm": 3.6086063385009766,
|
|
"learning_rate": 9.943163554628223e-06,
|
|
"loss": 1.2306,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 0.47845744680851066,
|
|
"grad_norm": 3.787510395050049,
|
|
"learning_rate": 9.943031243842004e-06,
|
|
"loss": 1.3904,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 0.4787234042553192,
|
|
"grad_norm": 4.257116317749023,
|
|
"learning_rate": 9.942898780112578e-06,
|
|
"loss": 1.2504,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.47898936170212764,
|
|
"grad_norm": 4.033913612365723,
|
|
"learning_rate": 9.942766163444044e-06,
|
|
"loss": 1.1252,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 0.47925531914893615,
|
|
"grad_norm": 3.9039859771728516,
|
|
"learning_rate": 9.942633393840504e-06,
|
|
"loss": 1.2183,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 0.47952127659574467,
|
|
"grad_norm": 4.116021156311035,
|
|
"learning_rate": 9.94250047130607e-06,
|
|
"loss": 1.3872,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 0.4797872340425532,
|
|
"grad_norm": 4.146193504333496,
|
|
"learning_rate": 9.94236739584485e-06,
|
|
"loss": 1.2302,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 0.4800531914893617,
|
|
"grad_norm": 4.098079681396484,
|
|
"learning_rate": 9.942234167460966e-06,
|
|
"loss": 1.3785,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.4803191489361702,
|
|
"grad_norm": 3.643486976623535,
|
|
"learning_rate": 9.942100786158537e-06,
|
|
"loss": 1.1499,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 0.48058510638297874,
|
|
"grad_norm": 4.246469974517822,
|
|
"learning_rate": 9.94196725194169e-06,
|
|
"loss": 1.3295,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 0.4808510638297872,
|
|
"grad_norm": 3.857382297515869,
|
|
"learning_rate": 9.94183356481456e-06,
|
|
"loss": 1.325,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 0.4811170212765957,
|
|
"grad_norm": 3.5324032306671143,
|
|
"learning_rate": 9.94169972478128e-06,
|
|
"loss": 1.1482,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 0.48138297872340424,
|
|
"grad_norm": 3.7972612380981445,
|
|
"learning_rate": 9.941565731845993e-06,
|
|
"loss": 1.4476,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.48164893617021276,
|
|
"grad_norm": 3.770042896270752,
|
|
"learning_rate": 9.941431586012844e-06,
|
|
"loss": 1.3034,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 0.4819148936170213,
|
|
"grad_norm": 3.675645351409912,
|
|
"learning_rate": 9.941297287285984e-06,
|
|
"loss": 1.2526,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 0.4821808510638298,
|
|
"grad_norm": 3.526350975036621,
|
|
"learning_rate": 9.941162835669568e-06,
|
|
"loss": 1.1573,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 0.4824468085106383,
|
|
"grad_norm": 3.4532649517059326,
|
|
"learning_rate": 9.941028231167756e-06,
|
|
"loss": 1.1735,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 0.48271276595744683,
|
|
"grad_norm": 3.9783992767333984,
|
|
"learning_rate": 9.940893473784714e-06,
|
|
"loss": 1.3828,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.4829787234042553,
|
|
"grad_norm": 4.059201717376709,
|
|
"learning_rate": 9.940758563524611e-06,
|
|
"loss": 1.2649,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 0.4832446808510638,
|
|
"grad_norm": 4.069849491119385,
|
|
"learning_rate": 9.94062350039162e-06,
|
|
"loss": 1.2833,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 0.48351063829787233,
|
|
"grad_norm": 3.488699197769165,
|
|
"learning_rate": 9.940488284389923e-06,
|
|
"loss": 1.0884,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 0.48377659574468085,
|
|
"grad_norm": 3.721902370452881,
|
|
"learning_rate": 9.940352915523699e-06,
|
|
"loss": 1.2442,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 0.48404255319148937,
|
|
"grad_norm": 4.082354545593262,
|
|
"learning_rate": 9.94021739379714e-06,
|
|
"loss": 1.3406,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.4843085106382979,
|
|
"grad_norm": 3.9286141395568848,
|
|
"learning_rate": 9.94008171921444e-06,
|
|
"loss": 1.2856,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 0.4845744680851064,
|
|
"grad_norm": 3.968208074569702,
|
|
"learning_rate": 9.939945891779795e-06,
|
|
"loss": 1.3172,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 0.4848404255319149,
|
|
"grad_norm": 4.114230155944824,
|
|
"learning_rate": 9.939809911497407e-06,
|
|
"loss": 1.2936,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 0.4851063829787234,
|
|
"grad_norm": 3.840162754058838,
|
|
"learning_rate": 9.939673778371484e-06,
|
|
"loss": 1.3923,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 0.4853723404255319,
|
|
"grad_norm": 4.272914886474609,
|
|
"learning_rate": 9.939537492406239e-06,
|
|
"loss": 1.2932,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.4856382978723404,
|
|
"grad_norm": 3.7386868000030518,
|
|
"learning_rate": 9.939401053605889e-06,
|
|
"loss": 1.3849,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 0.48590425531914894,
|
|
"grad_norm": 4.278271675109863,
|
|
"learning_rate": 9.939264461974654e-06,
|
|
"loss": 1.2878,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 0.48617021276595745,
|
|
"grad_norm": 3.827216386795044,
|
|
"learning_rate": 9.939127717516763e-06,
|
|
"loss": 1.2833,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 0.486436170212766,
|
|
"grad_norm": 3.888113498687744,
|
|
"learning_rate": 9.938990820236445e-06,
|
|
"loss": 1.2384,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 0.4867021276595745,
|
|
"grad_norm": 3.886965036392212,
|
|
"learning_rate": 9.938853770137935e-06,
|
|
"loss": 1.3365,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.48696808510638295,
|
|
"grad_norm": 3.9059507846832275,
|
|
"learning_rate": 9.938716567225475e-06,
|
|
"loss": 1.3569,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 0.48723404255319147,
|
|
"grad_norm": 3.922834634780884,
|
|
"learning_rate": 9.93857921150331e-06,
|
|
"loss": 1.2035,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 0.4875,
|
|
"grad_norm": 3.949385643005371,
|
|
"learning_rate": 9.938441702975689e-06,
|
|
"loss": 1.3485,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 0.4877659574468085,
|
|
"grad_norm": 4.1959333419799805,
|
|
"learning_rate": 9.938304041646869e-06,
|
|
"loss": 1.3079,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 0.488031914893617,
|
|
"grad_norm": 3.98871111869812,
|
|
"learning_rate": 9.938166227521106e-06,
|
|
"loss": 1.3067,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.48829787234042554,
|
|
"grad_norm": 4.129928112030029,
|
|
"learning_rate": 9.938028260602668e-06,
|
|
"loss": 1.3053,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 0.48856382978723406,
|
|
"grad_norm": 4.131626129150391,
|
|
"learning_rate": 9.937890140895819e-06,
|
|
"loss": 1.3332,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 0.4888297872340426,
|
|
"grad_norm": 3.8896591663360596,
|
|
"learning_rate": 9.937751868404838e-06,
|
|
"loss": 1.2105,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 0.48909574468085104,
|
|
"grad_norm": 3.6959292888641357,
|
|
"learning_rate": 9.937613443134e-06,
|
|
"loss": 1.1607,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 0.48936170212765956,
|
|
"grad_norm": 4.914716720581055,
|
|
"learning_rate": 9.937474865087588e-06,
|
|
"loss": 1.1406,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.4896276595744681,
|
|
"grad_norm": 3.811239004135132,
|
|
"learning_rate": 9.93733613426989e-06,
|
|
"loss": 1.2047,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 0.4898936170212766,
|
|
"grad_norm": 3.8995115756988525,
|
|
"learning_rate": 9.937197250685202e-06,
|
|
"loss": 1.1582,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 0.4901595744680851,
|
|
"grad_norm": 3.6087286472320557,
|
|
"learning_rate": 9.937058214337817e-06,
|
|
"loss": 1.1866,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 0.49042553191489363,
|
|
"grad_norm": 3.854526996612549,
|
|
"learning_rate": 9.936919025232036e-06,
|
|
"loss": 1.2744,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 0.49069148936170215,
|
|
"grad_norm": 3.870508909225464,
|
|
"learning_rate": 9.936779683372169e-06,
|
|
"loss": 1.1989,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.4909574468085106,
|
|
"grad_norm": 4.0505194664001465,
|
|
"learning_rate": 9.936640188762527e-06,
|
|
"loss": 1.206,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 0.49122340425531913,
|
|
"grad_norm": 3.8995118141174316,
|
|
"learning_rate": 9.936500541407424e-06,
|
|
"loss": 1.1642,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 0.49148936170212765,
|
|
"grad_norm": 4.045437812805176,
|
|
"learning_rate": 9.936360741311185e-06,
|
|
"loss": 1.2949,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.49175531914893617,
|
|
"grad_norm": 3.954519271850586,
|
|
"learning_rate": 9.93622078847813e-06,
|
|
"loss": 1.3334,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 0.4920212765957447,
|
|
"grad_norm": 3.9482545852661133,
|
|
"learning_rate": 9.936080682912594e-06,
|
|
"loss": 1.2859,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.4922872340425532,
|
|
"grad_norm": 3.7565512657165527,
|
|
"learning_rate": 9.935940424618908e-06,
|
|
"loss": 1.1294,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 0.4925531914893617,
|
|
"grad_norm": 4.012822151184082,
|
|
"learning_rate": 9.935800013601415e-06,
|
|
"loss": 1.4283,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 0.49281914893617024,
|
|
"grad_norm": 3.7840845584869385,
|
|
"learning_rate": 9.935659449864458e-06,
|
|
"loss": 1.332,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 0.4930851063829787,
|
|
"grad_norm": 4.097705364227295,
|
|
"learning_rate": 9.935518733412387e-06,
|
|
"loss": 1.1062,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 0.4933510638297872,
|
|
"grad_norm": 4.073275089263916,
|
|
"learning_rate": 9.935377864249558e-06,
|
|
"loss": 1.4567,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.49361702127659574,
|
|
"grad_norm": 4.020910263061523,
|
|
"learning_rate": 9.935236842380325e-06,
|
|
"loss": 1.247,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 0.49388297872340425,
|
|
"grad_norm": 4.380120277404785,
|
|
"learning_rate": 9.935095667809053e-06,
|
|
"loss": 1.2439,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 0.49414893617021277,
|
|
"grad_norm": 3.8681838512420654,
|
|
"learning_rate": 9.934954340540111e-06,
|
|
"loss": 1.3522,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 0.4944148936170213,
|
|
"grad_norm": 3.7794203758239746,
|
|
"learning_rate": 9.934812860577871e-06,
|
|
"loss": 1.1068,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 0.4946808510638298,
|
|
"grad_norm": 3.9970266819000244,
|
|
"learning_rate": 9.934671227926714e-06,
|
|
"loss": 1.228,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.4949468085106383,
|
|
"grad_norm": 4.03349494934082,
|
|
"learning_rate": 9.934529442591016e-06,
|
|
"loss": 1.5158,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 0.4952127659574468,
|
|
"grad_norm": 3.6862449645996094,
|
|
"learning_rate": 9.934387504575169e-06,
|
|
"loss": 1.3988,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 0.4954787234042553,
|
|
"grad_norm": 3.7959797382354736,
|
|
"learning_rate": 9.934245413883561e-06,
|
|
"loss": 1.2412,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 0.4957446808510638,
|
|
"grad_norm": 3.952791929244995,
|
|
"learning_rate": 9.934103170520592e-06,
|
|
"loss": 1.3866,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 0.49601063829787234,
|
|
"grad_norm": 3.7724785804748535,
|
|
"learning_rate": 9.933960774490663e-06,
|
|
"loss": 1.1724,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.49627659574468086,
|
|
"grad_norm": 3.9937689304351807,
|
|
"learning_rate": 9.933818225798178e-06,
|
|
"loss": 1.3353,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 0.4965425531914894,
|
|
"grad_norm": 3.818441152572632,
|
|
"learning_rate": 9.933675524447549e-06,
|
|
"loss": 1.205,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 0.4968085106382979,
|
|
"grad_norm": 3.97725772857666,
|
|
"learning_rate": 9.933532670443188e-06,
|
|
"loss": 1.289,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 0.49707446808510636,
|
|
"grad_norm": 3.930464744567871,
|
|
"learning_rate": 9.93338966378952e-06,
|
|
"loss": 1.5099,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 0.4973404255319149,
|
|
"grad_norm": 4.353559494018555,
|
|
"learning_rate": 9.933246504490966e-06,
|
|
"loss": 1.4003,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.4976063829787234,
|
|
"grad_norm": 3.9544339179992676,
|
|
"learning_rate": 9.933103192551958e-06,
|
|
"loss": 1.1387,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 0.4978723404255319,
|
|
"grad_norm": 3.9833321571350098,
|
|
"learning_rate": 9.932959727976928e-06,
|
|
"loss": 1.2584,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 0.49813829787234043,
|
|
"grad_norm": 3.862346887588501,
|
|
"learning_rate": 9.932816110770317e-06,
|
|
"loss": 1.4073,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 0.49840425531914895,
|
|
"grad_norm": 3.7747912406921387,
|
|
"learning_rate": 9.932672340936568e-06,
|
|
"loss": 1.2541,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 0.49867021276595747,
|
|
"grad_norm": 4.324585437774658,
|
|
"learning_rate": 9.93252841848013e-06,
|
|
"loss": 1.4344,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.498936170212766,
|
|
"grad_norm": 4.572371006011963,
|
|
"learning_rate": 9.932384343405452e-06,
|
|
"loss": 1.246,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 0.49920212765957445,
|
|
"grad_norm": 4.566850662231445,
|
|
"learning_rate": 9.932240115716998e-06,
|
|
"loss": 1.2813,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 0.49946808510638296,
|
|
"grad_norm": 3.940889358520508,
|
|
"learning_rate": 9.932095735419228e-06,
|
|
"loss": 1.1925,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 0.4997340425531915,
|
|
"grad_norm": 3.6935203075408936,
|
|
"learning_rate": 9.93195120251661e-06,
|
|
"loss": 1.2649,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 4.11472749710083,
|
|
"learning_rate": 9.931806517013612e-06,
|
|
"loss": 1.3672,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.5002659574468085,
|
|
"grad_norm": 4.156626224517822,
|
|
"learning_rate": 9.931661678914717e-06,
|
|
"loss": 1.4258,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 0.500531914893617,
|
|
"grad_norm": 4.2577805519104,
|
|
"learning_rate": 9.9315166882244e-06,
|
|
"loss": 1.3524,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 0.5007978723404255,
|
|
"grad_norm": 3.9902119636535645,
|
|
"learning_rate": 9.931371544947154e-06,
|
|
"loss": 1.2988,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 0.5010638297872341,
|
|
"grad_norm": 4.20100736618042,
|
|
"learning_rate": 9.931226249087465e-06,
|
|
"loss": 1.3102,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 0.5013297872340425,
|
|
"grad_norm": 4.172153949737549,
|
|
"learning_rate": 9.93108080064983e-06,
|
|
"loss": 1.2019,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.5015957446808511,
|
|
"grad_norm": 4.27764892578125,
|
|
"learning_rate": 9.93093519963875e-06,
|
|
"loss": 1.2075,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 0.5018617021276596,
|
|
"grad_norm": 4.327826023101807,
|
|
"learning_rate": 9.930789446058729e-06,
|
|
"loss": 1.2459,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 0.502127659574468,
|
|
"grad_norm": 4.269448757171631,
|
|
"learning_rate": 9.930643539914276e-06,
|
|
"loss": 1.4385,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 0.5023936170212766,
|
|
"grad_norm": 3.7377564907073975,
|
|
"learning_rate": 9.930497481209908e-06,
|
|
"loss": 1.2267,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 0.5026595744680851,
|
|
"grad_norm": 3.958397388458252,
|
|
"learning_rate": 9.930351269950144e-06,
|
|
"loss": 1.3289,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.5029255319148936,
|
|
"grad_norm": 3.992171049118042,
|
|
"learning_rate": 9.930204906139506e-06,
|
|
"loss": 1.2989,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 0.5031914893617021,
|
|
"grad_norm": 3.8019278049468994,
|
|
"learning_rate": 9.930058389782523e-06,
|
|
"loss": 1.3542,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 0.5034574468085107,
|
|
"grad_norm": 3.7610788345336914,
|
|
"learning_rate": 9.929911720883729e-06,
|
|
"loss": 1.247,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 0.5037234042553191,
|
|
"grad_norm": 3.765941619873047,
|
|
"learning_rate": 9.929764899447662e-06,
|
|
"loss": 1.3651,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 0.5039893617021277,
|
|
"grad_norm": 4.16331672668457,
|
|
"learning_rate": 9.929617925478868e-06,
|
|
"loss": 1.28,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.5042553191489362,
|
|
"grad_norm": 4.166515827178955,
|
|
"learning_rate": 9.929470798981888e-06,
|
|
"loss": 1.2401,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 0.5045212765957446,
|
|
"grad_norm": 4.0264177322387695,
|
|
"learning_rate": 9.929323519961278e-06,
|
|
"loss": 1.3036,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 0.5047872340425532,
|
|
"grad_norm": 3.85672926902771,
|
|
"learning_rate": 9.929176088421596e-06,
|
|
"loss": 1.1619,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 0.5050531914893617,
|
|
"grad_norm": 4.00507926940918,
|
|
"learning_rate": 9.929028504367402e-06,
|
|
"loss": 1.2787,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 0.5053191489361702,
|
|
"grad_norm": 3.6691126823425293,
|
|
"learning_rate": 9.928880767803264e-06,
|
|
"loss": 1.3256,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.5055851063829787,
|
|
"grad_norm": 4.093438625335693,
|
|
"learning_rate": 9.92873287873375e-06,
|
|
"loss": 1.2623,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 0.5058510638297873,
|
|
"grad_norm": 3.689911127090454,
|
|
"learning_rate": 9.92858483716344e-06,
|
|
"loss": 1.4022,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 0.5061170212765957,
|
|
"grad_norm": 4.178584575653076,
|
|
"learning_rate": 9.928436643096909e-06,
|
|
"loss": 1.3588,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 0.5063829787234042,
|
|
"grad_norm": 4.098899841308594,
|
|
"learning_rate": 9.928288296538749e-06,
|
|
"loss": 1.2687,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 0.5066489361702128,
|
|
"grad_norm": 4.034060001373291,
|
|
"learning_rate": 9.928139797493545e-06,
|
|
"loss": 1.2859,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.5069148936170212,
|
|
"grad_norm": 4.75716495513916,
|
|
"learning_rate": 9.927991145965894e-06,
|
|
"loss": 1.445,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 0.5071808510638298,
|
|
"grad_norm": 3.466297149658203,
|
|
"learning_rate": 9.927842341960396e-06,
|
|
"loss": 1.0634,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 0.5074468085106383,
|
|
"grad_norm": 3.9337103366851807,
|
|
"learning_rate": 9.927693385481652e-06,
|
|
"loss": 1.4115,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 0.5077127659574469,
|
|
"grad_norm": 3.6876132488250732,
|
|
"learning_rate": 9.927544276534275e-06,
|
|
"loss": 1.2333,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 0.5079787234042553,
|
|
"grad_norm": 4.154485702514648,
|
|
"learning_rate": 9.927395015122876e-06,
|
|
"loss": 1.2432,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.5082446808510638,
|
|
"grad_norm": 4.0430073738098145,
|
|
"learning_rate": 9.927245601252074e-06,
|
|
"loss": 1.3562,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 0.5085106382978724,
|
|
"grad_norm": 3.6701016426086426,
|
|
"learning_rate": 9.927096034926491e-06,
|
|
"loss": 1.2138,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 0.5087765957446808,
|
|
"grad_norm": 3.7969815731048584,
|
|
"learning_rate": 9.926946316150757e-06,
|
|
"loss": 1.3166,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 0.5090425531914894,
|
|
"grad_norm": 3.662705183029175,
|
|
"learning_rate": 9.926796444929502e-06,
|
|
"loss": 1.1107,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 0.5093085106382979,
|
|
"grad_norm": 3.8880231380462646,
|
|
"learning_rate": 9.926646421267366e-06,
|
|
"loss": 1.2989,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.5095744680851064,
|
|
"grad_norm": 3.6114046573638916,
|
|
"learning_rate": 9.926496245168989e-06,
|
|
"loss": 1.1822,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 0.5098404255319149,
|
|
"grad_norm": 3.799083948135376,
|
|
"learning_rate": 9.926345916639018e-06,
|
|
"loss": 1.1918,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 0.5101063829787233,
|
|
"grad_norm": 3.4708175659179688,
|
|
"learning_rate": 9.926195435682102e-06,
|
|
"loss": 1.1244,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 0.5103723404255319,
|
|
"grad_norm": 4.323407173156738,
|
|
"learning_rate": 9.926044802302904e-06,
|
|
"loss": 1.275,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 0.5106382978723404,
|
|
"grad_norm": 3.8659491539001465,
|
|
"learning_rate": 9.925894016506076e-06,
|
|
"loss": 1.2904,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.510904255319149,
|
|
"grad_norm": 3.7898192405700684,
|
|
"learning_rate": 9.925743078296288e-06,
|
|
"loss": 1.2569,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 0.5111702127659574,
|
|
"grad_norm": 3.559047222137451,
|
|
"learning_rate": 9.925591987678212e-06,
|
|
"loss": 1.3267,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 0.511436170212766,
|
|
"grad_norm": 3.8164639472961426,
|
|
"learning_rate": 9.925440744656518e-06,
|
|
"loss": 1.2059,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 0.5117021276595745,
|
|
"grad_norm": 4.318164825439453,
|
|
"learning_rate": 9.925289349235892e-06,
|
|
"loss": 1.3528,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 0.511968085106383,
|
|
"grad_norm": 3.8021814823150635,
|
|
"learning_rate": 9.925137801421011e-06,
|
|
"loss": 1.2096,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.5122340425531915,
|
|
"grad_norm": 3.7836246490478516,
|
|
"learning_rate": 9.924986101216569e-06,
|
|
"loss": 1.2719,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 0.5125,
|
|
"grad_norm": 4.108916282653809,
|
|
"learning_rate": 9.92483424862726e-06,
|
|
"loss": 1.4018,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 0.5127659574468085,
|
|
"grad_norm": 3.7151575088500977,
|
|
"learning_rate": 9.92468224365778e-06,
|
|
"loss": 1.3966,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 0.513031914893617,
|
|
"grad_norm": 3.5576205253601074,
|
|
"learning_rate": 9.924530086312834e-06,
|
|
"loss": 1.2066,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 0.5132978723404256,
|
|
"grad_norm": 3.6642985343933105,
|
|
"learning_rate": 9.924377776597128e-06,
|
|
"loss": 1.3887,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.513563829787234,
|
|
"grad_norm": 4.360495567321777,
|
|
"learning_rate": 9.924225314515375e-06,
|
|
"loss": 1.6151,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 0.5138297872340426,
|
|
"grad_norm": 3.934380292892456,
|
|
"learning_rate": 9.924072700072296e-06,
|
|
"loss": 1.2027,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 0.5140957446808511,
|
|
"grad_norm": 3.95251727104187,
|
|
"learning_rate": 9.923919933272608e-06,
|
|
"loss": 1.4496,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 0.5143617021276595,
|
|
"grad_norm": 3.660336494445801,
|
|
"learning_rate": 9.923767014121042e-06,
|
|
"loss": 1.2549,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 0.5146276595744681,
|
|
"grad_norm": 3.936469316482544,
|
|
"learning_rate": 9.923613942622326e-06,
|
|
"loss": 1.3851,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.5148936170212766,
|
|
"grad_norm": 3.912565231323242,
|
|
"learning_rate": 9.923460718781198e-06,
|
|
"loss": 1.303,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 0.5151595744680851,
|
|
"grad_norm": 3.9063549041748047,
|
|
"learning_rate": 9.923307342602399e-06,
|
|
"loss": 1.315,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 0.5154255319148936,
|
|
"grad_norm": 3.749720335006714,
|
|
"learning_rate": 9.923153814090675e-06,
|
|
"loss": 1.2961,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 0.5156914893617022,
|
|
"grad_norm": 3.978954315185547,
|
|
"learning_rate": 9.923000133250776e-06,
|
|
"loss": 1.4325,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 0.5159574468085106,
|
|
"grad_norm": 4.081971645355225,
|
|
"learning_rate": 9.922846300087454e-06,
|
|
"loss": 1.2811,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.5162234042553191,
|
|
"grad_norm": 3.9421591758728027,
|
|
"learning_rate": 9.922692314605472e-06,
|
|
"loss": 1.3513,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 0.5164893617021277,
|
|
"grad_norm": 3.6500041484832764,
|
|
"learning_rate": 9.922538176809597e-06,
|
|
"loss": 1.2927,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 0.5167553191489361,
|
|
"grad_norm": 3.858421564102173,
|
|
"learning_rate": 9.922383886704594e-06,
|
|
"loss": 1.1699,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 0.5170212765957447,
|
|
"grad_norm": 4.286783695220947,
|
|
"learning_rate": 9.922229444295238e-06,
|
|
"loss": 1.4037,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 0.5172872340425532,
|
|
"grad_norm": 4.163476943969727,
|
|
"learning_rate": 9.922074849586308e-06,
|
|
"loss": 1.1268,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.5175531914893617,
|
|
"grad_norm": 3.8577239513397217,
|
|
"learning_rate": 9.921920102582587e-06,
|
|
"loss": 1.2154,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 0.5178191489361702,
|
|
"grad_norm": 4.213263988494873,
|
|
"learning_rate": 9.921765203288862e-06,
|
|
"loss": 1.3188,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 0.5180851063829788,
|
|
"grad_norm": 3.817172050476074,
|
|
"learning_rate": 9.921610151709929e-06,
|
|
"loss": 1.2897,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 0.5183510638297872,
|
|
"grad_norm": 3.954479694366455,
|
|
"learning_rate": 9.921454947850582e-06,
|
|
"loss": 1.1568,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 0.5186170212765957,
|
|
"grad_norm": 4.054901123046875,
|
|
"learning_rate": 9.921299591715624e-06,
|
|
"loss": 1.1991,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.5188829787234043,
|
|
"grad_norm": 3.9514553546905518,
|
|
"learning_rate": 9.921144083309864e-06,
|
|
"loss": 1.2588,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 0.5191489361702127,
|
|
"grad_norm": 4.228671550750732,
|
|
"learning_rate": 9.920988422638112e-06,
|
|
"loss": 1.3348,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 0.5194148936170213,
|
|
"grad_norm": 3.997422695159912,
|
|
"learning_rate": 9.920832609705184e-06,
|
|
"loss": 1.2402,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 0.5196808510638298,
|
|
"grad_norm": 3.8394384384155273,
|
|
"learning_rate": 9.920676644515902e-06,
|
|
"loss": 1.222,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 0.5199468085106383,
|
|
"grad_norm": 3.654381036758423,
|
|
"learning_rate": 9.92052052707509e-06,
|
|
"loss": 1.4059,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.5202127659574468,
|
|
"grad_norm": 3.881578207015991,
|
|
"learning_rate": 9.92036425738758e-06,
|
|
"loss": 1.3507,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 0.5204787234042553,
|
|
"grad_norm": 3.819066286087036,
|
|
"learning_rate": 9.920207835458208e-06,
|
|
"loss": 1.3433,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 0.5207446808510638,
|
|
"grad_norm": 3.2657382488250732,
|
|
"learning_rate": 9.920051261291812e-06,
|
|
"loss": 1.0601,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 0.5210106382978723,
|
|
"grad_norm": 3.789560556411743,
|
|
"learning_rate": 9.919894534893237e-06,
|
|
"loss": 1.2395,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 0.5212765957446809,
|
|
"grad_norm": 3.620661973953247,
|
|
"learning_rate": 9.919737656267335e-06,
|
|
"loss": 1.1793,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.5215425531914893,
|
|
"grad_norm": 4.208719253540039,
|
|
"learning_rate": 9.919580625418955e-06,
|
|
"loss": 1.5431,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 0.5218085106382979,
|
|
"grad_norm": 4.2255024909973145,
|
|
"learning_rate": 9.919423442352958e-06,
|
|
"loss": 1.3665,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 0.5220744680851064,
|
|
"grad_norm": 4.246603965759277,
|
|
"learning_rate": 9.91926610707421e-06,
|
|
"loss": 1.2552,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 0.5223404255319148,
|
|
"grad_norm": 4.042827606201172,
|
|
"learning_rate": 9.919108619587575e-06,
|
|
"loss": 1.2171,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 0.5226063829787234,
|
|
"grad_norm": 4.006556510925293,
|
|
"learning_rate": 9.918950979897928e-06,
|
|
"loss": 1.2559,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.5228723404255319,
|
|
"grad_norm": 3.7249419689178467,
|
|
"learning_rate": 9.918793188010147e-06,
|
|
"loss": 1.0816,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 0.5231382978723405,
|
|
"grad_norm": 4.087320804595947,
|
|
"learning_rate": 9.918635243929115e-06,
|
|
"loss": 1.2607,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 0.5234042553191489,
|
|
"grad_norm": 4.031649589538574,
|
|
"learning_rate": 9.918477147659715e-06,
|
|
"loss": 1.2983,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 0.5236702127659575,
|
|
"grad_norm": 4.055499076843262,
|
|
"learning_rate": 9.918318899206842e-06,
|
|
"loss": 1.2686,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 0.523936170212766,
|
|
"grad_norm": 4.922122955322266,
|
|
"learning_rate": 9.918160498575394e-06,
|
|
"loss": 1.2761,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.5242021276595744,
|
|
"grad_norm": 4.155685901641846,
|
|
"learning_rate": 9.918001945770267e-06,
|
|
"loss": 1.3004,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 0.524468085106383,
|
|
"grad_norm": 4.165022373199463,
|
|
"learning_rate": 9.91784324079637e-06,
|
|
"loss": 1.4643,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 0.5247340425531914,
|
|
"grad_norm": 3.9013566970825195,
|
|
"learning_rate": 9.917684383658614e-06,
|
|
"loss": 1.2264,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 0.525,
|
|
"grad_norm": 4.016994953155518,
|
|
"learning_rate": 9.917525374361913e-06,
|
|
"loss": 1.2748,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 0.5252659574468085,
|
|
"grad_norm": 4.0600996017456055,
|
|
"learning_rate": 9.917366212911187e-06,
|
|
"loss": 1.2,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.5255319148936171,
|
|
"grad_norm": 4.1870903968811035,
|
|
"learning_rate": 9.91720689931136e-06,
|
|
"loss": 1.2307,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 0.5257978723404255,
|
|
"grad_norm": 3.7501108646392822,
|
|
"learning_rate": 9.917047433567364e-06,
|
|
"loss": 1.2853,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 0.5260638297872341,
|
|
"grad_norm": 3.8789479732513428,
|
|
"learning_rate": 9.91688781568413e-06,
|
|
"loss": 1.3571,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 0.5263297872340426,
|
|
"grad_norm": 3.641453981399536,
|
|
"learning_rate": 9.9167280456666e-06,
|
|
"loss": 1.1975,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 0.526595744680851,
|
|
"grad_norm": 4.097661972045898,
|
|
"learning_rate": 9.916568123519713e-06,
|
|
"loss": 1.2415,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.5268617021276596,
|
|
"grad_norm": 3.447585105895996,
|
|
"learning_rate": 9.91640804924842e-06,
|
|
"loss": 1.1599,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 0.527127659574468,
|
|
"grad_norm": 3.906158208847046,
|
|
"learning_rate": 9.916247822857675e-06,
|
|
"loss": 1.2141,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 0.5273936170212766,
|
|
"grad_norm": 4.226005554199219,
|
|
"learning_rate": 9.916087444352433e-06,
|
|
"loss": 1.3575,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 0.5276595744680851,
|
|
"grad_norm": 3.955073118209839,
|
|
"learning_rate": 9.91592691373766e-06,
|
|
"loss": 1.159,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 0.5279255319148937,
|
|
"grad_norm": 3.770538568496704,
|
|
"learning_rate": 9.915766231018317e-06,
|
|
"loss": 1.2722,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.5281914893617021,
|
|
"grad_norm": 4.1326422691345215,
|
|
"learning_rate": 9.91560539619938e-06,
|
|
"loss": 1.4044,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 0.5284574468085106,
|
|
"grad_norm": 3.933978319168091,
|
|
"learning_rate": 9.915444409285827e-06,
|
|
"loss": 1.1495,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 0.5287234042553192,
|
|
"grad_norm": 3.8940069675445557,
|
|
"learning_rate": 9.915283270282637e-06,
|
|
"loss": 1.2658,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 0.5289893617021276,
|
|
"grad_norm": 3.8015975952148438,
|
|
"learning_rate": 9.915121979194793e-06,
|
|
"loss": 1.2155,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 0.5292553191489362,
|
|
"grad_norm": 4.204024791717529,
|
|
"learning_rate": 9.914960536027289e-06,
|
|
"loss": 1.3081,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.5295212765957447,
|
|
"grad_norm": 3.80530047416687,
|
|
"learning_rate": 9.91479894078512e-06,
|
|
"loss": 1.2827,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 0.5297872340425532,
|
|
"grad_norm": 4.011538505554199,
|
|
"learning_rate": 9.914637193473284e-06,
|
|
"loss": 1.2801,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 0.5300531914893617,
|
|
"grad_norm": 3.848898410797119,
|
|
"learning_rate": 9.914475294096788e-06,
|
|
"loss": 1.2904,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 0.5303191489361702,
|
|
"grad_norm": 3.7076499462127686,
|
|
"learning_rate": 9.91431324266064e-06,
|
|
"loss": 1.3455,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 0.5305851063829787,
|
|
"grad_norm": 4.372555255889893,
|
|
"learning_rate": 9.914151039169855e-06,
|
|
"loss": 1.3233,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.5308510638297872,
|
|
"grad_norm": 4.168186664581299,
|
|
"learning_rate": 9.913988683629449e-06,
|
|
"loss": 1.3303,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 0.5311170212765958,
|
|
"grad_norm": 3.4844412803649902,
|
|
"learning_rate": 9.91382617604445e-06,
|
|
"loss": 1.28,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 0.5313829787234042,
|
|
"grad_norm": 3.981612205505371,
|
|
"learning_rate": 9.913663516419883e-06,
|
|
"loss": 1.4133,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 0.5316489361702128,
|
|
"grad_norm": 3.6310243606567383,
|
|
"learning_rate": 9.913500704760781e-06,
|
|
"loss": 1.2546,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 0.5319148936170213,
|
|
"grad_norm": 3.6045448780059814,
|
|
"learning_rate": 9.913337741072183e-06,
|
|
"loss": 1.1445,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5319148936170213,
|
|
"eval_loss": 1.2938566207885742,
|
|
"eval_runtime": 12.2817,
|
|
"eval_samples_per_second": 32.569,
|
|
"eval_steps_per_second": 4.071,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5321808510638298,
|
|
"grad_norm": 4.040936470031738,
|
|
"learning_rate": 9.913174625359132e-06,
|
|
"loss": 1.2325,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 0.5324468085106383,
|
|
"grad_norm": 3.7908430099487305,
|
|
"learning_rate": 9.913011357626672e-06,
|
|
"loss": 1.3091,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 0.5327127659574468,
|
|
"grad_norm": 3.7691242694854736,
|
|
"learning_rate": 9.912847937879855e-06,
|
|
"loss": 1.2236,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 0.5329787234042553,
|
|
"grad_norm": 4.643370628356934,
|
|
"learning_rate": 9.91268436612374e-06,
|
|
"loss": 1.3033,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 0.5332446808510638,
|
|
"grad_norm": 3.5233020782470703,
|
|
"learning_rate": 9.912520642363387e-06,
|
|
"loss": 1.1542,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.5335106382978724,
|
|
"grad_norm": 4.1154022216796875,
|
|
"learning_rate": 9.912356766603862e-06,
|
|
"loss": 1.4088,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 0.5337765957446808,
|
|
"grad_norm": 5.4873247146606445,
|
|
"learning_rate": 9.912192738850234e-06,
|
|
"loss": 1.3057,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 0.5340425531914894,
|
|
"grad_norm": 3.9308226108551025,
|
|
"learning_rate": 9.912028559107577e-06,
|
|
"loss": 1.2788,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 0.5343085106382979,
|
|
"grad_norm": 3.6488893032073975,
|
|
"learning_rate": 9.91186422738098e-06,
|
|
"loss": 1.1555,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 0.5345744680851063,
|
|
"grad_norm": 3.553065061569214,
|
|
"learning_rate": 9.911699743675513e-06,
|
|
"loss": 1.2228,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.5348404255319149,
|
|
"grad_norm": 3.8336079120635986,
|
|
"learning_rate": 9.911535107996278e-06,
|
|
"loss": 1.2563,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 0.5351063829787234,
|
|
"grad_norm": 4.1601715087890625,
|
|
"learning_rate": 9.911370320348363e-06,
|
|
"loss": 1.2525,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 0.535372340425532,
|
|
"grad_norm": 3.4441726207733154,
|
|
"learning_rate": 9.911205380736868e-06,
|
|
"loss": 1.2293,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 0.5356382978723404,
|
|
"grad_norm": 4.281271457672119,
|
|
"learning_rate": 9.911040289166896e-06,
|
|
"loss": 1.5168,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 0.535904255319149,
|
|
"grad_norm": 3.982959508895874,
|
|
"learning_rate": 9.910875045643555e-06,
|
|
"loss": 1.2864,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.5361702127659574,
|
|
"grad_norm": 3.9199705123901367,
|
|
"learning_rate": 9.91070965017196e-06,
|
|
"loss": 1.2906,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 0.5364361702127659,
|
|
"grad_norm": 4.073878288269043,
|
|
"learning_rate": 9.910544102757224e-06,
|
|
"loss": 1.2435,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 0.5367021276595745,
|
|
"grad_norm": 4.169588088989258,
|
|
"learning_rate": 9.910378403404473e-06,
|
|
"loss": 1.3231,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 0.5369680851063829,
|
|
"grad_norm": 3.7797560691833496,
|
|
"learning_rate": 9.910212552118835e-06,
|
|
"loss": 1.2632,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 0.5372340425531915,
|
|
"grad_norm": 4.002804756164551,
|
|
"learning_rate": 9.910046548905437e-06,
|
|
"loss": 1.3988,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.5375,
|
|
"grad_norm": 3.8956003189086914,
|
|
"learning_rate": 9.90988039376942e-06,
|
|
"loss": 1.2534,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 0.5377659574468086,
|
|
"grad_norm": 3.6937549114227295,
|
|
"learning_rate": 9.90971408671592e-06,
|
|
"loss": 1.2312,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 0.538031914893617,
|
|
"grad_norm": 3.7216007709503174,
|
|
"learning_rate": 9.909547627750089e-06,
|
|
"loss": 1.2408,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 0.5382978723404256,
|
|
"grad_norm": 3.827702760696411,
|
|
"learning_rate": 9.909381016877074e-06,
|
|
"loss": 1.2551,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 0.538563829787234,
|
|
"grad_norm": 3.5307586193084717,
|
|
"learning_rate": 9.909214254102027e-06,
|
|
"loss": 1.2352,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.5388297872340425,
|
|
"grad_norm": 3.7490625381469727,
|
|
"learning_rate": 9.909047339430113e-06,
|
|
"loss": 1.2867,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 0.5390957446808511,
|
|
"grad_norm": 4.107030391693115,
|
|
"learning_rate": 9.908880272866495e-06,
|
|
"loss": 1.3459,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 0.5393617021276595,
|
|
"grad_norm": 3.855973482131958,
|
|
"learning_rate": 9.908713054416342e-06,
|
|
"loss": 1.224,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 0.5396276595744681,
|
|
"grad_norm": 4.167142391204834,
|
|
"learning_rate": 9.908545684084826e-06,
|
|
"loss": 1.4258,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 0.5398936170212766,
|
|
"grad_norm": 3.899373769760132,
|
|
"learning_rate": 9.90837816187713e-06,
|
|
"loss": 1.2853,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.5401595744680852,
|
|
"grad_norm": 3.8360328674316406,
|
|
"learning_rate": 9.908210487798433e-06,
|
|
"loss": 1.3503,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 0.5404255319148936,
|
|
"grad_norm": 3.633971929550171,
|
|
"learning_rate": 9.908042661853926e-06,
|
|
"loss": 1.0622,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 0.5406914893617021,
|
|
"grad_norm": 4.1685991287231445,
|
|
"learning_rate": 9.9078746840488e-06,
|
|
"loss": 1.3733,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 0.5409574468085107,
|
|
"grad_norm": 3.9930756092071533,
|
|
"learning_rate": 9.907706554388253e-06,
|
|
"loss": 1.4306,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 0.5412234042553191,
|
|
"grad_norm": 3.9129087924957275,
|
|
"learning_rate": 9.907538272877487e-06,
|
|
"loss": 1.1834,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.5414893617021277,
|
|
"grad_norm": 3.658611536026001,
|
|
"learning_rate": 9.90736983952171e-06,
|
|
"loss": 1.1908,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 0.5417553191489362,
|
|
"grad_norm": 3.9367542266845703,
|
|
"learning_rate": 9.907201254326132e-06,
|
|
"loss": 1.2853,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 0.5420212765957447,
|
|
"grad_norm": 3.9035940170288086,
|
|
"learning_rate": 9.907032517295966e-06,
|
|
"loss": 1.2867,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 0.5422872340425532,
|
|
"grad_norm": 3.702096939086914,
|
|
"learning_rate": 9.906863628436441e-06,
|
|
"loss": 1.2614,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 0.5425531914893617,
|
|
"grad_norm": 4.073267459869385,
|
|
"learning_rate": 9.906694587752777e-06,
|
|
"loss": 1.3793,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.5428191489361702,
|
|
"grad_norm": 3.864699363708496,
|
|
"learning_rate": 9.906525395250206e-06,
|
|
"loss": 1.1233,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 0.5430851063829787,
|
|
"grad_norm": 3.8738772869110107,
|
|
"learning_rate": 9.906356050933962e-06,
|
|
"loss": 1.1704,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 0.5433510638297873,
|
|
"grad_norm": 3.837299108505249,
|
|
"learning_rate": 9.906186554809284e-06,
|
|
"loss": 1.1802,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 0.5436170212765957,
|
|
"grad_norm": 4.00624942779541,
|
|
"learning_rate": 9.906016906881419e-06,
|
|
"loss": 1.2934,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 0.5438829787234043,
|
|
"grad_norm": 3.6519479751586914,
|
|
"learning_rate": 9.905847107155615e-06,
|
|
"loss": 1.2313,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.5441489361702128,
|
|
"grad_norm": 4.127234935760498,
|
|
"learning_rate": 9.905677155637126e-06,
|
|
"loss": 1.476,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 0.5444148936170212,
|
|
"grad_norm": 3.580862283706665,
|
|
"learning_rate": 9.90550705233121e-06,
|
|
"loss": 1.1991,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 0.5446808510638298,
|
|
"grad_norm": 4.004328727722168,
|
|
"learning_rate": 9.90533679724313e-06,
|
|
"loss": 1.2811,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 0.5449468085106383,
|
|
"grad_norm": 3.6748900413513184,
|
|
"learning_rate": 9.905166390378154e-06,
|
|
"loss": 1.3381,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 0.5452127659574468,
|
|
"grad_norm": 3.5765295028686523,
|
|
"learning_rate": 9.904995831741553e-06,
|
|
"loss": 1.2265,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.5454787234042553,
|
|
"grad_norm": 3.910905361175537,
|
|
"learning_rate": 9.904825121338609e-06,
|
|
"loss": 1.2516,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 0.5457446808510639,
|
|
"grad_norm": 3.8337693214416504,
|
|
"learning_rate": 9.9046542591746e-06,
|
|
"loss": 1.2997,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 0.5460106382978723,
|
|
"grad_norm": 3.837082862854004,
|
|
"learning_rate": 9.904483245254812e-06,
|
|
"loss": 1.3341,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 0.5462765957446809,
|
|
"grad_norm": 4.098066806793213,
|
|
"learning_rate": 9.90431207958454e-06,
|
|
"loss": 1.2182,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 0.5465425531914894,
|
|
"grad_norm": 4.022514343261719,
|
|
"learning_rate": 9.904140762169079e-06,
|
|
"loss": 1.4144,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.5468085106382978,
|
|
"grad_norm": 3.779283046722412,
|
|
"learning_rate": 9.903969293013727e-06,
|
|
"loss": 1.2291,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 0.5470744680851064,
|
|
"grad_norm": 4.28890323638916,
|
|
"learning_rate": 9.903797672123791e-06,
|
|
"loss": 1.3899,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 0.5473404255319149,
|
|
"grad_norm": 3.720780372619629,
|
|
"learning_rate": 9.903625899504583e-06,
|
|
"loss": 1.1992,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 0.5476063829787234,
|
|
"grad_norm": 3.80373215675354,
|
|
"learning_rate": 9.903453975161416e-06,
|
|
"loss": 1.322,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 0.5478723404255319,
|
|
"grad_norm": 4.012282371520996,
|
|
"learning_rate": 9.90328189909961e-06,
|
|
"loss": 1.1998,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.5481382978723405,
|
|
"grad_norm": 4.059588432312012,
|
|
"learning_rate": 9.903109671324488e-06,
|
|
"loss": 1.286,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 0.5484042553191489,
|
|
"grad_norm": 3.9015207290649414,
|
|
"learning_rate": 9.902937291841383e-06,
|
|
"loss": 1.3525,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 0.5486702127659574,
|
|
"grad_norm": 4.0359954833984375,
|
|
"learning_rate": 9.902764760655623e-06,
|
|
"loss": 1.3094,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 0.548936170212766,
|
|
"grad_norm": 3.487372875213623,
|
|
"learning_rate": 9.90259207777255e-06,
|
|
"loss": 1.2127,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 0.5492021276595744,
|
|
"grad_norm": 3.607064723968506,
|
|
"learning_rate": 9.902419243197505e-06,
|
|
"loss": 1.2091,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.549468085106383,
|
|
"grad_norm": 3.9896395206451416,
|
|
"learning_rate": 9.902246256935837e-06,
|
|
"loss": 1.3059,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 0.5497340425531915,
|
|
"grad_norm": 4.376030445098877,
|
|
"learning_rate": 9.9020731189929e-06,
|
|
"loss": 1.3092,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"grad_norm": 3.3590362071990967,
|
|
"learning_rate": 9.901899829374048e-06,
|
|
"loss": 1.201,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 0.5502659574468085,
|
|
"grad_norm": 3.7063753604888916,
|
|
"learning_rate": 9.901726388084643e-06,
|
|
"loss": 1.182,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 0.550531914893617,
|
|
"grad_norm": 3.709569215774536,
|
|
"learning_rate": 9.901552795130054e-06,
|
|
"loss": 1.1766,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.5507978723404255,
|
|
"grad_norm": 4.3449249267578125,
|
|
"learning_rate": 9.90137905051565e-06,
|
|
"loss": 1.3167,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 0.551063829787234,
|
|
"grad_norm": 3.8162055015563965,
|
|
"learning_rate": 9.901205154246807e-06,
|
|
"loss": 1.2192,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 0.5513297872340426,
|
|
"grad_norm": 3.792880058288574,
|
|
"learning_rate": 9.901031106328907e-06,
|
|
"loss": 1.2957,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 0.551595744680851,
|
|
"grad_norm": 3.6657822132110596,
|
|
"learning_rate": 9.900856906767334e-06,
|
|
"loss": 1.3045,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 0.5518617021276596,
|
|
"grad_norm": 3.327601194381714,
|
|
"learning_rate": 9.900682555567478e-06,
|
|
"loss": 1.1348,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.5521276595744681,
|
|
"grad_norm": 3.9993128776550293,
|
|
"learning_rate": 9.900508052734734e-06,
|
|
"loss": 1.2678,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 0.5523936170212767,
|
|
"grad_norm": 3.922495126724243,
|
|
"learning_rate": 9.900333398274501e-06,
|
|
"loss": 1.1644,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 0.5526595744680851,
|
|
"grad_norm": 3.6909377574920654,
|
|
"learning_rate": 9.900158592192184e-06,
|
|
"loss": 1.208,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 0.5529255319148936,
|
|
"grad_norm": 4.378490924835205,
|
|
"learning_rate": 9.89998363449319e-06,
|
|
"loss": 1.2866,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 0.5531914893617021,
|
|
"grad_norm": 3.6202850341796875,
|
|
"learning_rate": 9.899808525182935e-06,
|
|
"loss": 1.238,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.5534574468085106,
|
|
"grad_norm": 3.9422550201416016,
|
|
"learning_rate": 9.899633264266835e-06,
|
|
"loss": 1.2932,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 0.5537234042553192,
|
|
"grad_norm": 4.002807140350342,
|
|
"learning_rate": 9.899457851750312e-06,
|
|
"loss": 1.301,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 0.5539893617021276,
|
|
"grad_norm": 4.242476940155029,
|
|
"learning_rate": 9.899282287638795e-06,
|
|
"loss": 1.2967,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 0.5542553191489362,
|
|
"grad_norm": 4.148952007293701,
|
|
"learning_rate": 9.899106571937716e-06,
|
|
"loss": 1.2863,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 0.5545212765957447,
|
|
"grad_norm": 3.8258893489837646,
|
|
"learning_rate": 9.898930704652512e-06,
|
|
"loss": 1.2253,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.5547872340425531,
|
|
"grad_norm": 4.117706298828125,
|
|
"learning_rate": 9.898754685788623e-06,
|
|
"loss": 1.3706,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 0.5550531914893617,
|
|
"grad_norm": 3.989381790161133,
|
|
"learning_rate": 9.898578515351498e-06,
|
|
"loss": 1.2585,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 0.5553191489361702,
|
|
"grad_norm": 3.8721275329589844,
|
|
"learning_rate": 9.898402193346585e-06,
|
|
"loss": 1.1284,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 0.5555851063829788,
|
|
"grad_norm": 4.169785499572754,
|
|
"learning_rate": 9.898225719779342e-06,
|
|
"loss": 1.2176,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 0.5558510638297872,
|
|
"grad_norm": 3.8007307052612305,
|
|
"learning_rate": 9.898049094655229e-06,
|
|
"loss": 1.1421,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.5561170212765958,
|
|
"grad_norm": 3.48579740524292,
|
|
"learning_rate": 9.897872317979708e-06,
|
|
"loss": 1.1123,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 0.5563829787234043,
|
|
"grad_norm": 3.6224656105041504,
|
|
"learning_rate": 9.897695389758253e-06,
|
|
"loss": 1.2452,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 0.5566489361702127,
|
|
"grad_norm": 4.0066752433776855,
|
|
"learning_rate": 9.897518309996336e-06,
|
|
"loss": 1.3127,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 0.5569148936170213,
|
|
"grad_norm": 3.5834217071533203,
|
|
"learning_rate": 9.897341078699437e-06,
|
|
"loss": 1.1945,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 0.5571808510638298,
|
|
"grad_norm": 3.616166830062866,
|
|
"learning_rate": 9.897163695873036e-06,
|
|
"loss": 1.2113,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.5574468085106383,
|
|
"grad_norm": 4.5236945152282715,
|
|
"learning_rate": 9.896986161522627e-06,
|
|
"loss": 1.556,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 0.5577127659574468,
|
|
"grad_norm": 4.006591320037842,
|
|
"learning_rate": 9.896808475653701e-06,
|
|
"loss": 1.3505,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 0.5579787234042554,
|
|
"grad_norm": 4.137003421783447,
|
|
"learning_rate": 9.896630638271755e-06,
|
|
"loss": 1.2105,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 0.5582446808510638,
|
|
"grad_norm": 4.136394500732422,
|
|
"learning_rate": 9.896452649382291e-06,
|
|
"loss": 1.4277,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 0.5585106382978723,
|
|
"grad_norm": 3.8342485427856445,
|
|
"learning_rate": 9.896274508990818e-06,
|
|
"loss": 1.2839,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.5587765957446809,
|
|
"grad_norm": 3.687845230102539,
|
|
"learning_rate": 9.896096217102848e-06,
|
|
"loss": 1.1659,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 0.5590425531914893,
|
|
"grad_norm": 3.971306562423706,
|
|
"learning_rate": 9.895917773723895e-06,
|
|
"loss": 1.4681,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 0.5593085106382979,
|
|
"grad_norm": 3.5636236667633057,
|
|
"learning_rate": 9.895739178859483e-06,
|
|
"loss": 1.2463,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 0.5595744680851064,
|
|
"grad_norm": 4.580478191375732,
|
|
"learning_rate": 9.895560432515136e-06,
|
|
"loss": 1.488,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 0.5598404255319149,
|
|
"grad_norm": 3.5549540519714355,
|
|
"learning_rate": 9.895381534696385e-06,
|
|
"loss": 1.1869,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.5601063829787234,
|
|
"grad_norm": 3.6891443729400635,
|
|
"learning_rate": 9.895202485408766e-06,
|
|
"loss": 1.2356,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 0.560372340425532,
|
|
"grad_norm": 4.139247894287109,
|
|
"learning_rate": 9.895023284657821e-06,
|
|
"loss": 1.2941,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 0.5606382978723404,
|
|
"grad_norm": 3.616758346557617,
|
|
"learning_rate": 9.89484393244909e-06,
|
|
"loss": 1.2292,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 0.5609042553191489,
|
|
"grad_norm": 3.634755849838257,
|
|
"learning_rate": 9.894664428788126e-06,
|
|
"loss": 1.2215,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 0.5611702127659575,
|
|
"grad_norm": 3.9066550731658936,
|
|
"learning_rate": 9.89448477368048e-06,
|
|
"loss": 1.3777,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.5614361702127659,
|
|
"grad_norm": 3.8861474990844727,
|
|
"learning_rate": 9.894304967131713e-06,
|
|
"loss": 1.2666,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 0.5617021276595745,
|
|
"grad_norm": 3.3856041431427,
|
|
"learning_rate": 9.894125009147389e-06,
|
|
"loss": 1.3001,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 0.561968085106383,
|
|
"grad_norm": 3.5979838371276855,
|
|
"learning_rate": 9.893944899733076e-06,
|
|
"loss": 1.2005,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 0.5622340425531915,
|
|
"grad_norm": 3.851020336151123,
|
|
"learning_rate": 9.893764638894345e-06,
|
|
"loss": 1.3479,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 0.5625,
|
|
"grad_norm": 4.208298206329346,
|
|
"learning_rate": 9.893584226636773e-06,
|
|
"loss": 1.3329,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.5627659574468085,
|
|
"grad_norm": 3.6734988689422607,
|
|
"learning_rate": 9.893403662965944e-06,
|
|
"loss": 1.3678,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 0.563031914893617,
|
|
"grad_norm": 3.708069324493408,
|
|
"learning_rate": 9.893222947887446e-06,
|
|
"loss": 1.3176,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 0.5632978723404255,
|
|
"grad_norm": 4.194994926452637,
|
|
"learning_rate": 9.893042081406868e-06,
|
|
"loss": 1.381,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 0.5635638297872341,
|
|
"grad_norm": 3.740922689437866,
|
|
"learning_rate": 9.892861063529807e-06,
|
|
"loss": 1.1555,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 0.5638297872340425,
|
|
"grad_norm": 3.744663715362549,
|
|
"learning_rate": 9.892679894261865e-06,
|
|
"loss": 1.132,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.5640957446808511,
|
|
"grad_norm": 4.050332546234131,
|
|
"learning_rate": 9.892498573608645e-06,
|
|
"loss": 1.3709,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 0.5643617021276596,
|
|
"grad_norm": 3.9612951278686523,
|
|
"learning_rate": 9.89231710157576e-06,
|
|
"loss": 1.2954,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 0.564627659574468,
|
|
"grad_norm": 3.165841817855835,
|
|
"learning_rate": 9.892135478168824e-06,
|
|
"loss": 1.1757,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 0.5648936170212766,
|
|
"grad_norm": 3.6281683444976807,
|
|
"learning_rate": 9.891953703393455e-06,
|
|
"loss": 1.0733,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 0.5651595744680851,
|
|
"grad_norm": 3.7431442737579346,
|
|
"learning_rate": 9.89177177725528e-06,
|
|
"loss": 1.3628,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.5654255319148936,
|
|
"grad_norm": 3.704817295074463,
|
|
"learning_rate": 9.891589699759929e-06,
|
|
"loss": 1.284,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 0.5656914893617021,
|
|
"grad_norm": 3.5511844158172607,
|
|
"learning_rate": 9.89140747091303e-06,
|
|
"loss": 1.1152,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 0.5659574468085107,
|
|
"grad_norm": 3.450695753097534,
|
|
"learning_rate": 9.891225090720227e-06,
|
|
"loss": 1.2245,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 0.5662234042553191,
|
|
"grad_norm": 3.8009350299835205,
|
|
"learning_rate": 9.891042559187161e-06,
|
|
"loss": 1.319,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 0.5664893617021277,
|
|
"grad_norm": 4.276994228363037,
|
|
"learning_rate": 9.890859876319479e-06,
|
|
"loss": 1.3191,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.5667553191489362,
|
|
"grad_norm": 4.0986738204956055,
|
|
"learning_rate": 9.890677042122834e-06,
|
|
"loss": 1.2553,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 0.5670212765957446,
|
|
"grad_norm": 3.861093044281006,
|
|
"learning_rate": 9.890494056602883e-06,
|
|
"loss": 1.1618,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 0.5672872340425532,
|
|
"grad_norm": 3.8807971477508545,
|
|
"learning_rate": 9.89031091976529e-06,
|
|
"loss": 1.3676,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 0.5675531914893617,
|
|
"grad_norm": 3.5750906467437744,
|
|
"learning_rate": 9.890127631615719e-06,
|
|
"loss": 1.3009,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 0.5678191489361702,
|
|
"grad_norm": 3.740861654281616,
|
|
"learning_rate": 9.88994419215984e-06,
|
|
"loss": 1.3059,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.5680851063829787,
|
|
"grad_norm": 3.945333480834961,
|
|
"learning_rate": 9.88976060140333e-06,
|
|
"loss": 1.3027,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 0.5683510638297873,
|
|
"grad_norm": 3.9484307765960693,
|
|
"learning_rate": 9.889576859351873e-06,
|
|
"loss": 1.4177,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 0.5686170212765957,
|
|
"grad_norm": 3.9661643505096436,
|
|
"learning_rate": 9.88939296601115e-06,
|
|
"loss": 1.3607,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 0.5688829787234042,
|
|
"grad_norm": 3.4872074127197266,
|
|
"learning_rate": 9.88920892138685e-06,
|
|
"loss": 1.1658,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 0.5691489361702128,
|
|
"grad_norm": 3.545102119445801,
|
|
"learning_rate": 9.889024725484672e-06,
|
|
"loss": 1.1813,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.5694148936170212,
|
|
"grad_norm": 3.738452434539795,
|
|
"learning_rate": 9.888840378310312e-06,
|
|
"loss": 1.2977,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 0.5696808510638298,
|
|
"grad_norm": 3.6037521362304688,
|
|
"learning_rate": 9.888655879869475e-06,
|
|
"loss": 1.2053,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 0.5699468085106383,
|
|
"grad_norm": 4.002810955047607,
|
|
"learning_rate": 9.888471230167869e-06,
|
|
"loss": 1.1678,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 0.5702127659574469,
|
|
"grad_norm": 3.659442186355591,
|
|
"learning_rate": 9.88828642921121e-06,
|
|
"loss": 1.3656,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 0.5704787234042553,
|
|
"grad_norm": 3.817089557647705,
|
|
"learning_rate": 9.88810147700521e-06,
|
|
"loss": 1.3597,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.5707446808510638,
|
|
"grad_norm": 3.5655431747436523,
|
|
"learning_rate": 9.887916373555597e-06,
|
|
"loss": 1.2276,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 0.5710106382978724,
|
|
"grad_norm": 3.873889923095703,
|
|
"learning_rate": 9.887731118868098e-06,
|
|
"loss": 1.3873,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 0.5712765957446808,
|
|
"grad_norm": 4.273273468017578,
|
|
"learning_rate": 9.887545712948441e-06,
|
|
"loss": 1.366,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 0.5715425531914894,
|
|
"grad_norm": 3.5899455547332764,
|
|
"learning_rate": 9.887360155802366e-06,
|
|
"loss": 1.1787,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 0.5718085106382979,
|
|
"grad_norm": 3.615471124649048,
|
|
"learning_rate": 9.887174447435615e-06,
|
|
"loss": 1.1561,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.5720744680851064,
|
|
"grad_norm": 3.8445990085601807,
|
|
"learning_rate": 9.886988587853933e-06,
|
|
"loss": 1.315,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 0.5723404255319149,
|
|
"grad_norm": 3.989668846130371,
|
|
"learning_rate": 9.886802577063068e-06,
|
|
"loss": 1.3116,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 0.5726063829787233,
|
|
"grad_norm": 4.619128227233887,
|
|
"learning_rate": 9.886616415068779e-06,
|
|
"loss": 1.3862,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 0.5728723404255319,
|
|
"grad_norm": 3.6989963054656982,
|
|
"learning_rate": 9.886430101876825e-06,
|
|
"loss": 1.2221,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 0.5731382978723404,
|
|
"grad_norm": 4.153132915496826,
|
|
"learning_rate": 9.886243637492969e-06,
|
|
"loss": 1.2128,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.573404255319149,
|
|
"grad_norm": 3.970520257949829,
|
|
"learning_rate": 9.886057021922984e-06,
|
|
"loss": 1.2802,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 0.5736702127659574,
|
|
"grad_norm": 3.751838207244873,
|
|
"learning_rate": 9.885870255172642e-06,
|
|
"loss": 1.1967,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 0.573936170212766,
|
|
"grad_norm": 3.6611552238464355,
|
|
"learning_rate": 9.88568333724772e-06,
|
|
"loss": 1.2956,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 0.5742021276595745,
|
|
"grad_norm": 4.170332908630371,
|
|
"learning_rate": 9.885496268154005e-06,
|
|
"loss": 1.2867,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 0.574468085106383,
|
|
"grad_norm": 3.5777552127838135,
|
|
"learning_rate": 9.885309047897285e-06,
|
|
"loss": 1.1703,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.5747340425531915,
|
|
"grad_norm": 3.9369912147521973,
|
|
"learning_rate": 9.88512167648335e-06,
|
|
"loss": 1.3682,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 0.575,
|
|
"grad_norm": 4.30880069732666,
|
|
"learning_rate": 9.884934153917998e-06,
|
|
"loss": 1.2892,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 0.5752659574468085,
|
|
"grad_norm": 4.251465797424316,
|
|
"learning_rate": 9.884746480207031e-06,
|
|
"loss": 1.3043,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 0.575531914893617,
|
|
"grad_norm": 3.4858951568603516,
|
|
"learning_rate": 9.88455865535626e-06,
|
|
"loss": 1.3418,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 0.5757978723404256,
|
|
"grad_norm": 3.715372085571289,
|
|
"learning_rate": 9.88437067937149e-06,
|
|
"loss": 1.274,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.576063829787234,
|
|
"grad_norm": 3.5083811283111572,
|
|
"learning_rate": 9.884182552258543e-06,
|
|
"loss": 1.1127,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 0.5763297872340426,
|
|
"grad_norm": 4.5049004554748535,
|
|
"learning_rate": 9.883994274023237e-06,
|
|
"loss": 1.3182,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 0.5765957446808511,
|
|
"grad_norm": 4.002771377563477,
|
|
"learning_rate": 9.883805844671396e-06,
|
|
"loss": 1.4289,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 0.5768617021276595,
|
|
"grad_norm": 3.691743850708008,
|
|
"learning_rate": 9.883617264208854e-06,
|
|
"loss": 1.3677,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 0.5771276595744681,
|
|
"grad_norm": 4.031147003173828,
|
|
"learning_rate": 9.883428532641445e-06,
|
|
"loss": 1.1805,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.5773936170212766,
|
|
"grad_norm": 4.453026294708252,
|
|
"learning_rate": 9.883239649975007e-06,
|
|
"loss": 1.4034,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 0.5776595744680851,
|
|
"grad_norm": 3.6685361862182617,
|
|
"learning_rate": 9.883050616215383e-06,
|
|
"loss": 1.3169,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 0.5779255319148936,
|
|
"grad_norm": 3.6789016723632812,
|
|
"learning_rate": 9.882861431368425e-06,
|
|
"loss": 1.3912,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 0.5781914893617022,
|
|
"grad_norm": 3.6971778869628906,
|
|
"learning_rate": 9.882672095439987e-06,
|
|
"loss": 1.1346,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 0.5784574468085106,
|
|
"grad_norm": 3.8128819465637207,
|
|
"learning_rate": 9.882482608435924e-06,
|
|
"loss": 1.3105,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.5787234042553191,
|
|
"grad_norm": 4.369806289672852,
|
|
"learning_rate": 9.882292970362101e-06,
|
|
"loss": 1.3673,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 0.5789893617021277,
|
|
"grad_norm": 3.403639316558838,
|
|
"learning_rate": 9.882103181224386e-06,
|
|
"loss": 1.2435,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 0.5792553191489361,
|
|
"grad_norm": 3.7755768299102783,
|
|
"learning_rate": 9.88191324102865e-06,
|
|
"loss": 1.3237,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 0.5795212765957447,
|
|
"grad_norm": 3.4330899715423584,
|
|
"learning_rate": 9.88172314978077e-06,
|
|
"loss": 1.249,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 0.5797872340425532,
|
|
"grad_norm": 3.9291467666625977,
|
|
"learning_rate": 9.88153290748663e-06,
|
|
"loss": 1.4475,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.5800531914893617,
|
|
"grad_norm": 3.731370210647583,
|
|
"learning_rate": 9.881342514152114e-06,
|
|
"loss": 1.2166,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 0.5803191489361702,
|
|
"grad_norm": 3.7620556354522705,
|
|
"learning_rate": 9.881151969783113e-06,
|
|
"loss": 1.2329,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 0.5805851063829788,
|
|
"grad_norm": 3.822985887527466,
|
|
"learning_rate": 9.880961274385523e-06,
|
|
"loss": 1.2219,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 0.5808510638297872,
|
|
"grad_norm": 3.2141547203063965,
|
|
"learning_rate": 9.880770427965245e-06,
|
|
"loss": 1.0712,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 0.5811170212765957,
|
|
"grad_norm": 3.733004331588745,
|
|
"learning_rate": 9.880579430528183e-06,
|
|
"loss": 1.203,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.5813829787234043,
|
|
"grad_norm": 3.6706783771514893,
|
|
"learning_rate": 9.880388282080247e-06,
|
|
"loss": 1.1757,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 0.5816489361702127,
|
|
"grad_norm": 3.7189342975616455,
|
|
"learning_rate": 9.880196982627352e-06,
|
|
"loss": 1.2265,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 0.5819148936170213,
|
|
"grad_norm": 3.8598103523254395,
|
|
"learning_rate": 9.88000553217542e-06,
|
|
"loss": 1.2892,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 0.5821808510638298,
|
|
"grad_norm": 3.854811191558838,
|
|
"learning_rate": 9.879813930730367e-06,
|
|
"loss": 1.1292,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 0.5824468085106383,
|
|
"grad_norm": 4.142318248748779,
|
|
"learning_rate": 9.879622178298128e-06,
|
|
"loss": 1.1795,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.5827127659574468,
|
|
"grad_norm": 3.688462257385254,
|
|
"learning_rate": 9.879430274884632e-06,
|
|
"loss": 1.2044,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 0.5829787234042553,
|
|
"grad_norm": 3.4742586612701416,
|
|
"learning_rate": 9.879238220495818e-06,
|
|
"loss": 1.1547,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 0.5832446808510638,
|
|
"grad_norm": 3.9008736610412598,
|
|
"learning_rate": 9.87904601513763e-06,
|
|
"loss": 1.2293,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 0.5835106382978723,
|
|
"grad_norm": 3.70694899559021,
|
|
"learning_rate": 9.878853658816015e-06,
|
|
"loss": 1.2758,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 0.5837765957446809,
|
|
"grad_norm": 4.015002727508545,
|
|
"learning_rate": 9.878661151536923e-06,
|
|
"loss": 1.3352,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.5840425531914893,
|
|
"grad_norm": 3.423016309738159,
|
|
"learning_rate": 9.87846849330631e-06,
|
|
"loss": 1.1313,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 0.5843085106382979,
|
|
"grad_norm": 3.549492120742798,
|
|
"learning_rate": 9.87827568413014e-06,
|
|
"loss": 1.3162,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 0.5845744680851064,
|
|
"grad_norm": 4.05422306060791,
|
|
"learning_rate": 9.878082724014375e-06,
|
|
"loss": 1.2593,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 0.5848404255319148,
|
|
"grad_norm": 3.875730514526367,
|
|
"learning_rate": 9.877889612964988e-06,
|
|
"loss": 1.1837,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 0.5851063829787234,
|
|
"grad_norm": 3.4176459312438965,
|
|
"learning_rate": 9.877696350987954e-06,
|
|
"loss": 1.1748,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.5853723404255319,
|
|
"grad_norm": 4.281347751617432,
|
|
"learning_rate": 9.87750293808925e-06,
|
|
"loss": 1.272,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 0.5856382978723405,
|
|
"grad_norm": 4.0162577629089355,
|
|
"learning_rate": 9.877309374274865e-06,
|
|
"loss": 1.2567,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 0.5859042553191489,
|
|
"grad_norm": 4.051181793212891,
|
|
"learning_rate": 9.877115659550785e-06,
|
|
"loss": 1.2305,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 0.5861702127659575,
|
|
"grad_norm": 3.711719512939453,
|
|
"learning_rate": 9.876921793923005e-06,
|
|
"loss": 1.1956,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 0.586436170212766,
|
|
"grad_norm": 3.402353048324585,
|
|
"learning_rate": 9.876727777397522e-06,
|
|
"loss": 1.1938,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.5867021276595744,
|
|
"grad_norm": 3.7966136932373047,
|
|
"learning_rate": 9.87653360998034e-06,
|
|
"loss": 1.2964,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 0.586968085106383,
|
|
"grad_norm": 3.816732406616211,
|
|
"learning_rate": 9.876339291677466e-06,
|
|
"loss": 1.2739,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 0.5872340425531914,
|
|
"grad_norm": 3.801443576812744,
|
|
"learning_rate": 9.876144822494913e-06,
|
|
"loss": 1.2832,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 0.5875,
|
|
"grad_norm": 3.7559401988983154,
|
|
"learning_rate": 9.8759502024387e-06,
|
|
"loss": 1.2176,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 0.5877659574468085,
|
|
"grad_norm": 3.9138758182525635,
|
|
"learning_rate": 9.875755431514846e-06,
|
|
"loss": 1.3423,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.5880319148936171,
|
|
"grad_norm": 4.0434041023254395,
|
|
"learning_rate": 9.875560509729379e-06,
|
|
"loss": 1.3064,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 0.5882978723404255,
|
|
"grad_norm": 3.7799887657165527,
|
|
"learning_rate": 9.87536543708833e-06,
|
|
"loss": 1.2518,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 0.5885638297872341,
|
|
"grad_norm": 3.8034684658050537,
|
|
"learning_rate": 9.875170213597731e-06,
|
|
"loss": 1.2485,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 0.5888297872340426,
|
|
"grad_norm": 4.390495300292969,
|
|
"learning_rate": 9.874974839263629e-06,
|
|
"loss": 1.263,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 0.589095744680851,
|
|
"grad_norm": 4.027488708496094,
|
|
"learning_rate": 9.874779314092065e-06,
|
|
"loss": 1.2718,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.5893617021276596,
|
|
"grad_norm": 3.8035428524017334,
|
|
"learning_rate": 9.87458363808909e-06,
|
|
"loss": 1.2636,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 0.589627659574468,
|
|
"grad_norm": 3.5652413368225098,
|
|
"learning_rate": 9.874387811260756e-06,
|
|
"loss": 1.241,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 0.5898936170212766,
|
|
"grad_norm": 4.2285614013671875,
|
|
"learning_rate": 9.874191833613128e-06,
|
|
"loss": 1.1943,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 0.5901595744680851,
|
|
"grad_norm": 4.229702472686768,
|
|
"learning_rate": 9.873995705152264e-06,
|
|
"loss": 1.382,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 0.5904255319148937,
|
|
"grad_norm": 4.092412948608398,
|
|
"learning_rate": 9.873799425884235e-06,
|
|
"loss": 1.132,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.5906914893617021,
|
|
"grad_norm": 3.6512703895568848,
|
|
"learning_rate": 9.873602995815113e-06,
|
|
"loss": 1.2022,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 0.5909574468085106,
|
|
"grad_norm": 3.634768009185791,
|
|
"learning_rate": 9.873406414950977e-06,
|
|
"loss": 1.2932,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 0.5912234042553192,
|
|
"grad_norm": 3.6227974891662598,
|
|
"learning_rate": 9.873209683297908e-06,
|
|
"loss": 1.2947,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 0.5914893617021276,
|
|
"grad_norm": 3.5124943256378174,
|
|
"learning_rate": 9.873012800861996e-06,
|
|
"loss": 1.1896,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 0.5917553191489362,
|
|
"grad_norm": 3.759474992752075,
|
|
"learning_rate": 9.872815767649329e-06,
|
|
"loss": 1.2116,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.5920212765957447,
|
|
"grad_norm": 3.7036375999450684,
|
|
"learning_rate": 9.872618583666005e-06,
|
|
"loss": 1.2293,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 0.5922872340425532,
|
|
"grad_norm": 3.61789608001709,
|
|
"learning_rate": 9.872421248918124e-06,
|
|
"loss": 1.2121,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 0.5925531914893617,
|
|
"grad_norm": 4.019472122192383,
|
|
"learning_rate": 9.872223763411794e-06,
|
|
"loss": 1.1467,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 0.5928191489361702,
|
|
"grad_norm": 3.774531364440918,
|
|
"learning_rate": 9.872026127153126e-06,
|
|
"loss": 1.3685,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 0.5930851063829787,
|
|
"grad_norm": 3.9165661334991455,
|
|
"learning_rate": 9.871828340148232e-06,
|
|
"loss": 1.1668,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.5933510638297872,
|
|
"grad_norm": 3.762282133102417,
|
|
"learning_rate": 9.871630402403235e-06,
|
|
"loss": 1.2315,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 0.5936170212765958,
|
|
"grad_norm": 3.96540904045105,
|
|
"learning_rate": 9.871432313924255e-06,
|
|
"loss": 1.3042,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 0.5938829787234042,
|
|
"grad_norm": 4.1440229415893555,
|
|
"learning_rate": 9.871234074717424e-06,
|
|
"loss": 1.3715,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 0.5941489361702128,
|
|
"grad_norm": 3.7638661861419678,
|
|
"learning_rate": 9.871035684788878e-06,
|
|
"loss": 1.2619,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 0.5944148936170213,
|
|
"grad_norm": 3.5591323375701904,
|
|
"learning_rate": 9.870837144144752e-06,
|
|
"loss": 1.1941,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.5946808510638298,
|
|
"grad_norm": 4.143522262573242,
|
|
"learning_rate": 9.87063845279119e-06,
|
|
"loss": 1.1687,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 0.5949468085106383,
|
|
"grad_norm": 4.148569583892822,
|
|
"learning_rate": 9.87043961073434e-06,
|
|
"loss": 1.4218,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 0.5952127659574468,
|
|
"grad_norm": 3.687147378921509,
|
|
"learning_rate": 9.870240617980353e-06,
|
|
"loss": 1.1311,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 0.5954787234042553,
|
|
"grad_norm": 3.5179238319396973,
|
|
"learning_rate": 9.870041474535388e-06,
|
|
"loss": 1.1823,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 0.5957446808510638,
|
|
"grad_norm": 3.844238519668579,
|
|
"learning_rate": 9.869842180405607e-06,
|
|
"loss": 1.3256,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.5960106382978724,
|
|
"grad_norm": 3.9333431720733643,
|
|
"learning_rate": 9.869642735597174e-06,
|
|
"loss": 1.3545,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 0.5962765957446808,
|
|
"grad_norm": 3.531179666519165,
|
|
"learning_rate": 9.869443140116261e-06,
|
|
"loss": 1.3254,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 0.5965425531914894,
|
|
"grad_norm": 3.795381546020508,
|
|
"learning_rate": 9.869243393969045e-06,
|
|
"loss": 1.2744,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 0.5968085106382979,
|
|
"grad_norm": 4.001238822937012,
|
|
"learning_rate": 9.869043497161707e-06,
|
|
"loss": 1.3585,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 0.5970744680851063,
|
|
"grad_norm": 4.289900302886963,
|
|
"learning_rate": 9.868843449700429e-06,
|
|
"loss": 1.3628,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.5973404255319149,
|
|
"grad_norm": 3.581144332885742,
|
|
"learning_rate": 9.868643251591403e-06,
|
|
"loss": 1.3021,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 0.5976063829787234,
|
|
"grad_norm": 3.504152536392212,
|
|
"learning_rate": 9.868442902840823e-06,
|
|
"loss": 1.2073,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 0.597872340425532,
|
|
"grad_norm": 3.648141622543335,
|
|
"learning_rate": 9.868242403454886e-06,
|
|
"loss": 1.3169,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 0.5981382978723404,
|
|
"grad_norm": 3.544408082962036,
|
|
"learning_rate": 9.8680417534398e-06,
|
|
"loss": 1.1334,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 0.598404255319149,
|
|
"grad_norm": 3.6868479251861572,
|
|
"learning_rate": 9.867840952801768e-06,
|
|
"loss": 1.209,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.5986702127659574,
|
|
"grad_norm": 3.6805198192596436,
|
|
"learning_rate": 9.867640001547007e-06,
|
|
"loss": 1.3011,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 0.5989361702127659,
|
|
"grad_norm": 3.646977186203003,
|
|
"learning_rate": 9.867438899681734e-06,
|
|
"loss": 1.2178,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 0.5992021276595745,
|
|
"grad_norm": 3.4612386226654053,
|
|
"learning_rate": 9.867237647212168e-06,
|
|
"loss": 1.1646,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 0.5994680851063829,
|
|
"grad_norm": 3.663968324661255,
|
|
"learning_rate": 9.867036244144544e-06,
|
|
"loss": 1.2337,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 0.5997340425531915,
|
|
"grad_norm": 3.724919080734253,
|
|
"learning_rate": 9.866834690485083e-06,
|
|
"loss": 1.3467,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"grad_norm": 3.6140668392181396,
|
|
"learning_rate": 9.86663298624003e-06,
|
|
"loss": 1.2684,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 0.6002659574468086,
|
|
"grad_norm": 3.805572271347046,
|
|
"learning_rate": 9.866431131415621e-06,
|
|
"loss": 1.3172,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 0.600531914893617,
|
|
"grad_norm": 3.921037435531616,
|
|
"learning_rate": 9.866229126018104e-06,
|
|
"loss": 1.1632,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 0.6007978723404256,
|
|
"grad_norm": 4.814824104309082,
|
|
"learning_rate": 9.866026970053728e-06,
|
|
"loss": 1.371,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 0.601063829787234,
|
|
"grad_norm": 3.8934485912323,
|
|
"learning_rate": 9.86582466352875e-06,
|
|
"loss": 1.2192,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.6013297872340425,
|
|
"grad_norm": 4.167794704437256,
|
|
"learning_rate": 9.865622206449428e-06,
|
|
"loss": 1.3167,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 0.6015957446808511,
|
|
"grad_norm": 3.916013479232788,
|
|
"learning_rate": 9.865419598822025e-06,
|
|
"loss": 1.2492,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 0.6018617021276595,
|
|
"grad_norm": 3.5649423599243164,
|
|
"learning_rate": 9.865216840652811e-06,
|
|
"loss": 1.1833,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 0.6021276595744681,
|
|
"grad_norm": 3.508890151977539,
|
|
"learning_rate": 9.865013931948061e-06,
|
|
"loss": 1.2527,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 0.6023936170212766,
|
|
"grad_norm": 3.513054132461548,
|
|
"learning_rate": 9.864810872714053e-06,
|
|
"loss": 1.2032,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.6026595744680852,
|
|
"grad_norm": 3.777679443359375,
|
|
"learning_rate": 9.864607662957066e-06,
|
|
"loss": 1.3355,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 0.6029255319148936,
|
|
"grad_norm": 3.778639316558838,
|
|
"learning_rate": 9.864404302683393e-06,
|
|
"loss": 1.3697,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 0.6031914893617021,
|
|
"grad_norm": 3.5880136489868164,
|
|
"learning_rate": 9.864200791899323e-06,
|
|
"loss": 1.2124,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 0.6034574468085107,
|
|
"grad_norm": 3.5101895332336426,
|
|
"learning_rate": 9.863997130611153e-06,
|
|
"loss": 1.1641,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 0.6037234042553191,
|
|
"grad_norm": 3.5391786098480225,
|
|
"learning_rate": 9.863793318825186e-06,
|
|
"loss": 1.2167,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.6039893617021277,
|
|
"grad_norm": 3.74766206741333,
|
|
"learning_rate": 9.863589356547728e-06,
|
|
"loss": 1.3565,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 0.6042553191489362,
|
|
"grad_norm": 3.966728925704956,
|
|
"learning_rate": 9.863385243785088e-06,
|
|
"loss": 1.3416,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 0.6045212765957447,
|
|
"grad_norm": 3.2839200496673584,
|
|
"learning_rate": 9.863180980543582e-06,
|
|
"loss": 1.1073,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 0.6047872340425532,
|
|
"grad_norm": 3.958099603652954,
|
|
"learning_rate": 9.862976566829532e-06,
|
|
"loss": 1.356,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 0.6050531914893617,
|
|
"grad_norm": 3.6041507720947266,
|
|
"learning_rate": 9.862772002649261e-06,
|
|
"loss": 1.4091,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.6053191489361702,
|
|
"grad_norm": 3.320826530456543,
|
|
"learning_rate": 9.862567288009099e-06,
|
|
"loss": 1.196,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 0.6055851063829787,
|
|
"grad_norm": 3.375542163848877,
|
|
"learning_rate": 9.862362422915382e-06,
|
|
"loss": 1.161,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 0.6058510638297873,
|
|
"grad_norm": 3.680457353591919,
|
|
"learning_rate": 9.862157407374446e-06,
|
|
"loss": 1.129,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 0.6061170212765957,
|
|
"grad_norm": 3.8363595008850098,
|
|
"learning_rate": 9.861952241392633e-06,
|
|
"loss": 1.309,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 0.6063829787234043,
|
|
"grad_norm": 3.7582051753997803,
|
|
"learning_rate": 9.861746924976297e-06,
|
|
"loss": 1.2328,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.6066489361702128,
|
|
"grad_norm": 3.5171892642974854,
|
|
"learning_rate": 9.861541458131785e-06,
|
|
"loss": 1.2098,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 0.6069148936170212,
|
|
"grad_norm": 3.905834197998047,
|
|
"learning_rate": 9.861335840865455e-06,
|
|
"loss": 1.2909,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 0.6071808510638298,
|
|
"grad_norm": 3.9347522258758545,
|
|
"learning_rate": 9.861130073183674e-06,
|
|
"loss": 1.265,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 0.6074468085106383,
|
|
"grad_norm": 3.6212542057037354,
|
|
"learning_rate": 9.860924155092803e-06,
|
|
"loss": 1.3044,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 0.6077127659574468,
|
|
"grad_norm": 3.9703807830810547,
|
|
"learning_rate": 9.860718086599217e-06,
|
|
"loss": 1.3497,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.6079787234042553,
|
|
"grad_norm": 3.94783091545105,
|
|
"learning_rate": 9.860511867709289e-06,
|
|
"loss": 1.248,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 0.6082446808510639,
|
|
"grad_norm": 4.237410545349121,
|
|
"learning_rate": 9.860305498429404e-06,
|
|
"loss": 1.3791,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 0.6085106382978723,
|
|
"grad_norm": 3.7259433269500732,
|
|
"learning_rate": 9.860098978765942e-06,
|
|
"loss": 1.3233,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 0.6087765957446809,
|
|
"grad_norm": 3.8508055210113525,
|
|
"learning_rate": 9.859892308725296e-06,
|
|
"loss": 1.2324,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 0.6090425531914894,
|
|
"grad_norm": 3.8663196563720703,
|
|
"learning_rate": 9.859685488313861e-06,
|
|
"loss": 1.2425,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.6093085106382978,
|
|
"grad_norm": 4.03026008605957,
|
|
"learning_rate": 9.859478517538035e-06,
|
|
"loss": 1.2932,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 0.6095744680851064,
|
|
"grad_norm": 3.517122745513916,
|
|
"learning_rate": 9.859271396404223e-06,
|
|
"loss": 1.1597,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 0.6098404255319149,
|
|
"grad_norm": 3.6704776287078857,
|
|
"learning_rate": 9.85906412491883e-06,
|
|
"loss": 1.1834,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 0.6101063829787234,
|
|
"grad_norm": 4.267923831939697,
|
|
"learning_rate": 9.858856703088276e-06,
|
|
"loss": 1.1888,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 0.6103723404255319,
|
|
"grad_norm": 4.178102493286133,
|
|
"learning_rate": 9.85864913091897e-06,
|
|
"loss": 1.3685,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.6106382978723405,
|
|
"grad_norm": 4.176131725311279,
|
|
"learning_rate": 9.858441408417345e-06,
|
|
"loss": 1.231,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 0.6109042553191489,
|
|
"grad_norm": 3.4884450435638428,
|
|
"learning_rate": 9.85823353558982e-06,
|
|
"loss": 1.2206,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 0.6111702127659574,
|
|
"grad_norm": 3.8766729831695557,
|
|
"learning_rate": 9.85802551244283e-06,
|
|
"loss": 1.3035,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 0.611436170212766,
|
|
"grad_norm": 3.5301473140716553,
|
|
"learning_rate": 9.857817338982811e-06,
|
|
"loss": 1.1712,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 0.6117021276595744,
|
|
"grad_norm": 3.7902379035949707,
|
|
"learning_rate": 9.857609015216205e-06,
|
|
"loss": 1.1324,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.611968085106383,
|
|
"grad_norm": 4.028817176818848,
|
|
"learning_rate": 9.857400541149455e-06,
|
|
"loss": 1.3142,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 0.6122340425531915,
|
|
"grad_norm": 3.6242549419403076,
|
|
"learning_rate": 9.857191916789016e-06,
|
|
"loss": 1.2368,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 0.6125,
|
|
"grad_norm": 3.6776719093322754,
|
|
"learning_rate": 9.856983142141338e-06,
|
|
"loss": 1.3289,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 0.6127659574468085,
|
|
"grad_norm": 3.8104121685028076,
|
|
"learning_rate": 9.856774217212886e-06,
|
|
"loss": 1.3076,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 0.613031914893617,
|
|
"grad_norm": 3.668893337249756,
|
|
"learning_rate": 9.85656514201012e-06,
|
|
"loss": 1.2935,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.6132978723404255,
|
|
"grad_norm": 3.5787241458892822,
|
|
"learning_rate": 9.85635591653951e-06,
|
|
"loss": 1.1477,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 0.613563829787234,
|
|
"grad_norm": 3.9113807678222656,
|
|
"learning_rate": 9.856146540807531e-06,
|
|
"loss": 1.3338,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 0.6138297872340426,
|
|
"grad_norm": 3.6910572052001953,
|
|
"learning_rate": 9.85593701482066e-06,
|
|
"loss": 1.1302,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 0.614095744680851,
|
|
"grad_norm": 4.1038689613342285,
|
|
"learning_rate": 9.855727338585381e-06,
|
|
"loss": 1.4519,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 0.6143617021276596,
|
|
"grad_norm": 3.5061099529266357,
|
|
"learning_rate": 9.855517512108182e-06,
|
|
"loss": 1.2243,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.6146276595744681,
|
|
"grad_norm": 3.5231192111968994,
|
|
"learning_rate": 9.855307535395553e-06,
|
|
"loss": 1.2158,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 0.6148936170212767,
|
|
"grad_norm": 3.8572421073913574,
|
|
"learning_rate": 9.855097408453993e-06,
|
|
"loss": 1.2392,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 0.6151595744680851,
|
|
"grad_norm": 3.7707557678222656,
|
|
"learning_rate": 9.854887131290002e-06,
|
|
"loss": 1.2316,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 0.6154255319148936,
|
|
"grad_norm": 3.860130548477173,
|
|
"learning_rate": 9.854676703910092e-06,
|
|
"loss": 1.2118,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 0.6156914893617021,
|
|
"grad_norm": 3.404811382293701,
|
|
"learning_rate": 9.854466126320763e-06,
|
|
"loss": 1.1942,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.6159574468085106,
|
|
"grad_norm": 3.659116268157959,
|
|
"learning_rate": 9.854255398528541e-06,
|
|
"loss": 1.2822,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 0.6162234042553192,
|
|
"grad_norm": 3.97190260887146,
|
|
"learning_rate": 9.85404452053994e-06,
|
|
"loss": 1.3892,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 0.6164893617021276,
|
|
"grad_norm": 3.99293851852417,
|
|
"learning_rate": 9.853833492361486e-06,
|
|
"loss": 1.2248,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 0.6167553191489362,
|
|
"grad_norm": 3.846611499786377,
|
|
"learning_rate": 9.85362231399971e-06,
|
|
"loss": 1.3553,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 0.6170212765957447,
|
|
"grad_norm": 3.922665596008301,
|
|
"learning_rate": 9.853410985461145e-06,
|
|
"loss": 1.2831,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.6172872340425531,
|
|
"grad_norm": 3.788879871368408,
|
|
"learning_rate": 9.85319950675233e-06,
|
|
"loss": 1.3213,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 0.6175531914893617,
|
|
"grad_norm": 3.7415027618408203,
|
|
"learning_rate": 9.852987877879807e-06,
|
|
"loss": 1.1951,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 0.6178191489361702,
|
|
"grad_norm": 4.016115665435791,
|
|
"learning_rate": 9.852776098850128e-06,
|
|
"loss": 1.2595,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 0.6180851063829788,
|
|
"grad_norm": 3.5927200317382812,
|
|
"learning_rate": 9.85256416966984e-06,
|
|
"loss": 1.2103,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 0.6183510638297872,
|
|
"grad_norm": 3.9768147468566895,
|
|
"learning_rate": 9.852352090345504e-06,
|
|
"loss": 1.3389,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.6186170212765958,
|
|
"grad_norm": 3.378852605819702,
|
|
"learning_rate": 9.852139860883684e-06,
|
|
"loss": 1.1266,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 0.6188829787234043,
|
|
"grad_norm": 4.071725368499756,
|
|
"learning_rate": 9.851927481290943e-06,
|
|
"loss": 1.4006,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 0.6191489361702127,
|
|
"grad_norm": 3.721118688583374,
|
|
"learning_rate": 9.851714951573853e-06,
|
|
"loss": 1.2344,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 0.6194148936170213,
|
|
"grad_norm": 3.551180839538574,
|
|
"learning_rate": 9.851502271738989e-06,
|
|
"loss": 1.3175,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 0.6196808510638298,
|
|
"grad_norm": 3.6764516830444336,
|
|
"learning_rate": 9.851289441792934e-06,
|
|
"loss": 1.2169,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.6199468085106383,
|
|
"grad_norm": 3.8505606651306152,
|
|
"learning_rate": 9.851076461742272e-06,
|
|
"loss": 1.3586,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 0.6202127659574468,
|
|
"grad_norm": 3.9605445861816406,
|
|
"learning_rate": 9.850863331593591e-06,
|
|
"loss": 1.2454,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 0.6204787234042554,
|
|
"grad_norm": 4.140010833740234,
|
|
"learning_rate": 9.85065005135349e-06,
|
|
"loss": 1.4014,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 0.6207446808510638,
|
|
"grad_norm": 4.118074417114258,
|
|
"learning_rate": 9.850436621028565e-06,
|
|
"loss": 1.2367,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 0.6210106382978723,
|
|
"grad_norm": 3.6424777507781982,
|
|
"learning_rate": 9.85022304062542e-06,
|
|
"loss": 1.129,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.6212765957446809,
|
|
"grad_norm": 3.643145799636841,
|
|
"learning_rate": 9.850009310150662e-06,
|
|
"loss": 1.3767,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 0.6215425531914893,
|
|
"grad_norm": 3.913959503173828,
|
|
"learning_rate": 9.849795429610908e-06,
|
|
"loss": 1.1977,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 0.6218085106382979,
|
|
"grad_norm": 3.91186261177063,
|
|
"learning_rate": 9.849581399012772e-06,
|
|
"loss": 1.2842,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 0.6220744680851064,
|
|
"grad_norm": 3.7167961597442627,
|
|
"learning_rate": 9.849367218362879e-06,
|
|
"loss": 1.2802,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 0.6223404255319149,
|
|
"grad_norm": 3.5471532344818115,
|
|
"learning_rate": 9.849152887667855e-06,
|
|
"loss": 1.2785,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.6226063829787234,
|
|
"grad_norm": 4.358826637268066,
|
|
"learning_rate": 9.84893840693433e-06,
|
|
"loss": 1.1696,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 0.622872340425532,
|
|
"grad_norm": 3.869590997695923,
|
|
"learning_rate": 9.848723776168942e-06,
|
|
"loss": 1.3316,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 0.6231382978723404,
|
|
"grad_norm": 4.493122577667236,
|
|
"learning_rate": 9.848508995378333e-06,
|
|
"loss": 1.2928,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 0.6234042553191489,
|
|
"grad_norm": 3.808885335922241,
|
|
"learning_rate": 9.848294064569146e-06,
|
|
"loss": 1.331,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 0.6236702127659575,
|
|
"grad_norm": 3.6614105701446533,
|
|
"learning_rate": 9.848078983748032e-06,
|
|
"loss": 1.3549,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.6239361702127659,
|
|
"grad_norm": 3.5685722827911377,
|
|
"learning_rate": 9.847863752921649e-06,
|
|
"loss": 1.1914,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 0.6242021276595745,
|
|
"grad_norm": 4.203314781188965,
|
|
"learning_rate": 9.847648372096652e-06,
|
|
"loss": 1.3369,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 0.624468085106383,
|
|
"grad_norm": 3.762103796005249,
|
|
"learning_rate": 9.847432841279707e-06,
|
|
"loss": 1.261,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 0.6247340425531915,
|
|
"grad_norm": 4.371121883392334,
|
|
"learning_rate": 9.847217160477483e-06,
|
|
"loss": 1.3071,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 3.928662061691284,
|
|
"learning_rate": 9.847001329696653e-06,
|
|
"loss": 1.2321,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.6252659574468085,
|
|
"grad_norm": 3.7375707626342773,
|
|
"learning_rate": 9.846785348943896e-06,
|
|
"loss": 1.3022,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 0.625531914893617,
|
|
"grad_norm": 3.684936046600342,
|
|
"learning_rate": 9.846569218225892e-06,
|
|
"loss": 1.2365,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 0.6257978723404255,
|
|
"grad_norm": 3.5079708099365234,
|
|
"learning_rate": 9.846352937549332e-06,
|
|
"loss": 1.2328,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 0.6260638297872341,
|
|
"grad_norm": 3.814976692199707,
|
|
"learning_rate": 9.846136506920907e-06,
|
|
"loss": 1.1824,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 0.6263297872340425,
|
|
"grad_norm": 3.3843934535980225,
|
|
"learning_rate": 9.84591992634731e-06,
|
|
"loss": 1.0477,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.6265957446808511,
|
|
"grad_norm": 3.712428569793701,
|
|
"learning_rate": 9.845703195835248e-06,
|
|
"loss": 1.2826,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 0.6268617021276596,
|
|
"grad_norm": 3.617882251739502,
|
|
"learning_rate": 9.845486315391421e-06,
|
|
"loss": 1.2472,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 0.627127659574468,
|
|
"grad_norm": 4.057145595550537,
|
|
"learning_rate": 9.845269285022545e-06,
|
|
"loss": 1.4144,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 0.6273936170212766,
|
|
"grad_norm": 4.23139762878418,
|
|
"learning_rate": 9.845052104735331e-06,
|
|
"loss": 1.4445,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 0.6276595744680851,
|
|
"grad_norm": 3.8976731300354004,
|
|
"learning_rate": 9.844834774536503e-06,
|
|
"loss": 1.2646,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.6279255319148936,
|
|
"grad_norm": 3.6036627292633057,
|
|
"learning_rate": 9.844617294432781e-06,
|
|
"loss": 1.251,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 0.6281914893617021,
|
|
"grad_norm": 3.4059393405914307,
|
|
"learning_rate": 9.844399664430896e-06,
|
|
"loss": 1.1432,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 0.6284574468085107,
|
|
"grad_norm": 3.6594855785369873,
|
|
"learning_rate": 9.844181884537583e-06,
|
|
"loss": 1.3047,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 0.6287234042553191,
|
|
"grad_norm": 4.183903217315674,
|
|
"learning_rate": 9.843963954759578e-06,
|
|
"loss": 1.2951,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 0.6289893617021277,
|
|
"grad_norm": 3.496905565261841,
|
|
"learning_rate": 9.843745875103628e-06,
|
|
"loss": 1.3087,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.6292553191489362,
|
|
"grad_norm": 3.5995302200317383,
|
|
"learning_rate": 9.843527645576475e-06,
|
|
"loss": 1.2998,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 0.6295212765957446,
|
|
"grad_norm": 3.597393035888672,
|
|
"learning_rate": 9.843309266184875e-06,
|
|
"loss": 1.2151,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 0.6297872340425532,
|
|
"grad_norm": 3.922405481338501,
|
|
"learning_rate": 9.843090736935583e-06,
|
|
"loss": 1.4409,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 0.6300531914893617,
|
|
"grad_norm": 3.7593741416931152,
|
|
"learning_rate": 9.842872057835363e-06,
|
|
"loss": 1.0905,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 0.6303191489361702,
|
|
"grad_norm": 3.570892572402954,
|
|
"learning_rate": 9.842653228890979e-06,
|
|
"loss": 1.2337,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.6305851063829787,
|
|
"grad_norm": 3.2270023822784424,
|
|
"learning_rate": 9.842434250109202e-06,
|
|
"loss": 0.9824,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 0.6308510638297873,
|
|
"grad_norm": 3.9054601192474365,
|
|
"learning_rate": 9.84221512149681e-06,
|
|
"loss": 1.3091,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 0.6311170212765957,
|
|
"grad_norm": 3.7820627689361572,
|
|
"learning_rate": 9.84199584306058e-06,
|
|
"loss": 1.2331,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 0.6313829787234042,
|
|
"grad_norm": 3.407257080078125,
|
|
"learning_rate": 9.841776414807297e-06,
|
|
"loss": 1.1868,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 0.6316489361702128,
|
|
"grad_norm": 3.471640110015869,
|
|
"learning_rate": 9.841556836743752e-06,
|
|
"loss": 1.2025,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.6319148936170212,
|
|
"grad_norm": 3.824422597885132,
|
|
"learning_rate": 9.841337108876739e-06,
|
|
"loss": 1.1932,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 0.6321808510638298,
|
|
"grad_norm": 3.6980538368225098,
|
|
"learning_rate": 9.841117231213055e-06,
|
|
"loss": 1.2374,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 0.6324468085106383,
|
|
"grad_norm": 3.9002277851104736,
|
|
"learning_rate": 9.840897203759502e-06,
|
|
"loss": 1.3205,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 0.6327127659574469,
|
|
"grad_norm": 3.993248462677002,
|
|
"learning_rate": 9.840677026522893e-06,
|
|
"loss": 1.1262,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 0.6329787234042553,
|
|
"grad_norm": 3.8742499351501465,
|
|
"learning_rate": 9.840456699510038e-06,
|
|
"loss": 1.1456,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.6332446808510638,
|
|
"grad_norm": 3.772584915161133,
|
|
"learning_rate": 9.840236222727752e-06,
|
|
"loss": 1.1367,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 0.6335106382978724,
|
|
"grad_norm": 3.7653708457946777,
|
|
"learning_rate": 9.840015596182861e-06,
|
|
"loss": 1.24,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 0.6337765957446808,
|
|
"grad_norm": 3.4554617404937744,
|
|
"learning_rate": 9.839794819882188e-06,
|
|
"loss": 1.2708,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 0.6340425531914894,
|
|
"grad_norm": 3.808807611465454,
|
|
"learning_rate": 9.839573893832564e-06,
|
|
"loss": 1.3985,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 0.6343085106382979,
|
|
"grad_norm": 3.6254007816314697,
|
|
"learning_rate": 9.839352818040825e-06,
|
|
"loss": 1.3145,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.6345744680851064,
|
|
"grad_norm": 3.83559513092041,
|
|
"learning_rate": 9.839131592513814e-06,
|
|
"loss": 1.2868,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 0.6348404255319149,
|
|
"grad_norm": 3.465432643890381,
|
|
"learning_rate": 9.838910217258375e-06,
|
|
"loss": 1.213,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 0.6351063829787233,
|
|
"grad_norm": 3.762899160385132,
|
|
"learning_rate": 9.838688692281356e-06,
|
|
"loss": 1.3678,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 0.6353723404255319,
|
|
"grad_norm": 3.573856830596924,
|
|
"learning_rate": 9.83846701758961e-06,
|
|
"loss": 1.3181,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 0.6356382978723404,
|
|
"grad_norm": 3.873749256134033,
|
|
"learning_rate": 9.838245193189999e-06,
|
|
"loss": 1.252,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.635904255319149,
|
|
"grad_norm": 3.5495100021362305,
|
|
"learning_rate": 9.838023219089386e-06,
|
|
"loss": 1.352,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 0.6361702127659574,
|
|
"grad_norm": 3.6257059574127197,
|
|
"learning_rate": 9.837801095294639e-06,
|
|
"loss": 1.2099,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 0.636436170212766,
|
|
"grad_norm": 3.658745288848877,
|
|
"learning_rate": 9.83757882181263e-06,
|
|
"loss": 1.2089,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 0.6367021276595745,
|
|
"grad_norm": 3.6948094367980957,
|
|
"learning_rate": 9.837356398650235e-06,
|
|
"loss": 1.3032,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 0.636968085106383,
|
|
"grad_norm": 3.677865743637085,
|
|
"learning_rate": 9.83713382581434e-06,
|
|
"loss": 1.2295,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.6372340425531915,
|
|
"grad_norm": 3.758213758468628,
|
|
"learning_rate": 9.836911103311828e-06,
|
|
"loss": 1.2542,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 0.6375,
|
|
"grad_norm": 3.710860252380371,
|
|
"learning_rate": 9.836688231149593e-06,
|
|
"loss": 1.3331,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 0.6377659574468085,
|
|
"grad_norm": 3.436738967895508,
|
|
"learning_rate": 9.836465209334529e-06,
|
|
"loss": 1.1318,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 0.638031914893617,
|
|
"grad_norm": 4.398902416229248,
|
|
"learning_rate": 9.836242037873536e-06,
|
|
"loss": 1.3268,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 0.6382978723404256,
|
|
"grad_norm": 3.483926773071289,
|
|
"learning_rate": 9.836018716773522e-06,
|
|
"loss": 1.1744,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.638563829787234,
|
|
"grad_norm": 3.766038417816162,
|
|
"learning_rate": 9.835795246041395e-06,
|
|
"loss": 1.1829,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 0.6388297872340426,
|
|
"grad_norm": 3.7989938259124756,
|
|
"learning_rate": 9.835571625684068e-06,
|
|
"loss": 1.2691,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 0.6390957446808511,
|
|
"grad_norm": 3.6767778396606445,
|
|
"learning_rate": 9.835347855708464e-06,
|
|
"loss": 1.1456,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 0.6393617021276595,
|
|
"grad_norm": 3.689368963241577,
|
|
"learning_rate": 9.835123936121504e-06,
|
|
"loss": 1.2714,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 0.6396276595744681,
|
|
"grad_norm": 3.6774284839630127,
|
|
"learning_rate": 9.834899866930116e-06,
|
|
"loss": 1.1968,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.6398936170212766,
|
|
"grad_norm": 3.734713077545166,
|
|
"learning_rate": 9.834675648141235e-06,
|
|
"loss": 1.4036,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 0.6401595744680851,
|
|
"grad_norm": 3.4915902614593506,
|
|
"learning_rate": 9.834451279761796e-06,
|
|
"loss": 1.0733,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 0.6404255319148936,
|
|
"grad_norm": 3.5466091632843018,
|
|
"learning_rate": 9.834226761798742e-06,
|
|
"loss": 1.2197,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 0.6406914893617022,
|
|
"grad_norm": 3.5611202716827393,
|
|
"learning_rate": 9.83400209425902e-06,
|
|
"loss": 1.092,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 0.6409574468085106,
|
|
"grad_norm": 3.35369610786438,
|
|
"learning_rate": 9.833777277149585e-06,
|
|
"loss": 1.2385,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.6412234042553191,
|
|
"grad_norm": 3.7679550647735596,
|
|
"learning_rate": 9.833552310477388e-06,
|
|
"loss": 1.0647,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 0.6414893617021277,
|
|
"grad_norm": 3.6990325450897217,
|
|
"learning_rate": 9.833327194249392e-06,
|
|
"loss": 1.1853,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 0.6417553191489361,
|
|
"grad_norm": 3.6745262145996094,
|
|
"learning_rate": 9.833101928472562e-06,
|
|
"loss": 1.2038,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 0.6420212765957447,
|
|
"grad_norm": 3.357508897781372,
|
|
"learning_rate": 9.832876513153867e-06,
|
|
"loss": 1.0274,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 0.6422872340425532,
|
|
"grad_norm": 3.786376953125,
|
|
"learning_rate": 9.832650948300284e-06,
|
|
"loss": 1.288,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.6425531914893617,
|
|
"grad_norm": 3.253251314163208,
|
|
"learning_rate": 9.83242523391879e-06,
|
|
"loss": 1.0876,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 0.6428191489361702,
|
|
"grad_norm": 3.3168015480041504,
|
|
"learning_rate": 9.832199370016371e-06,
|
|
"loss": 1.1551,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 0.6430851063829788,
|
|
"grad_norm": 3.8747761249542236,
|
|
"learning_rate": 9.831973356600013e-06,
|
|
"loss": 1.2343,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 0.6433510638297872,
|
|
"grad_norm": 3.9137704372406006,
|
|
"learning_rate": 9.83174719367671e-06,
|
|
"loss": 1.1782,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 0.6436170212765957,
|
|
"grad_norm": 3.64943528175354,
|
|
"learning_rate": 9.831520881253462e-06,
|
|
"loss": 1.0506,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.6438829787234043,
|
|
"grad_norm": 3.5648887157440186,
|
|
"learning_rate": 9.83129441933727e-06,
|
|
"loss": 1.0195,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 0.6441489361702127,
|
|
"grad_norm": 3.6668763160705566,
|
|
"learning_rate": 9.83106780793514e-06,
|
|
"loss": 1.349,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 0.6444148936170213,
|
|
"grad_norm": 3.6365723609924316,
|
|
"learning_rate": 9.830841047054083e-06,
|
|
"loss": 1.2105,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 0.6446808510638298,
|
|
"grad_norm": 3.657466411590576,
|
|
"learning_rate": 9.830614136701116e-06,
|
|
"loss": 1.2453,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 0.6449468085106383,
|
|
"grad_norm": 3.7750251293182373,
|
|
"learning_rate": 9.83038707688326e-06,
|
|
"loss": 1.2753,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.6452127659574468,
|
|
"grad_norm": 3.4032111167907715,
|
|
"learning_rate": 9.830159867607543e-06,
|
|
"loss": 1.2054,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 0.6454787234042553,
|
|
"grad_norm": 3.546877861022949,
|
|
"learning_rate": 9.82993250888099e-06,
|
|
"loss": 1.35,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 0.6457446808510638,
|
|
"grad_norm": 3.5076162815093994,
|
|
"learning_rate": 9.829705000710642e-06,
|
|
"loss": 1.1382,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 0.6460106382978723,
|
|
"grad_norm": 3.955322742462158,
|
|
"learning_rate": 9.829477343103533e-06,
|
|
"loss": 1.3948,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 0.6462765957446809,
|
|
"grad_norm": 3.5918376445770264,
|
|
"learning_rate": 9.82924953606671e-06,
|
|
"loss": 1.2271,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.6465425531914893,
|
|
"grad_norm": 3.8371551036834717,
|
|
"learning_rate": 9.82902157960722e-06,
|
|
"loss": 1.2004,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 0.6468085106382979,
|
|
"grad_norm": 3.573141098022461,
|
|
"learning_rate": 9.828793473732116e-06,
|
|
"loss": 1.2059,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 0.6470744680851064,
|
|
"grad_norm": 3.8021459579467773,
|
|
"learning_rate": 9.828565218448457e-06,
|
|
"loss": 1.1852,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 0.6473404255319148,
|
|
"grad_norm": 4.022589206695557,
|
|
"learning_rate": 9.828336813763308e-06,
|
|
"loss": 1.2385,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 0.6476063829787234,
|
|
"grad_norm": 3.364841938018799,
|
|
"learning_rate": 9.82810825968373e-06,
|
|
"loss": 1.1976,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.6478723404255319,
|
|
"grad_norm": 4.046548843383789,
|
|
"learning_rate": 9.8278795562168e-06,
|
|
"loss": 1.3522,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 0.6481382978723405,
|
|
"grad_norm": 3.795485019683838,
|
|
"learning_rate": 9.82765070336959e-06,
|
|
"loss": 1.2166,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 0.6484042553191489,
|
|
"grad_norm": 3.8107662200927734,
|
|
"learning_rate": 9.827421701149187e-06,
|
|
"loss": 1.3138,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 0.6486702127659575,
|
|
"grad_norm": 3.618577241897583,
|
|
"learning_rate": 9.82719254956267e-06,
|
|
"loss": 1.1677,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 0.648936170212766,
|
|
"grad_norm": 3.680255651473999,
|
|
"learning_rate": 9.826963248617133e-06,
|
|
"loss": 1.2319,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.6492021276595744,
|
|
"grad_norm": 3.6145694255828857,
|
|
"learning_rate": 9.82673379831967e-06,
|
|
"loss": 1.2276,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 0.649468085106383,
|
|
"grad_norm": 3.643686532974243,
|
|
"learning_rate": 9.82650419867738e-06,
|
|
"loss": 1.2989,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 0.6497340425531914,
|
|
"grad_norm": 3.774909019470215,
|
|
"learning_rate": 9.82627444969737e-06,
|
|
"loss": 1.2749,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"grad_norm": 3.7553470134735107,
|
|
"learning_rate": 9.826044551386743e-06,
|
|
"loss": 1.0902,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 0.6502659574468085,
|
|
"grad_norm": 3.453191041946411,
|
|
"learning_rate": 9.825814503752618e-06,
|
|
"loss": 1.2609,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.6505319148936171,
|
|
"grad_norm": 3.889417886734009,
|
|
"learning_rate": 9.825584306802109e-06,
|
|
"loss": 1.2514,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 0.6507978723404255,
|
|
"grad_norm": 3.5073375701904297,
|
|
"learning_rate": 9.825353960542342e-06,
|
|
"loss": 1.2466,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 0.6510638297872341,
|
|
"grad_norm": 3.4606523513793945,
|
|
"learning_rate": 9.825123464980442e-06,
|
|
"loss": 1.1156,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 0.6513297872340426,
|
|
"grad_norm": 3.831897497177124,
|
|
"learning_rate": 9.82489282012354e-06,
|
|
"loss": 1.1323,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 0.651595744680851,
|
|
"grad_norm": 4.391724109649658,
|
|
"learning_rate": 9.824662025978774e-06,
|
|
"loss": 1.2543,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.6518617021276596,
|
|
"grad_norm": 3.8090097904205322,
|
|
"learning_rate": 9.824431082553285e-06,
|
|
"loss": 1.3592,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 0.652127659574468,
|
|
"grad_norm": 3.706662893295288,
|
|
"learning_rate": 9.824199989854217e-06,
|
|
"loss": 1.2753,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 0.6523936170212766,
|
|
"grad_norm": 4.826519966125488,
|
|
"learning_rate": 9.823968747888722e-06,
|
|
"loss": 1.501,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 0.6526595744680851,
|
|
"grad_norm": 3.7181127071380615,
|
|
"learning_rate": 9.823737356663956e-06,
|
|
"loss": 1.283,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 0.6529255319148937,
|
|
"grad_norm": 3.6020474433898926,
|
|
"learning_rate": 9.823505816187076e-06,
|
|
"loss": 1.195,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.6531914893617021,
|
|
"grad_norm": 3.7805116176605225,
|
|
"learning_rate": 9.823274126465245e-06,
|
|
"loss": 1.3032,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 0.6534574468085106,
|
|
"grad_norm": 3.6897008419036865,
|
|
"learning_rate": 9.823042287505636e-06,
|
|
"loss": 1.33,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 0.6537234042553192,
|
|
"grad_norm": 3.6036691665649414,
|
|
"learning_rate": 9.82281029931542e-06,
|
|
"loss": 1.2454,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 0.6539893617021276,
|
|
"grad_norm": 3.8645083904266357,
|
|
"learning_rate": 9.822578161901774e-06,
|
|
"loss": 1.4082,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 0.6542553191489362,
|
|
"grad_norm": 3.982588052749634,
|
|
"learning_rate": 9.822345875271884e-06,
|
|
"loss": 1.2635,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.6545212765957447,
|
|
"grad_norm": 3.576320171356201,
|
|
"learning_rate": 9.822113439432933e-06,
|
|
"loss": 1.3524,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 0.6547872340425532,
|
|
"grad_norm": 3.387544870376587,
|
|
"learning_rate": 9.821880854392115e-06,
|
|
"loss": 1.2344,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 0.6550531914893617,
|
|
"grad_norm": 3.385258436203003,
|
|
"learning_rate": 9.821648120156628e-06,
|
|
"loss": 1.2054,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 0.6553191489361702,
|
|
"grad_norm": 3.952305316925049,
|
|
"learning_rate": 9.82141523673367e-06,
|
|
"loss": 1.153,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 0.6555851063829787,
|
|
"grad_norm": 3.8070571422576904,
|
|
"learning_rate": 9.821182204130448e-06,
|
|
"loss": 1.3405,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.6558510638297872,
|
|
"grad_norm": 3.9651296138763428,
|
|
"learning_rate": 9.820949022354174e-06,
|
|
"loss": 1.3205,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 0.6561170212765958,
|
|
"grad_norm": 3.980510950088501,
|
|
"learning_rate": 9.82071569141206e-06,
|
|
"loss": 1.401,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 0.6563829787234042,
|
|
"grad_norm": 4.441346168518066,
|
|
"learning_rate": 9.820482211311326e-06,
|
|
"loss": 1.3839,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 0.6566489361702128,
|
|
"grad_norm": 3.4150032997131348,
|
|
"learning_rate": 9.820248582059197e-06,
|
|
"loss": 1.0058,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 0.6569148936170213,
|
|
"grad_norm": 3.4013893604278564,
|
|
"learning_rate": 9.820014803662905e-06,
|
|
"loss": 1.1612,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.6571808510638298,
|
|
"grad_norm": 4.017107009887695,
|
|
"learning_rate": 9.819780876129677e-06,
|
|
"loss": 1.2295,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 0.6574468085106383,
|
|
"grad_norm": 3.500370979309082,
|
|
"learning_rate": 9.819546799466756e-06,
|
|
"loss": 1.2573,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 0.6577127659574468,
|
|
"grad_norm": 3.7119557857513428,
|
|
"learning_rate": 9.81931257368138e-06,
|
|
"loss": 1.1827,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 0.6579787234042553,
|
|
"grad_norm": 4.006588935852051,
|
|
"learning_rate": 9.8190781987808e-06,
|
|
"loss": 1.3236,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 0.6582446808510638,
|
|
"grad_norm": 3.6574013233184814,
|
|
"learning_rate": 9.818843674772268e-06,
|
|
"loss": 1.2783,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.6585106382978724,
|
|
"grad_norm": 3.4724280834198,
|
|
"learning_rate": 9.818609001663038e-06,
|
|
"loss": 1.3469,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 0.6587765957446808,
|
|
"grad_norm": 3.3943772315979004,
|
|
"learning_rate": 9.818374179460372e-06,
|
|
"loss": 1.1934,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 0.6590425531914894,
|
|
"grad_norm": 3.6822094917297363,
|
|
"learning_rate": 9.818139208171537e-06,
|
|
"loss": 1.3505,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 0.6593085106382979,
|
|
"grad_norm": 3.474010467529297,
|
|
"learning_rate": 9.817904087803802e-06,
|
|
"loss": 1.1487,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 0.6595744680851063,
|
|
"grad_norm": 3.4429280757904053,
|
|
"learning_rate": 9.817668818364441e-06,
|
|
"loss": 1.1786,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.6598404255319149,
|
|
"grad_norm": 4.096560955047607,
|
|
"learning_rate": 9.817433399860736e-06,
|
|
"loss": 1.3167,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 0.6601063829787234,
|
|
"grad_norm": 3.4501636028289795,
|
|
"learning_rate": 9.817197832299971e-06,
|
|
"loss": 1.0416,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 0.660372340425532,
|
|
"grad_norm": 3.7687666416168213,
|
|
"learning_rate": 9.816962115689432e-06,
|
|
"loss": 1.1121,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 0.6606382978723404,
|
|
"grad_norm": 3.6816604137420654,
|
|
"learning_rate": 9.816726250036413e-06,
|
|
"loss": 1.2019,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 0.660904255319149,
|
|
"grad_norm": 4.033024787902832,
|
|
"learning_rate": 9.816490235348215e-06,
|
|
"loss": 1.3078,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.6611702127659574,
|
|
"grad_norm": 3.7372167110443115,
|
|
"learning_rate": 9.816254071632137e-06,
|
|
"loss": 1.4434,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 0.6614361702127659,
|
|
"grad_norm": 3.694561004638672,
|
|
"learning_rate": 9.816017758895488e-06,
|
|
"loss": 1.2969,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 0.6617021276595745,
|
|
"grad_norm": 4.178577423095703,
|
|
"learning_rate": 9.815781297145578e-06,
|
|
"loss": 1.3661,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 0.6619680851063829,
|
|
"grad_norm": 3.647728681564331,
|
|
"learning_rate": 9.815544686389727e-06,
|
|
"loss": 1.1693,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 0.6622340425531915,
|
|
"grad_norm": 3.6795883178710938,
|
|
"learning_rate": 9.815307926635252e-06,
|
|
"loss": 1.2308,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.6625,
|
|
"grad_norm": 3.8441531658172607,
|
|
"learning_rate": 9.81507101788948e-06,
|
|
"loss": 1.2011,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 0.6627659574468086,
|
|
"grad_norm": 3.512495994567871,
|
|
"learning_rate": 9.814833960159744e-06,
|
|
"loss": 1.1509,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 0.663031914893617,
|
|
"grad_norm": 3.631899356842041,
|
|
"learning_rate": 9.814596753453376e-06,
|
|
"loss": 1.0989,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 0.6632978723404256,
|
|
"grad_norm": 3.5272533893585205,
|
|
"learning_rate": 9.814359397777716e-06,
|
|
"loss": 1.3053,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 0.663563829787234,
|
|
"grad_norm": 3.492922306060791,
|
|
"learning_rate": 9.814121893140105e-06,
|
|
"loss": 1.2977,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.6638297872340425,
|
|
"grad_norm": 3.5858078002929688,
|
|
"learning_rate": 9.813884239547898e-06,
|
|
"loss": 1.1963,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 0.6640957446808511,
|
|
"grad_norm": 3.4466118812561035,
|
|
"learning_rate": 9.813646437008444e-06,
|
|
"loss": 1.266,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 0.6643617021276595,
|
|
"grad_norm": 3.682159900665283,
|
|
"learning_rate": 9.813408485529103e-06,
|
|
"loss": 1.1549,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 0.6646276595744681,
|
|
"grad_norm": 4.358649253845215,
|
|
"learning_rate": 9.813170385117235e-06,
|
|
"loss": 1.3577,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 0.6648936170212766,
|
|
"grad_norm": 4.059812068939209,
|
|
"learning_rate": 9.81293213578021e-06,
|
|
"loss": 1.3728,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.6648936170212766,
|
|
"eval_loss": 1.2857128381729126,
|
|
"eval_runtime": 12.6822,
|
|
"eval_samples_per_second": 31.54,
|
|
"eval_steps_per_second": 3.943,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.6651595744680852,
|
|
"grad_norm": 3.519260883331299,
|
|
"learning_rate": 9.812693737525396e-06,
|
|
"loss": 1.1743,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 0.6654255319148936,
|
|
"grad_norm": 4.004322052001953,
|
|
"learning_rate": 9.812455190360172e-06,
|
|
"loss": 1.2847,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 0.6656914893617021,
|
|
"grad_norm": 3.699012517929077,
|
|
"learning_rate": 9.81221649429192e-06,
|
|
"loss": 1.3645,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 0.6659574468085107,
|
|
"grad_norm": 3.5919108390808105,
|
|
"learning_rate": 9.811977649328021e-06,
|
|
"loss": 1.1794,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 0.6662234042553191,
|
|
"grad_norm": 3.382624626159668,
|
|
"learning_rate": 9.81173865547587e-06,
|
|
"loss": 1.2909,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.6664893617021277,
|
|
"grad_norm": 3.7188732624053955,
|
|
"learning_rate": 9.811499512742861e-06,
|
|
"loss": 1.2731,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 0.6667553191489362,
|
|
"grad_norm": 3.5745997428894043,
|
|
"learning_rate": 9.811260221136392e-06,
|
|
"loss": 1.1994,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 0.6670212765957447,
|
|
"grad_norm": 3.6393473148345947,
|
|
"learning_rate": 9.811020780663865e-06,
|
|
"loss": 1.2335,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 0.6672872340425532,
|
|
"grad_norm": 3.4967026710510254,
|
|
"learning_rate": 9.810781191332692e-06,
|
|
"loss": 1.2272,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 0.6675531914893617,
|
|
"grad_norm": 3.826430559158325,
|
|
"learning_rate": 9.810541453150286e-06,
|
|
"loss": 1.3689,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.6678191489361702,
|
|
"grad_norm": 4.058473110198975,
|
|
"learning_rate": 9.810301566124063e-06,
|
|
"loss": 1.1942,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 0.6680851063829787,
|
|
"grad_norm": 3.5520458221435547,
|
|
"learning_rate": 9.810061530261446e-06,
|
|
"loss": 1.1599,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 0.6683510638297873,
|
|
"grad_norm": 3.7619452476501465,
|
|
"learning_rate": 9.80982134556986e-06,
|
|
"loss": 1.2391,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 0.6686170212765957,
|
|
"grad_norm": 3.9400548934936523,
|
|
"learning_rate": 9.809581012056743e-06,
|
|
"loss": 1.2792,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 0.6688829787234043,
|
|
"grad_norm": 3.3986830711364746,
|
|
"learning_rate": 9.809340529729523e-06,
|
|
"loss": 1.2333,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.6691489361702128,
|
|
"grad_norm": 3.8278701305389404,
|
|
"learning_rate": 9.809099898595647e-06,
|
|
"loss": 1.2988,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 0.6694148936170212,
|
|
"grad_norm": 3.8813681602478027,
|
|
"learning_rate": 9.808859118662558e-06,
|
|
"loss": 1.1505,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 0.6696808510638298,
|
|
"grad_norm": 3.5952844619750977,
|
|
"learning_rate": 9.808618189937706e-06,
|
|
"loss": 1.3804,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 0.6699468085106383,
|
|
"grad_norm": 3.642479181289673,
|
|
"learning_rate": 9.808377112428546e-06,
|
|
"loss": 1.2918,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 0.6702127659574468,
|
|
"grad_norm": 3.810826301574707,
|
|
"learning_rate": 9.808135886142536e-06,
|
|
"loss": 1.3684,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.6704787234042553,
|
|
"grad_norm": 3.843879222869873,
|
|
"learning_rate": 9.807894511087141e-06,
|
|
"loss": 1.2815,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 0.6707446808510639,
|
|
"grad_norm": 3.68229341506958,
|
|
"learning_rate": 9.807652987269829e-06,
|
|
"loss": 1.1894,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 0.6710106382978723,
|
|
"grad_norm": 3.585465669631958,
|
|
"learning_rate": 9.807411314698075e-06,
|
|
"loss": 1.3078,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 0.6712765957446809,
|
|
"grad_norm": 3.825195074081421,
|
|
"learning_rate": 9.807169493379353e-06,
|
|
"loss": 1.2117,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 0.6715425531914894,
|
|
"grad_norm": 3.376753091812134,
|
|
"learning_rate": 9.806927523321148e-06,
|
|
"loss": 1.1575,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.6718085106382978,
|
|
"grad_norm": 3.877986431121826,
|
|
"learning_rate": 9.806685404530946e-06,
|
|
"loss": 1.3773,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 0.6720744680851064,
|
|
"grad_norm": 3.9964683055877686,
|
|
"learning_rate": 9.806443137016237e-06,
|
|
"loss": 1.2466,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 0.6723404255319149,
|
|
"grad_norm": 3.6897804737091064,
|
|
"learning_rate": 9.80620072078452e-06,
|
|
"loss": 1.2107,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 0.6726063829787234,
|
|
"grad_norm": 3.921840190887451,
|
|
"learning_rate": 9.805958155843294e-06,
|
|
"loss": 1.226,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 0.6728723404255319,
|
|
"grad_norm": 3.4277050495147705,
|
|
"learning_rate": 9.805715442200065e-06,
|
|
"loss": 1.2126,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.6731382978723405,
|
|
"grad_norm": 3.841946601867676,
|
|
"learning_rate": 9.805472579862342e-06,
|
|
"loss": 1.323,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 0.6734042553191489,
|
|
"grad_norm": 3.7039599418640137,
|
|
"learning_rate": 9.805229568837637e-06,
|
|
"loss": 1.2843,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 0.6736702127659574,
|
|
"grad_norm": 3.5301520824432373,
|
|
"learning_rate": 9.804986409133475e-06,
|
|
"loss": 1.0612,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 0.673936170212766,
|
|
"grad_norm": 4.042654037475586,
|
|
"learning_rate": 9.804743100757375e-06,
|
|
"loss": 1.215,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 0.6742021276595744,
|
|
"grad_norm": 3.895273447036743,
|
|
"learning_rate": 9.804499643716866e-06,
|
|
"loss": 1.4006,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.674468085106383,
|
|
"grad_norm": 3.5299017429351807,
|
|
"learning_rate": 9.804256038019482e-06,
|
|
"loss": 1.3813,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 0.6747340425531915,
|
|
"grad_norm": 3.8434762954711914,
|
|
"learning_rate": 9.80401228367276e-06,
|
|
"loss": 1.4165,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 0.675,
|
|
"grad_norm": 4.0280256271362305,
|
|
"learning_rate": 9.803768380684242e-06,
|
|
"loss": 1.3851,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 0.6752659574468085,
|
|
"grad_norm": 3.663043260574341,
|
|
"learning_rate": 9.803524329061474e-06,
|
|
"loss": 1.3044,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 0.675531914893617,
|
|
"grad_norm": 3.575730562210083,
|
|
"learning_rate": 9.803280128812009e-06,
|
|
"loss": 1.2849,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.6757978723404255,
|
|
"grad_norm": 3.7937097549438477,
|
|
"learning_rate": 9.8030357799434e-06,
|
|
"loss": 1.2569,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 0.676063829787234,
|
|
"grad_norm": 3.982719898223877,
|
|
"learning_rate": 9.80279128246321e-06,
|
|
"loss": 1.411,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 0.6763297872340426,
|
|
"grad_norm": 3.825068950653076,
|
|
"learning_rate": 9.802546636379001e-06,
|
|
"loss": 1.295,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 0.676595744680851,
|
|
"grad_norm": 3.8499345779418945,
|
|
"learning_rate": 9.80230184169835e-06,
|
|
"loss": 1.282,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 0.6768617021276596,
|
|
"grad_norm": 3.4873030185699463,
|
|
"learning_rate": 9.802056898428823e-06,
|
|
"loss": 1.2803,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.6771276595744681,
|
|
"grad_norm": 3.9438254833221436,
|
|
"learning_rate": 9.801811806578001e-06,
|
|
"loss": 1.2881,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 0.6773936170212767,
|
|
"grad_norm": 3.392169237136841,
|
|
"learning_rate": 9.80156656615347e-06,
|
|
"loss": 1.2485,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 0.6776595744680851,
|
|
"grad_norm": 3.8698456287384033,
|
|
"learning_rate": 9.801321177162814e-06,
|
|
"loss": 1.281,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 0.6779255319148936,
|
|
"grad_norm": 3.8232076168060303,
|
|
"learning_rate": 9.801075639613628e-06,
|
|
"loss": 1.3045,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 0.6781914893617021,
|
|
"grad_norm": 3.8453428745269775,
|
|
"learning_rate": 9.80082995351351e-06,
|
|
"loss": 1.2239,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.6784574468085106,
|
|
"grad_norm": 3.7375547885894775,
|
|
"learning_rate": 9.800584118870063e-06,
|
|
"loss": 1.195,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 0.6787234042553192,
|
|
"grad_norm": 3.84708571434021,
|
|
"learning_rate": 9.800338135690889e-06,
|
|
"loss": 1.1614,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 0.6789893617021276,
|
|
"grad_norm": 3.612217664718628,
|
|
"learning_rate": 9.800092003983602e-06,
|
|
"loss": 1.2499,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 0.6792553191489362,
|
|
"grad_norm": 3.217289447784424,
|
|
"learning_rate": 9.799845723755818e-06,
|
|
"loss": 1.1648,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 0.6795212765957447,
|
|
"grad_norm": 4.510238170623779,
|
|
"learning_rate": 9.799599295015154e-06,
|
|
"loss": 1.2728,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.6797872340425531,
|
|
"grad_norm": 4.0085129737854,
|
|
"learning_rate": 9.79935271776924e-06,
|
|
"loss": 1.3524,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 0.6800531914893617,
|
|
"grad_norm": 3.8481833934783936,
|
|
"learning_rate": 9.799105992025699e-06,
|
|
"loss": 1.2783,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 0.6803191489361702,
|
|
"grad_norm": 3.901775598526001,
|
|
"learning_rate": 9.79885911779217e-06,
|
|
"loss": 1.1736,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 0.6805851063829788,
|
|
"grad_norm": 3.864826202392578,
|
|
"learning_rate": 9.798612095076291e-06,
|
|
"loss": 1.3108,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 0.6808510638297872,
|
|
"grad_norm": 3.7867627143859863,
|
|
"learning_rate": 9.798364923885703e-06,
|
|
"loss": 1.1626,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.6811170212765958,
|
|
"grad_norm": 3.8203864097595215,
|
|
"learning_rate": 9.798117604228054e-06,
|
|
"loss": 1.2232,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 0.6813829787234043,
|
|
"grad_norm": 3.5479917526245117,
|
|
"learning_rate": 9.797870136110998e-06,
|
|
"loss": 1.1571,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 0.6816489361702127,
|
|
"grad_norm": 3.782655715942383,
|
|
"learning_rate": 9.797622519542193e-06,
|
|
"loss": 1.3004,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 0.6819148936170213,
|
|
"grad_norm": 3.477875232696533,
|
|
"learning_rate": 9.797374754529297e-06,
|
|
"loss": 1.0335,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 0.6821808510638298,
|
|
"grad_norm": 3.8241772651672363,
|
|
"learning_rate": 9.797126841079979e-06,
|
|
"loss": 1.4163,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.6824468085106383,
|
|
"grad_norm": 3.764817476272583,
|
|
"learning_rate": 9.796878779201906e-06,
|
|
"loss": 1.2243,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 0.6827127659574468,
|
|
"grad_norm": 3.784823417663574,
|
|
"learning_rate": 9.796630568902758e-06,
|
|
"loss": 1.4082,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 0.6829787234042554,
|
|
"grad_norm": 3.3941454887390137,
|
|
"learning_rate": 9.796382210190212e-06,
|
|
"loss": 1.0939,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 0.6832446808510638,
|
|
"grad_norm": 3.484823226928711,
|
|
"learning_rate": 9.796133703071956e-06,
|
|
"loss": 1.2322,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 0.6835106382978723,
|
|
"grad_norm": 3.6055960655212402,
|
|
"learning_rate": 9.795885047555673e-06,
|
|
"loss": 1.3383,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.6837765957446809,
|
|
"grad_norm": 3.7031943798065186,
|
|
"learning_rate": 9.795636243649061e-06,
|
|
"loss": 1.2987,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 0.6840425531914893,
|
|
"grad_norm": 3.5490245819091797,
|
|
"learning_rate": 9.795387291359819e-06,
|
|
"loss": 1.291,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 0.6843085106382979,
|
|
"grad_norm": 3.611907958984375,
|
|
"learning_rate": 9.795138190695647e-06,
|
|
"loss": 1.2693,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 0.6845744680851064,
|
|
"grad_norm": 3.580634832382202,
|
|
"learning_rate": 9.794888941664253e-06,
|
|
"loss": 1.3336,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 0.6848404255319149,
|
|
"grad_norm": 3.957103967666626,
|
|
"learning_rate": 9.794639544273352e-06,
|
|
"loss": 1.2077,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.6851063829787234,
|
|
"grad_norm": 3.5140933990478516,
|
|
"learning_rate": 9.794389998530659e-06,
|
|
"loss": 1.2885,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 0.685372340425532,
|
|
"grad_norm": 3.6171066761016846,
|
|
"learning_rate": 9.794140304443891e-06,
|
|
"loss": 1.2211,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 0.6856382978723404,
|
|
"grad_norm": 3.641486167907715,
|
|
"learning_rate": 9.793890462020781e-06,
|
|
"loss": 1.0571,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 0.6859042553191489,
|
|
"grad_norm": 3.605208396911621,
|
|
"learning_rate": 9.793640471269055e-06,
|
|
"loss": 1.1932,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 0.6861702127659575,
|
|
"grad_norm": 3.67253041267395,
|
|
"learning_rate": 9.793390332196448e-06,
|
|
"loss": 1.1474,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.6864361702127659,
|
|
"grad_norm": 4.190906524658203,
|
|
"learning_rate": 9.793140044810701e-06,
|
|
"loss": 1.2488,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 0.6867021276595745,
|
|
"grad_norm": 4.1439104080200195,
|
|
"learning_rate": 9.792889609119558e-06,
|
|
"loss": 1.2747,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 0.686968085106383,
|
|
"grad_norm": 3.9002907276153564,
|
|
"learning_rate": 9.79263902513077e-06,
|
|
"loss": 1.2291,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 0.6872340425531915,
|
|
"grad_norm": 3.6862435340881348,
|
|
"learning_rate": 9.792388292852084e-06,
|
|
"loss": 1.1637,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 0.6875,
|
|
"grad_norm": 3.789638042449951,
|
|
"learning_rate": 9.792137412291265e-06,
|
|
"loss": 1.1779,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.6877659574468085,
|
|
"grad_norm": 3.5384011268615723,
|
|
"learning_rate": 9.791886383456071e-06,
|
|
"loss": 1.2701,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 0.688031914893617,
|
|
"grad_norm": 3.6008050441741943,
|
|
"learning_rate": 9.79163520635427e-06,
|
|
"loss": 1.2479,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 0.6882978723404255,
|
|
"grad_norm": 3.71974515914917,
|
|
"learning_rate": 9.791383880993635e-06,
|
|
"loss": 1.267,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 0.6885638297872341,
|
|
"grad_norm": 3.5324504375457764,
|
|
"learning_rate": 9.791132407381942e-06,
|
|
"loss": 1.2725,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 0.6888297872340425,
|
|
"grad_norm": 3.602149724960327,
|
|
"learning_rate": 9.790880785526971e-06,
|
|
"loss": 1.1551,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.6890957446808511,
|
|
"grad_norm": 3.761108160018921,
|
|
"learning_rate": 9.790629015436508e-06,
|
|
"loss": 1.2654,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 0.6893617021276596,
|
|
"grad_norm": 3.6845576763153076,
|
|
"learning_rate": 9.790377097118342e-06,
|
|
"loss": 1.1352,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 0.689627659574468,
|
|
"grad_norm": 3.4206063747406006,
|
|
"learning_rate": 9.79012503058027e-06,
|
|
"loss": 1.1649,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 0.6898936170212766,
|
|
"grad_norm": 3.91064190864563,
|
|
"learning_rate": 9.789872815830089e-06,
|
|
"loss": 1.2736,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 0.6901595744680851,
|
|
"grad_norm": 3.3683114051818848,
|
|
"learning_rate": 9.789620452875605e-06,
|
|
"loss": 1.1734,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.6904255319148936,
|
|
"grad_norm": 3.797476053237915,
|
|
"learning_rate": 9.789367941724623e-06,
|
|
"loss": 1.239,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 0.6906914893617021,
|
|
"grad_norm": 3.623358964920044,
|
|
"learning_rate": 9.78911528238496e-06,
|
|
"loss": 1.2941,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 0.6909574468085107,
|
|
"grad_norm": 4.187454700469971,
|
|
"learning_rate": 9.78886247486443e-06,
|
|
"loss": 1.3176,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 0.6912234042553191,
|
|
"grad_norm": 4.131342887878418,
|
|
"learning_rate": 9.78860951917086e-06,
|
|
"loss": 1.3183,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 0.6914893617021277,
|
|
"grad_norm": 3.6273796558380127,
|
|
"learning_rate": 9.78835641531207e-06,
|
|
"loss": 1.1836,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.6917553191489362,
|
|
"grad_norm": 3.8663980960845947,
|
|
"learning_rate": 9.788103163295897e-06,
|
|
"loss": 1.4566,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 0.6920212765957446,
|
|
"grad_norm": 3.8288991451263428,
|
|
"learning_rate": 9.787849763130174e-06,
|
|
"loss": 1.2238,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 0.6922872340425532,
|
|
"grad_norm": 4.178062438964844,
|
|
"learning_rate": 9.787596214822743e-06,
|
|
"loss": 1.399,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 0.6925531914893617,
|
|
"grad_norm": 3.824878215789795,
|
|
"learning_rate": 9.787342518381447e-06,
|
|
"loss": 1.2654,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 0.6928191489361702,
|
|
"grad_norm": 3.742422103881836,
|
|
"learning_rate": 9.787088673814137e-06,
|
|
"loss": 1.3921,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.6930851063829787,
|
|
"grad_norm": 4.080827713012695,
|
|
"learning_rate": 9.78683468112867e-06,
|
|
"loss": 1.2525,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 0.6933510638297873,
|
|
"grad_norm": 3.393066883087158,
|
|
"learning_rate": 9.7865805403329e-06,
|
|
"loss": 1.0471,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 0.6936170212765957,
|
|
"grad_norm": 3.3034181594848633,
|
|
"learning_rate": 9.786326251434694e-06,
|
|
"loss": 1.1627,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 0.6938829787234042,
|
|
"grad_norm": 3.8288989067077637,
|
|
"learning_rate": 9.786071814441918e-06,
|
|
"loss": 1.2483,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 0.6941489361702128,
|
|
"grad_norm": 3.4944722652435303,
|
|
"learning_rate": 9.785817229362445e-06,
|
|
"loss": 1.2921,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.6944148936170212,
|
|
"grad_norm": 3.653322219848633,
|
|
"learning_rate": 9.785562496204151e-06,
|
|
"loss": 1.2367,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 0.6946808510638298,
|
|
"grad_norm": 3.3792853355407715,
|
|
"learning_rate": 9.785307614974922e-06,
|
|
"loss": 1.1746,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 0.6949468085106383,
|
|
"grad_norm": 3.608031988143921,
|
|
"learning_rate": 9.78505258568264e-06,
|
|
"loss": 1.2059,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 0.6952127659574469,
|
|
"grad_norm": 4.2280402183532715,
|
|
"learning_rate": 9.784797408335195e-06,
|
|
"loss": 1.294,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 0.6954787234042553,
|
|
"grad_norm": 3.8257791996002197,
|
|
"learning_rate": 9.784542082940488e-06,
|
|
"loss": 1.3261,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.6957446808510638,
|
|
"grad_norm": 3.9494855403900146,
|
|
"learning_rate": 9.784286609506415e-06,
|
|
"loss": 1.3776,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 0.6960106382978724,
|
|
"grad_norm": 3.8635013103485107,
|
|
"learning_rate": 9.78403098804088e-06,
|
|
"loss": 1.3371,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 0.6962765957446808,
|
|
"grad_norm": 3.8114707469940186,
|
|
"learning_rate": 9.783775218551796e-06,
|
|
"loss": 1.3064,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 0.6965425531914894,
|
|
"grad_norm": 3.8006489276885986,
|
|
"learning_rate": 9.783519301047072e-06,
|
|
"loss": 1.3864,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 0.6968085106382979,
|
|
"grad_norm": 3.504070997238159,
|
|
"learning_rate": 9.783263235534632e-06,
|
|
"loss": 1.2172,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.6970744680851064,
|
|
"grad_norm": 3.741771936416626,
|
|
"learning_rate": 9.783007022022394e-06,
|
|
"loss": 1.2375,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 0.6973404255319149,
|
|
"grad_norm": 3.5260889530181885,
|
|
"learning_rate": 9.782750660518288e-06,
|
|
"loss": 1.4035,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 0.6976063829787233,
|
|
"grad_norm": 3.832963466644287,
|
|
"learning_rate": 9.782494151030245e-06,
|
|
"loss": 1.2979,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 0.6978723404255319,
|
|
"grad_norm": 3.5783939361572266,
|
|
"learning_rate": 9.782237493566202e-06,
|
|
"loss": 1.1859,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 0.6981382978723404,
|
|
"grad_norm": 3.677419900894165,
|
|
"learning_rate": 9.781980688134102e-06,
|
|
"loss": 1.2306,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.698404255319149,
|
|
"grad_norm": 3.812321901321411,
|
|
"learning_rate": 9.781723734741889e-06,
|
|
"loss": 1.3585,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 0.6986702127659574,
|
|
"grad_norm": 3.3270645141601562,
|
|
"learning_rate": 9.781466633397512e-06,
|
|
"loss": 1.0776,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 0.698936170212766,
|
|
"grad_norm": 3.6559667587280273,
|
|
"learning_rate": 9.78120938410893e-06,
|
|
"loss": 1.3296,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 0.6992021276595745,
|
|
"grad_norm": 3.707422971725464,
|
|
"learning_rate": 9.7809519868841e-06,
|
|
"loss": 1.2396,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 0.699468085106383,
|
|
"grad_norm": 3.875147581100464,
|
|
"learning_rate": 9.780694441730987e-06,
|
|
"loss": 1.4079,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.6997340425531915,
|
|
"grad_norm": 4.308002471923828,
|
|
"learning_rate": 9.780436748657559e-06,
|
|
"loss": 1.3675,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"grad_norm": 3.6063718795776367,
|
|
"learning_rate": 9.780178907671788e-06,
|
|
"loss": 1.1953,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 0.7002659574468085,
|
|
"grad_norm": 3.582390308380127,
|
|
"learning_rate": 9.779920918781656e-06,
|
|
"loss": 1.2841,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 0.700531914893617,
|
|
"grad_norm": 3.8668954372406006,
|
|
"learning_rate": 9.779662781995144e-06,
|
|
"loss": 1.3806,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 0.7007978723404256,
|
|
"grad_norm": 3.4479143619537354,
|
|
"learning_rate": 9.779404497320236e-06,
|
|
"loss": 1.3201,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.701063829787234,
|
|
"grad_norm": 4.041039943695068,
|
|
"learning_rate": 9.779146064764925e-06,
|
|
"loss": 1.1912,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 0.7013297872340426,
|
|
"grad_norm": 3.944117307662964,
|
|
"learning_rate": 9.77888748433721e-06,
|
|
"loss": 1.1603,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 0.7015957446808511,
|
|
"grad_norm": 4.008464336395264,
|
|
"learning_rate": 9.77862875604509e-06,
|
|
"loss": 1.3612,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 0.7018617021276595,
|
|
"grad_norm": 3.5746493339538574,
|
|
"learning_rate": 9.778369879896568e-06,
|
|
"loss": 1.3117,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 0.7021276595744681,
|
|
"grad_norm": 4.120686054229736,
|
|
"learning_rate": 9.778110855899659e-06,
|
|
"loss": 1.2801,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.7023936170212766,
|
|
"grad_norm": 3.7582547664642334,
|
|
"learning_rate": 9.777851684062371e-06,
|
|
"loss": 1.291,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 0.7026595744680851,
|
|
"grad_norm": 3.8033053874969482,
|
|
"learning_rate": 9.77759236439273e-06,
|
|
"loss": 1.3342,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 0.7029255319148936,
|
|
"grad_norm": 3.712113618850708,
|
|
"learning_rate": 9.777332896898754e-06,
|
|
"loss": 1.1921,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 0.7031914893617022,
|
|
"grad_norm": 3.1552655696868896,
|
|
"learning_rate": 9.777073281588476e-06,
|
|
"loss": 1.1407,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 0.7034574468085106,
|
|
"grad_norm": 4.050416946411133,
|
|
"learning_rate": 9.776813518469924e-06,
|
|
"loss": 1.3787,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.7037234042553191,
|
|
"grad_norm": 3.63802170753479,
|
|
"learning_rate": 9.77655360755114e-06,
|
|
"loss": 1.3203,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 0.7039893617021277,
|
|
"grad_norm": 4.1890482902526855,
|
|
"learning_rate": 9.77629354884016e-06,
|
|
"loss": 1.3532,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 0.7042553191489361,
|
|
"grad_norm": 4.1286444664001465,
|
|
"learning_rate": 9.776033342345038e-06,
|
|
"loss": 1.2704,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 0.7045212765957447,
|
|
"grad_norm": 3.4052047729492188,
|
|
"learning_rate": 9.77577298807382e-06,
|
|
"loss": 1.2537,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 0.7047872340425532,
|
|
"grad_norm": 4.194342136383057,
|
|
"learning_rate": 9.775512486034564e-06,
|
|
"loss": 1.449,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.7050531914893617,
|
|
"grad_norm": 3.945206880569458,
|
|
"learning_rate": 9.775251836235327e-06,
|
|
"loss": 1.357,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 0.7053191489361702,
|
|
"grad_norm": 3.5744996070861816,
|
|
"learning_rate": 9.774991038684177e-06,
|
|
"loss": 1.2701,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 0.7055851063829788,
|
|
"grad_norm": 3.9091970920562744,
|
|
"learning_rate": 9.774730093389182e-06,
|
|
"loss": 1.3401,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 0.7058510638297872,
|
|
"grad_norm": 3.7527072429656982,
|
|
"learning_rate": 9.774469000358418e-06,
|
|
"loss": 1.2886,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 0.7061170212765957,
|
|
"grad_norm": 3.5021281242370605,
|
|
"learning_rate": 9.774207759599961e-06,
|
|
"loss": 1.2253,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.7063829787234043,
|
|
"grad_norm": 3.725334405899048,
|
|
"learning_rate": 9.773946371121894e-06,
|
|
"loss": 1.3451,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 0.7066489361702127,
|
|
"grad_norm": 3.3787760734558105,
|
|
"learning_rate": 9.773684834932306e-06,
|
|
"loss": 1.183,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 0.7069148936170213,
|
|
"grad_norm": 3.956935167312622,
|
|
"learning_rate": 9.77342315103929e-06,
|
|
"loss": 1.3828,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 0.7071808510638298,
|
|
"grad_norm": 3.7493388652801514,
|
|
"learning_rate": 9.77316131945094e-06,
|
|
"loss": 1.2192,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 0.7074468085106383,
|
|
"grad_norm": 4.022577285766602,
|
|
"learning_rate": 9.772899340175362e-06,
|
|
"loss": 1.2509,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.7077127659574468,
|
|
"grad_norm": 3.9888761043548584,
|
|
"learning_rate": 9.772637213220658e-06,
|
|
"loss": 1.3076,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 0.7079787234042553,
|
|
"grad_norm": 3.502845048904419,
|
|
"learning_rate": 9.772374938594937e-06,
|
|
"loss": 1.4205,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 0.7082446808510638,
|
|
"grad_norm": 3.611692190170288,
|
|
"learning_rate": 9.772112516306318e-06,
|
|
"loss": 1.2036,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 0.7085106382978723,
|
|
"grad_norm": 3.3075003623962402,
|
|
"learning_rate": 9.77184994636292e-06,
|
|
"loss": 1.1399,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 0.7087765957446809,
|
|
"grad_norm": 3.6357240676879883,
|
|
"learning_rate": 9.771587228772866e-06,
|
|
"loss": 1.2438,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.7090425531914893,
|
|
"grad_norm": 3.798506259918213,
|
|
"learning_rate": 9.771324363544286e-06,
|
|
"loss": 1.2793,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 0.7093085106382979,
|
|
"grad_norm": 3.3980555534362793,
|
|
"learning_rate": 9.771061350685312e-06,
|
|
"loss": 1.2446,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 0.7095744680851064,
|
|
"grad_norm": 3.5380852222442627,
|
|
"learning_rate": 9.770798190204083e-06,
|
|
"loss": 1.1996,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 0.7098404255319148,
|
|
"grad_norm": 3.93696665763855,
|
|
"learning_rate": 9.77053488210874e-06,
|
|
"loss": 1.2549,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 0.7101063829787234,
|
|
"grad_norm": 4.042500019073486,
|
|
"learning_rate": 9.770271426407432e-06,
|
|
"loss": 1.455,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.7103723404255319,
|
|
"grad_norm": 3.6526906490325928,
|
|
"learning_rate": 9.770007823108309e-06,
|
|
"loss": 1.3447,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 0.7106382978723405,
|
|
"grad_norm": 3.8958542346954346,
|
|
"learning_rate": 9.76974407221953e-06,
|
|
"loss": 1.2542,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 0.7109042553191489,
|
|
"grad_norm": 3.5408430099487305,
|
|
"learning_rate": 9.769480173749252e-06,
|
|
"loss": 1.3333,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 0.7111702127659575,
|
|
"grad_norm": 3.586918592453003,
|
|
"learning_rate": 9.769216127705643e-06,
|
|
"loss": 1.2469,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 0.711436170212766,
|
|
"grad_norm": 3.6321678161621094,
|
|
"learning_rate": 9.76895193409687e-06,
|
|
"loss": 1.3352,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.7117021276595744,
|
|
"grad_norm": 3.4352383613586426,
|
|
"learning_rate": 9.768687592931111e-06,
|
|
"loss": 1.228,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 0.711968085106383,
|
|
"grad_norm": 3.756770610809326,
|
|
"learning_rate": 9.768423104216544e-06,
|
|
"loss": 1.1776,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 0.7122340425531914,
|
|
"grad_norm": 4.270863056182861,
|
|
"learning_rate": 9.76815846796135e-06,
|
|
"loss": 1.2372,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 0.7125,
|
|
"grad_norm": 4.0467848777771,
|
|
"learning_rate": 9.767893684173722e-06,
|
|
"loss": 1.33,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 0.7127659574468085,
|
|
"grad_norm": 3.9330484867095947,
|
|
"learning_rate": 9.767628752861848e-06,
|
|
"loss": 1.2019,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.7130319148936171,
|
|
"grad_norm": 4.011680603027344,
|
|
"learning_rate": 9.767363674033928e-06,
|
|
"loss": 1.1982,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 0.7132978723404255,
|
|
"grad_norm": 3.5905420780181885,
|
|
"learning_rate": 9.767098447698163e-06,
|
|
"loss": 1.2441,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 0.7135638297872341,
|
|
"grad_norm": 3.8876521587371826,
|
|
"learning_rate": 9.766833073862758e-06,
|
|
"loss": 1.3112,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 0.7138297872340426,
|
|
"grad_norm": 3.6759207248687744,
|
|
"learning_rate": 9.766567552535928e-06,
|
|
"loss": 1.2974,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 0.714095744680851,
|
|
"grad_norm": 3.6160476207733154,
|
|
"learning_rate": 9.766301883725884e-06,
|
|
"loss": 1.3107,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.7143617021276596,
|
|
"grad_norm": 3.9795331954956055,
|
|
"learning_rate": 9.766036067440849e-06,
|
|
"loss": 1.4063,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 0.714627659574468,
|
|
"grad_norm": 3.899998188018799,
|
|
"learning_rate": 9.765770103689045e-06,
|
|
"loss": 1.3517,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 0.7148936170212766,
|
|
"grad_norm": 3.501302719116211,
|
|
"learning_rate": 9.765503992478704e-06,
|
|
"loss": 1.078,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 0.7151595744680851,
|
|
"grad_norm": 3.4490084648132324,
|
|
"learning_rate": 9.76523773381806e-06,
|
|
"loss": 1.2363,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 0.7154255319148937,
|
|
"grad_norm": 3.773393154144287,
|
|
"learning_rate": 9.76497132771535e-06,
|
|
"loss": 1.2677,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.7156914893617021,
|
|
"grad_norm": 3.2833402156829834,
|
|
"learning_rate": 9.764704774178816e-06,
|
|
"loss": 1.2409,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 0.7159574468085106,
|
|
"grad_norm": 3.798407793045044,
|
|
"learning_rate": 9.764438073216706e-06,
|
|
"loss": 1.2375,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 0.7162234042553192,
|
|
"grad_norm": 3.383553981781006,
|
|
"learning_rate": 9.764171224837274e-06,
|
|
"loss": 1.223,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 0.7164893617021276,
|
|
"grad_norm": 3.781569242477417,
|
|
"learning_rate": 9.763904229048775e-06,
|
|
"loss": 1.1822,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 0.7167553191489362,
|
|
"grad_norm": 3.862577438354492,
|
|
"learning_rate": 9.76363708585947e-06,
|
|
"loss": 1.2266,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.7170212765957447,
|
|
"grad_norm": 3.4044363498687744,
|
|
"learning_rate": 9.763369795277627e-06,
|
|
"loss": 1.1887,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 0.7172872340425532,
|
|
"grad_norm": 3.930368185043335,
|
|
"learning_rate": 9.763102357311511e-06,
|
|
"loss": 1.2911,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 0.7175531914893617,
|
|
"grad_norm": 3.72084379196167,
|
|
"learning_rate": 9.762834771969403e-06,
|
|
"loss": 1.2693,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 0.7178191489361702,
|
|
"grad_norm": 3.3735997676849365,
|
|
"learning_rate": 9.762567039259577e-06,
|
|
"loss": 1.2202,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 0.7180851063829787,
|
|
"grad_norm": 3.3215930461883545,
|
|
"learning_rate": 9.762299159190322e-06,
|
|
"loss": 1.311,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.7183510638297872,
|
|
"grad_norm": 3.2667737007141113,
|
|
"learning_rate": 9.762031131769923e-06,
|
|
"loss": 1.1621,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 0.7186170212765958,
|
|
"grad_norm": 3.8327572345733643,
|
|
"learning_rate": 9.761762957006673e-06,
|
|
"loss": 1.2764,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 0.7188829787234042,
|
|
"grad_norm": 3.693328857421875,
|
|
"learning_rate": 9.761494634908872e-06,
|
|
"loss": 1.168,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 0.7191489361702128,
|
|
"grad_norm": 3.7882509231567383,
|
|
"learning_rate": 9.761226165484822e-06,
|
|
"loss": 1.3076,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 0.7194148936170213,
|
|
"grad_norm": 3.366978645324707,
|
|
"learning_rate": 9.760957548742828e-06,
|
|
"loss": 1.3628,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.7196808510638298,
|
|
"grad_norm": 3.4671497344970703,
|
|
"learning_rate": 9.7606887846912e-06,
|
|
"loss": 1.2197,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 0.7199468085106383,
|
|
"grad_norm": 4.486639022827148,
|
|
"learning_rate": 9.760419873338261e-06,
|
|
"loss": 1.1786,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 0.7202127659574468,
|
|
"grad_norm": 3.5285980701446533,
|
|
"learning_rate": 9.760150814692321e-06,
|
|
"loss": 1.0701,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 0.7204787234042553,
|
|
"grad_norm": 3.4500350952148438,
|
|
"learning_rate": 9.759881608761714e-06,
|
|
"loss": 1.1768,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 0.7207446808510638,
|
|
"grad_norm": 3.219653606414795,
|
|
"learning_rate": 9.759612255554765e-06,
|
|
"loss": 1.1413,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.7210106382978724,
|
|
"grad_norm": 3.7905290126800537,
|
|
"learning_rate": 9.75934275507981e-06,
|
|
"loss": 1.3632,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 0.7212765957446808,
|
|
"grad_norm": 3.765892744064331,
|
|
"learning_rate": 9.759073107345186e-06,
|
|
"loss": 1.3237,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 0.7215425531914894,
|
|
"grad_norm": 3.8589115142822266,
|
|
"learning_rate": 9.758803312359236e-06,
|
|
"loss": 1.3028,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 0.7218085106382979,
|
|
"grad_norm": 3.688624143600464,
|
|
"learning_rate": 9.758533370130308e-06,
|
|
"loss": 1.2325,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 0.7220744680851063,
|
|
"grad_norm": 3.397474765777588,
|
|
"learning_rate": 9.758263280666757e-06,
|
|
"loss": 1.3173,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.7223404255319149,
|
|
"grad_norm": 3.9396157264709473,
|
|
"learning_rate": 9.757993043976937e-06,
|
|
"loss": 1.4517,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 0.7226063829787234,
|
|
"grad_norm": 3.5887930393218994,
|
|
"learning_rate": 9.757722660069211e-06,
|
|
"loss": 1.1431,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 0.722872340425532,
|
|
"grad_norm": 3.520183563232422,
|
|
"learning_rate": 9.757452128951945e-06,
|
|
"loss": 1.3442,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 0.7231382978723404,
|
|
"grad_norm": 3.704939365386963,
|
|
"learning_rate": 9.757181450633507e-06,
|
|
"loss": 1.2257,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 0.723404255319149,
|
|
"grad_norm": 4.201409816741943,
|
|
"learning_rate": 9.756910625122276e-06,
|
|
"loss": 1.234,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.7236702127659574,
|
|
"grad_norm": 3.571162700653076,
|
|
"learning_rate": 9.756639652426627e-06,
|
|
"loss": 1.195,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 0.7239361702127659,
|
|
"grad_norm": 3.463414192199707,
|
|
"learning_rate": 9.75636853255495e-06,
|
|
"loss": 1.2494,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 0.7242021276595745,
|
|
"grad_norm": 3.4496824741363525,
|
|
"learning_rate": 9.75609726551563e-06,
|
|
"loss": 1.1707,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 0.7244680851063829,
|
|
"grad_norm": 3.9885363578796387,
|
|
"learning_rate": 9.75582585131706e-06,
|
|
"loss": 1.2613,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 0.7247340425531915,
|
|
"grad_norm": 4.085259437561035,
|
|
"learning_rate": 9.755554289967638e-06,
|
|
"loss": 1.2527,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.725,
|
|
"grad_norm": 4.417264938354492,
|
|
"learning_rate": 9.755282581475769e-06,
|
|
"loss": 1.466,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 0.7252659574468086,
|
|
"grad_norm": 3.954056739807129,
|
|
"learning_rate": 9.755010725849857e-06,
|
|
"loss": 1.2379,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 0.725531914893617,
|
|
"grad_norm": 3.838103771209717,
|
|
"learning_rate": 9.754738723098316e-06,
|
|
"loss": 1.1999,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 0.7257978723404256,
|
|
"grad_norm": 4.1355695724487305,
|
|
"learning_rate": 9.75446657322956e-06,
|
|
"loss": 1.2805,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 0.726063829787234,
|
|
"grad_norm": 4.266016483306885,
|
|
"learning_rate": 9.75419427625201e-06,
|
|
"loss": 1.274,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.7263297872340425,
|
|
"grad_norm": 3.8930816650390625,
|
|
"learning_rate": 9.753921832174094e-06,
|
|
"loss": 1.3094,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 0.7265957446808511,
|
|
"grad_norm": 3.7425036430358887,
|
|
"learning_rate": 9.753649241004238e-06,
|
|
"loss": 1.2826,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 0.7268617021276595,
|
|
"grad_norm": 4.708345890045166,
|
|
"learning_rate": 9.753376502750878e-06,
|
|
"loss": 1.4243,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 0.7271276595744681,
|
|
"grad_norm": 3.6511597633361816,
|
|
"learning_rate": 9.753103617422452e-06,
|
|
"loss": 1.1892,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 0.7273936170212766,
|
|
"grad_norm": 3.807124376296997,
|
|
"learning_rate": 9.752830585027406e-06,
|
|
"loss": 1.2767,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.7276595744680852,
|
|
"grad_norm": 3.596545457839966,
|
|
"learning_rate": 9.752557405574184e-06,
|
|
"loss": 1.1901,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 0.7279255319148936,
|
|
"grad_norm": 3.6757147312164307,
|
|
"learning_rate": 9.752284079071242e-06,
|
|
"loss": 1.4032,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 0.7281914893617021,
|
|
"grad_norm": 3.862985372543335,
|
|
"learning_rate": 9.752010605527033e-06,
|
|
"loss": 1.1524,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 0.7284574468085107,
|
|
"grad_norm": 3.685128927230835,
|
|
"learning_rate": 9.751736984950023e-06,
|
|
"loss": 1.1703,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 0.7287234042553191,
|
|
"grad_norm": 3.4319050312042236,
|
|
"learning_rate": 9.751463217348675e-06,
|
|
"loss": 1.1965,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.7289893617021277,
|
|
"grad_norm": 3.4726648330688477,
|
|
"learning_rate": 9.751189302731463e-06,
|
|
"loss": 1.24,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 0.7292553191489362,
|
|
"grad_norm": 3.4759905338287354,
|
|
"learning_rate": 9.750915241106857e-06,
|
|
"loss": 1.1663,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 0.7295212765957447,
|
|
"grad_norm": 3.5179250240325928,
|
|
"learning_rate": 9.750641032483344e-06,
|
|
"loss": 1.1964,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 0.7297872340425532,
|
|
"grad_norm": 3.397850751876831,
|
|
"learning_rate": 9.750366676869401e-06,
|
|
"loss": 1.159,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 0.7300531914893617,
|
|
"grad_norm": 3.505492687225342,
|
|
"learning_rate": 9.75009217427352e-06,
|
|
"loss": 1.4271,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.7303191489361702,
|
|
"grad_norm": 3.516559362411499,
|
|
"learning_rate": 9.749817524704198e-06,
|
|
"loss": 1.2119,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 0.7305851063829787,
|
|
"grad_norm": 3.5949020385742188,
|
|
"learning_rate": 9.749542728169925e-06,
|
|
"loss": 1.1291,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 0.7308510638297873,
|
|
"grad_norm": 3.3480985164642334,
|
|
"learning_rate": 9.749267784679211e-06,
|
|
"loss": 1.1421,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 0.7311170212765957,
|
|
"grad_norm": 3.4003922939300537,
|
|
"learning_rate": 9.74899269424056e-06,
|
|
"loss": 1.3106,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 0.7313829787234043,
|
|
"grad_norm": 3.5191762447357178,
|
|
"learning_rate": 9.748717456862484e-06,
|
|
"loss": 1.1878,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.7316489361702128,
|
|
"grad_norm": 3.5664145946502686,
|
|
"learning_rate": 9.748442072553496e-06,
|
|
"loss": 1.2272,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 0.7319148936170212,
|
|
"grad_norm": 3.928241491317749,
|
|
"learning_rate": 9.748166541322124e-06,
|
|
"loss": 1.2986,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 0.7321808510638298,
|
|
"grad_norm": 3.8403828144073486,
|
|
"learning_rate": 9.747890863176887e-06,
|
|
"loss": 1.3132,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 0.7324468085106383,
|
|
"grad_norm": 3.4996137619018555,
|
|
"learning_rate": 9.747615038126317e-06,
|
|
"loss": 1.3824,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 0.7327127659574468,
|
|
"grad_norm": 3.5281126499176025,
|
|
"learning_rate": 9.747339066178947e-06,
|
|
"loss": 1.3015,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.7329787234042553,
|
|
"grad_norm": 3.466567277908325,
|
|
"learning_rate": 9.747062947343318e-06,
|
|
"loss": 1.2638,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 0.7332446808510639,
|
|
"grad_norm": 3.8412346839904785,
|
|
"learning_rate": 9.746786681627971e-06,
|
|
"loss": 1.1944,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 0.7335106382978723,
|
|
"grad_norm": 3.3403968811035156,
|
|
"learning_rate": 9.746510269041459e-06,
|
|
"loss": 1.215,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 0.7337765957446809,
|
|
"grad_norm": 3.735173225402832,
|
|
"learning_rate": 9.746233709592328e-06,
|
|
"loss": 1.393,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 0.7340425531914894,
|
|
"grad_norm": 4.095008373260498,
|
|
"learning_rate": 9.745957003289138e-06,
|
|
"loss": 1.2848,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.7343085106382978,
|
|
"grad_norm": 3.8568758964538574,
|
|
"learning_rate": 9.745680150140452e-06,
|
|
"loss": 1.3195,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 0.7345744680851064,
|
|
"grad_norm": 3.512941360473633,
|
|
"learning_rate": 9.745403150154833e-06,
|
|
"loss": 1.0682,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 0.7348404255319149,
|
|
"grad_norm": 4.007373332977295,
|
|
"learning_rate": 9.745126003340854e-06,
|
|
"loss": 1.2665,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 0.7351063829787234,
|
|
"grad_norm": 3.8637166023254395,
|
|
"learning_rate": 9.74484870970709e-06,
|
|
"loss": 1.4367,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 0.7353723404255319,
|
|
"grad_norm": 3.6544454097747803,
|
|
"learning_rate": 9.744571269262122e-06,
|
|
"loss": 1.157,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.7356382978723405,
|
|
"grad_norm": 3.5814568996429443,
|
|
"learning_rate": 9.744293682014532e-06,
|
|
"loss": 1.2989,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 0.7359042553191489,
|
|
"grad_norm": 3.59860897064209,
|
|
"learning_rate": 9.74401594797291e-06,
|
|
"loss": 1.1852,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 0.7361702127659574,
|
|
"grad_norm": 3.694519519805908,
|
|
"learning_rate": 9.743738067145849e-06,
|
|
"loss": 1.3947,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 0.736436170212766,
|
|
"grad_norm": 3.570734977722168,
|
|
"learning_rate": 9.743460039541947e-06,
|
|
"loss": 1.3176,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 0.7367021276595744,
|
|
"grad_norm": 3.448857545852661,
|
|
"learning_rate": 9.743181865169806e-06,
|
|
"loss": 1.2162,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.736968085106383,
|
|
"grad_norm": 3.7955188751220703,
|
|
"learning_rate": 9.742903544038033e-06,
|
|
"loss": 1.2489,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 0.7372340425531915,
|
|
"grad_norm": 3.520260810852051,
|
|
"learning_rate": 9.742625076155244e-06,
|
|
"loss": 1.2545,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 0.7375,
|
|
"grad_norm": 3.3301799297332764,
|
|
"learning_rate": 9.742346461530048e-06,
|
|
"loss": 1.0909,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 0.7377659574468085,
|
|
"grad_norm": 3.57509708404541,
|
|
"learning_rate": 9.742067700171069e-06,
|
|
"loss": 1.2049,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 0.738031914893617,
|
|
"grad_norm": 3.4712679386138916,
|
|
"learning_rate": 9.741788792086934e-06,
|
|
"loss": 1.1797,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.7382978723404255,
|
|
"grad_norm": 3.4553110599517822,
|
|
"learning_rate": 9.74150973728627e-06,
|
|
"loss": 1.1082,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 0.738563829787234,
|
|
"grad_norm": 3.6550087928771973,
|
|
"learning_rate": 9.741230535777712e-06,
|
|
"loss": 1.281,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 0.7388297872340426,
|
|
"grad_norm": 3.3699588775634766,
|
|
"learning_rate": 9.7409511875699e-06,
|
|
"loss": 1.2331,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 0.739095744680851,
|
|
"grad_norm": 3.393129825592041,
|
|
"learning_rate": 9.740671692671478e-06,
|
|
"loss": 1.1614,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 0.7393617021276596,
|
|
"grad_norm": 3.888546943664551,
|
|
"learning_rate": 9.74039205109109e-06,
|
|
"loss": 1.3773,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.7396276595744681,
|
|
"grad_norm": 3.5572216510772705,
|
|
"learning_rate": 9.740112262837391e-06,
|
|
"loss": 1.2269,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 0.7398936170212767,
|
|
"grad_norm": 3.7788665294647217,
|
|
"learning_rate": 9.73983232791904e-06,
|
|
"loss": 1.2385,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 0.7401595744680851,
|
|
"grad_norm": 4.092897891998291,
|
|
"learning_rate": 9.739552246344692e-06,
|
|
"loss": 1.3396,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 0.7404255319148936,
|
|
"grad_norm": 3.679199457168579,
|
|
"learning_rate": 9.73927201812302e-06,
|
|
"loss": 1.2957,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 0.7406914893617021,
|
|
"grad_norm": 3.590893030166626,
|
|
"learning_rate": 9.738991643262693e-06,
|
|
"loss": 1.3364,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.7409574468085106,
|
|
"grad_norm": 3.5082991123199463,
|
|
"learning_rate": 9.738711121772384e-06,
|
|
"loss": 1.1921,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 0.7412234042553192,
|
|
"grad_norm": 3.556530475616455,
|
|
"learning_rate": 9.738430453660774e-06,
|
|
"loss": 1.2388,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 0.7414893617021276,
|
|
"grad_norm": 4.152648448944092,
|
|
"learning_rate": 9.738149638936547e-06,
|
|
"loss": 1.3962,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 0.7417553191489362,
|
|
"grad_norm": 3.8726470470428467,
|
|
"learning_rate": 9.73786867760839e-06,
|
|
"loss": 1.368,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 0.7420212765957447,
|
|
"grad_norm": 3.4200189113616943,
|
|
"learning_rate": 9.737587569685e-06,
|
|
"loss": 1.3165,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.7422872340425531,
|
|
"grad_norm": 3.8217222690582275,
|
|
"learning_rate": 9.737306315175072e-06,
|
|
"loss": 1.07,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 0.7425531914893617,
|
|
"grad_norm": 4.083987236022949,
|
|
"learning_rate": 9.73702491408731e-06,
|
|
"loss": 1.2129,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 0.7428191489361702,
|
|
"grad_norm": 3.396623373031616,
|
|
"learning_rate": 9.73674336643042e-06,
|
|
"loss": 1.1692,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 0.7430851063829788,
|
|
"grad_norm": 3.545069456100464,
|
|
"learning_rate": 9.736461672213112e-06,
|
|
"loss": 1.2257,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 0.7433510638297872,
|
|
"grad_norm": 3.856208324432373,
|
|
"learning_rate": 9.736179831444103e-06,
|
|
"loss": 1.4061,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.7436170212765958,
|
|
"grad_norm": 3.6652262210845947,
|
|
"learning_rate": 9.735897844132116e-06,
|
|
"loss": 1.1792,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 0.7438829787234043,
|
|
"grad_norm": 3.402409791946411,
|
|
"learning_rate": 9.735615710285873e-06,
|
|
"loss": 1.1954,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 0.7441489361702127,
|
|
"grad_norm": 4.120236396789551,
|
|
"learning_rate": 9.735333429914103e-06,
|
|
"loss": 1.3625,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 0.7444148936170213,
|
|
"grad_norm": 3.873011350631714,
|
|
"learning_rate": 9.735051003025543e-06,
|
|
"loss": 1.1915,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 0.7446808510638298,
|
|
"grad_norm": 3.4933876991271973,
|
|
"learning_rate": 9.73476842962893e-06,
|
|
"loss": 1.1695,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.7449468085106383,
|
|
"grad_norm": 3.8242671489715576,
|
|
"learning_rate": 9.734485709733007e-06,
|
|
"loss": 1.2618,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 0.7452127659574468,
|
|
"grad_norm": 3.512907028198242,
|
|
"learning_rate": 9.734202843346522e-06,
|
|
"loss": 1.1924,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 0.7454787234042554,
|
|
"grad_norm": 4.221972465515137,
|
|
"learning_rate": 9.733919830478227e-06,
|
|
"loss": 1.2335,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 0.7457446808510638,
|
|
"grad_norm": 3.864529609680176,
|
|
"learning_rate": 9.73363667113688e-06,
|
|
"loss": 1.3128,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 0.7460106382978723,
|
|
"grad_norm": 4.328346252441406,
|
|
"learning_rate": 9.73335336533124e-06,
|
|
"loss": 1.3956,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.7462765957446809,
|
|
"grad_norm": 3.605314254760742,
|
|
"learning_rate": 9.733069913070074e-06,
|
|
"loss": 1.1795,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 0.7465425531914893,
|
|
"grad_norm": 4.531727313995361,
|
|
"learning_rate": 9.732786314362154e-06,
|
|
"loss": 1.3895,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 0.7468085106382979,
|
|
"grad_norm": 3.587550163269043,
|
|
"learning_rate": 9.732502569216252e-06,
|
|
"loss": 1.289,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 0.7470744680851064,
|
|
"grad_norm": 3.99782133102417,
|
|
"learning_rate": 9.73221867764115e-06,
|
|
"loss": 1.3014,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 0.7473404255319149,
|
|
"grad_norm": 3.9140994548797607,
|
|
"learning_rate": 9.731934639645628e-06,
|
|
"loss": 1.2428,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.7476063829787234,
|
|
"grad_norm": 3.7804577350616455,
|
|
"learning_rate": 9.73165045523848e-06,
|
|
"loss": 1.2315,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 0.747872340425532,
|
|
"grad_norm": 4.103899002075195,
|
|
"learning_rate": 9.731366124428495e-06,
|
|
"loss": 1.4515,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 0.7481382978723404,
|
|
"grad_norm": 4.170511245727539,
|
|
"learning_rate": 9.73108164722447e-06,
|
|
"loss": 1.3773,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 0.7484042553191489,
|
|
"grad_norm": 3.4937591552734375,
|
|
"learning_rate": 9.73079702363521e-06,
|
|
"loss": 1.1113,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 0.7486702127659575,
|
|
"grad_norm": 3.6979286670684814,
|
|
"learning_rate": 9.730512253669523e-06,
|
|
"loss": 1.2525,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.7489361702127659,
|
|
"grad_norm": 3.6911709308624268,
|
|
"learning_rate": 9.730227337336214e-06,
|
|
"loss": 1.2443,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 0.7492021276595745,
|
|
"grad_norm": 3.462308883666992,
|
|
"learning_rate": 9.729942274644102e-06,
|
|
"loss": 1.1075,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 0.749468085106383,
|
|
"grad_norm": 4.0079240798950195,
|
|
"learning_rate": 9.729657065602007e-06,
|
|
"loss": 1.2715,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 0.7497340425531915,
|
|
"grad_norm": 3.6619253158569336,
|
|
"learning_rate": 9.729371710218755e-06,
|
|
"loss": 1.135,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 3.3799519538879395,
|
|
"learning_rate": 9.729086208503174e-06,
|
|
"loss": 1.2331,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.7502659574468085,
|
|
"grad_norm": 3.828418493270874,
|
|
"learning_rate": 9.728800560464097e-06,
|
|
"loss": 1.3006,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 0.750531914893617,
|
|
"grad_norm": 4.1295928955078125,
|
|
"learning_rate": 9.728514766110366e-06,
|
|
"loss": 1.2404,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 0.7507978723404255,
|
|
"grad_norm": 3.73343825340271,
|
|
"learning_rate": 9.728228825450818e-06,
|
|
"loss": 1.3261,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 0.7510638297872341,
|
|
"grad_norm": 3.336246967315674,
|
|
"learning_rate": 9.727942738494305e-06,
|
|
"loss": 1.0928,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 0.7513297872340425,
|
|
"grad_norm": 3.4438130855560303,
|
|
"learning_rate": 9.727656505249676e-06,
|
|
"loss": 1.2058,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.7515957446808511,
|
|
"grad_norm": 3.7546231746673584,
|
|
"learning_rate": 9.72737012572579e-06,
|
|
"loss": 1.1447,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 0.7518617021276596,
|
|
"grad_norm": 4.008635520935059,
|
|
"learning_rate": 9.727083599931506e-06,
|
|
"loss": 1.3526,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 0.752127659574468,
|
|
"grad_norm": 4.192075729370117,
|
|
"learning_rate": 9.726796927875688e-06,
|
|
"loss": 1.3889,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 0.7523936170212766,
|
|
"grad_norm": 3.805386543273926,
|
|
"learning_rate": 9.726510109567211e-06,
|
|
"loss": 1.3894,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 0.7526595744680851,
|
|
"grad_norm": 3.9009950160980225,
|
|
"learning_rate": 9.726223145014946e-06,
|
|
"loss": 1.2844,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.7529255319148936,
|
|
"grad_norm": 3.870450735092163,
|
|
"learning_rate": 9.725936034227771e-06,
|
|
"loss": 1.2328,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 0.7531914893617021,
|
|
"grad_norm": 3.5746779441833496,
|
|
"learning_rate": 9.725648777214571e-06,
|
|
"loss": 1.2661,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 0.7534574468085107,
|
|
"grad_norm": 4.304332733154297,
|
|
"learning_rate": 9.725361373984235e-06,
|
|
"loss": 1.2722,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 0.7537234042553191,
|
|
"grad_norm": 3.693098783493042,
|
|
"learning_rate": 9.725073824545655e-06,
|
|
"loss": 1.3476,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 0.7539893617021277,
|
|
"grad_norm": 3.3664565086364746,
|
|
"learning_rate": 9.724786128907726e-06,
|
|
"loss": 1.2575,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.7542553191489362,
|
|
"grad_norm": 3.585892915725708,
|
|
"learning_rate": 9.724498287079353e-06,
|
|
"loss": 1.3478,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 0.7545212765957446,
|
|
"grad_norm": 3.768718957901001,
|
|
"learning_rate": 9.72421029906944e-06,
|
|
"loss": 1.2749,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 0.7547872340425532,
|
|
"grad_norm": 3.891233205795288,
|
|
"learning_rate": 9.723922164886898e-06,
|
|
"loss": 1.3033,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 0.7550531914893617,
|
|
"grad_norm": 3.5751054286956787,
|
|
"learning_rate": 9.723633884540643e-06,
|
|
"loss": 1.1453,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 0.7553191489361702,
|
|
"grad_norm": 3.516754150390625,
|
|
"learning_rate": 9.723345458039595e-06,
|
|
"loss": 1.2553,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.7555851063829787,
|
|
"grad_norm": 3.76668643951416,
|
|
"learning_rate": 9.723056885392677e-06,
|
|
"loss": 1.3444,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 0.7558510638297873,
|
|
"grad_norm": 3.9877772331237793,
|
|
"learning_rate": 9.722768166608818e-06,
|
|
"loss": 1.2582,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 0.7561170212765957,
|
|
"grad_norm": 3.631065607070923,
|
|
"learning_rate": 9.72247930169695e-06,
|
|
"loss": 1.3652,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 0.7563829787234042,
|
|
"grad_norm": 3.124361515045166,
|
|
"learning_rate": 9.722190290666014e-06,
|
|
"loss": 0.9727,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 0.7566489361702128,
|
|
"grad_norm": 3.7869699001312256,
|
|
"learning_rate": 9.721901133524951e-06,
|
|
"loss": 1.3348,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.7569148936170212,
|
|
"grad_norm": 3.49450421333313,
|
|
"learning_rate": 9.721611830282707e-06,
|
|
"loss": 1.2607,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 0.7571808510638298,
|
|
"grad_norm": 4.137457370758057,
|
|
"learning_rate": 9.721322380948235e-06,
|
|
"loss": 1.2993,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 0.7574468085106383,
|
|
"grad_norm": 3.492685317993164,
|
|
"learning_rate": 9.721032785530488e-06,
|
|
"loss": 1.3636,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 0.7577127659574469,
|
|
"grad_norm": 3.78635835647583,
|
|
"learning_rate": 9.72074304403843e-06,
|
|
"loss": 1.3039,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 0.7579787234042553,
|
|
"grad_norm": 3.5052456855773926,
|
|
"learning_rate": 9.720453156481023e-06,
|
|
"loss": 1.1737,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.7582446808510638,
|
|
"grad_norm": 3.5687224864959717,
|
|
"learning_rate": 9.72016312286724e-06,
|
|
"loss": 1.3378,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 0.7585106382978724,
|
|
"grad_norm": 3.2821710109710693,
|
|
"learning_rate": 9.71987294320605e-06,
|
|
"loss": 1.0614,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 0.7587765957446808,
|
|
"grad_norm": 3.9896838665008545,
|
|
"learning_rate": 9.719582617506434e-06,
|
|
"loss": 1.4842,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 0.7590425531914894,
|
|
"grad_norm": 3.674095392227173,
|
|
"learning_rate": 9.719292145777377e-06,
|
|
"loss": 1.2268,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 0.7593085106382979,
|
|
"grad_norm": 3.586404800415039,
|
|
"learning_rate": 9.719001528027863e-06,
|
|
"loss": 1.3219,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.7595744680851064,
|
|
"grad_norm": 3.734853744506836,
|
|
"learning_rate": 9.718710764266888e-06,
|
|
"loss": 1.2469,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 0.7598404255319149,
|
|
"grad_norm": 3.4392611980438232,
|
|
"learning_rate": 9.718419854503444e-06,
|
|
"loss": 1.1928,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 0.7601063829787233,
|
|
"grad_norm": 3.7639527320861816,
|
|
"learning_rate": 9.718128798746537e-06,
|
|
"loss": 1.2995,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 0.7603723404255319,
|
|
"grad_norm": 3.564790964126587,
|
|
"learning_rate": 9.717837597005169e-06,
|
|
"loss": 1.2086,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 0.7606382978723404,
|
|
"grad_norm": 3.9883244037628174,
|
|
"learning_rate": 9.71754624928835e-06,
|
|
"loss": 1.2138,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.760904255319149,
|
|
"grad_norm": 3.823289632797241,
|
|
"learning_rate": 9.717254755605097e-06,
|
|
"loss": 1.2225,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 0.7611702127659574,
|
|
"grad_norm": 3.4945852756500244,
|
|
"learning_rate": 9.716963115964427e-06,
|
|
"loss": 1.26,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 0.761436170212766,
|
|
"grad_norm": 3.7626545429229736,
|
|
"learning_rate": 9.716671330375366e-06,
|
|
"loss": 1.2424,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 0.7617021276595745,
|
|
"grad_norm": 3.789428949356079,
|
|
"learning_rate": 9.71637939884694e-06,
|
|
"loss": 1.3538,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 0.761968085106383,
|
|
"grad_norm": 3.781531810760498,
|
|
"learning_rate": 9.716087321388184e-06,
|
|
"loss": 1.2693,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.7622340425531915,
|
|
"grad_norm": 3.184601306915283,
|
|
"learning_rate": 9.715795098008132e-06,
|
|
"loss": 1.0477,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 0.7625,
|
|
"grad_norm": 3.636810302734375,
|
|
"learning_rate": 9.715502728715827e-06,
|
|
"loss": 1.2691,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 0.7627659574468085,
|
|
"grad_norm": 4.0694122314453125,
|
|
"learning_rate": 9.715210213520317e-06,
|
|
"loss": 1.3419,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 0.763031914893617,
|
|
"grad_norm": 3.9551241397857666,
|
|
"learning_rate": 9.714917552430652e-06,
|
|
"loss": 1.2398,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 0.7632978723404256,
|
|
"grad_norm": 3.7696473598480225,
|
|
"learning_rate": 9.714624745455885e-06,
|
|
"loss": 1.2691,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.763563829787234,
|
|
"grad_norm": 3.726793050765991,
|
|
"learning_rate": 9.71433179260508e-06,
|
|
"loss": 1.2308,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 0.7638297872340426,
|
|
"grad_norm": 3.6226067543029785,
|
|
"learning_rate": 9.714038693887298e-06,
|
|
"loss": 1.3653,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 0.7640957446808511,
|
|
"grad_norm": 3.4948949813842773,
|
|
"learning_rate": 9.713745449311606e-06,
|
|
"loss": 1.2048,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 0.7643617021276595,
|
|
"grad_norm": 3.3849282264709473,
|
|
"learning_rate": 9.713452058887084e-06,
|
|
"loss": 1.1664,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 0.7646276595744681,
|
|
"grad_norm": 3.9506824016571045,
|
|
"learning_rate": 9.713158522622804e-06,
|
|
"loss": 1.4175,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.7648936170212766,
|
|
"grad_norm": 3.5069642066955566,
|
|
"learning_rate": 9.71286484052785e-06,
|
|
"loss": 1.2298,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 0.7651595744680851,
|
|
"grad_norm": 3.5655500888824463,
|
|
"learning_rate": 9.71257101261131e-06,
|
|
"loss": 1.1717,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 0.7654255319148936,
|
|
"grad_norm": 3.450375556945801,
|
|
"learning_rate": 9.712277038882274e-06,
|
|
"loss": 1.1573,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 0.7656914893617022,
|
|
"grad_norm": 3.849936008453369,
|
|
"learning_rate": 9.711982919349839e-06,
|
|
"loss": 1.1671,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 0.7659574468085106,
|
|
"grad_norm": 3.557499647140503,
|
|
"learning_rate": 9.711688654023105e-06,
|
|
"loss": 1.2369,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.7662234042553191,
|
|
"grad_norm": 4.1276326179504395,
|
|
"learning_rate": 9.711394242911177e-06,
|
|
"loss": 1.2304,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 0.7664893617021277,
|
|
"grad_norm": 3.553694725036621,
|
|
"learning_rate": 9.711099686023161e-06,
|
|
"loss": 1.285,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 0.7667553191489361,
|
|
"grad_norm": 3.484138250350952,
|
|
"learning_rate": 9.710804983368177e-06,
|
|
"loss": 1.2578,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 0.7670212765957447,
|
|
"grad_norm": 3.855220317840576,
|
|
"learning_rate": 9.71051013495534e-06,
|
|
"loss": 1.2213,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 0.7672872340425532,
|
|
"grad_norm": 3.9998855590820312,
|
|
"learning_rate": 9.710215140793774e-06,
|
|
"loss": 1.231,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.7675531914893617,
|
|
"grad_norm": 3.568758487701416,
|
|
"learning_rate": 9.709920000892605e-06,
|
|
"loss": 1.1779,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 0.7678191489361702,
|
|
"grad_norm": 3.5209362506866455,
|
|
"learning_rate": 9.709624715260965e-06,
|
|
"loss": 1.0908,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 0.7680851063829788,
|
|
"grad_norm": 3.783108949661255,
|
|
"learning_rate": 9.709329283907993e-06,
|
|
"loss": 1.3374,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 0.7683510638297872,
|
|
"grad_norm": 3.672305107116699,
|
|
"learning_rate": 9.70903370684283e-06,
|
|
"loss": 1.2719,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 0.7686170212765957,
|
|
"grad_norm": 3.9783568382263184,
|
|
"learning_rate": 9.708737984074616e-06,
|
|
"loss": 1.2343,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.7688829787234043,
|
|
"grad_norm": 3.6471900939941406,
|
|
"learning_rate": 9.708442115612508e-06,
|
|
"loss": 1.1384,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 0.7691489361702127,
|
|
"grad_norm": 3.8330166339874268,
|
|
"learning_rate": 9.708146101465657e-06,
|
|
"loss": 1.3178,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 0.7694148936170213,
|
|
"grad_norm": 3.224055290222168,
|
|
"learning_rate": 9.707849941643222e-06,
|
|
"loss": 1.087,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 0.7696808510638298,
|
|
"grad_norm": 4.061996936798096,
|
|
"learning_rate": 9.707553636154366e-06,
|
|
"loss": 1.4389,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 0.7699468085106383,
|
|
"grad_norm": 3.7000250816345215,
|
|
"learning_rate": 9.707257185008259e-06,
|
|
"loss": 1.2383,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.7702127659574468,
|
|
"grad_norm": 3.3188624382019043,
|
|
"learning_rate": 9.706960588214072e-06,
|
|
"loss": 1.1835,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 0.7704787234042553,
|
|
"grad_norm": 3.68198299407959,
|
|
"learning_rate": 9.706663845780984e-06,
|
|
"loss": 1.2511,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 0.7707446808510638,
|
|
"grad_norm": 3.831139326095581,
|
|
"learning_rate": 9.706366957718174e-06,
|
|
"loss": 1.3409,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 0.7710106382978723,
|
|
"grad_norm": 3.3753414154052734,
|
|
"learning_rate": 9.70606992403483e-06,
|
|
"loss": 1.1988,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 0.7712765957446809,
|
|
"grad_norm": 3.3466532230377197,
|
|
"learning_rate": 9.705772744740142e-06,
|
|
"loss": 1.1079,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.7715425531914893,
|
|
"grad_norm": 3.39589524269104,
|
|
"learning_rate": 9.705475419843304e-06,
|
|
"loss": 1.2094,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 0.7718085106382979,
|
|
"grad_norm": 3.5272488594055176,
|
|
"learning_rate": 9.705177949353516e-06,
|
|
"loss": 1.2466,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 0.7720744680851064,
|
|
"grad_norm": 3.9202656745910645,
|
|
"learning_rate": 9.704880333279985e-06,
|
|
"loss": 1.2347,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 0.7723404255319148,
|
|
"grad_norm": 3.421706199645996,
|
|
"learning_rate": 9.704582571631915e-06,
|
|
"loss": 1.1643,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 0.7726063829787234,
|
|
"grad_norm": 3.8939504623413086,
|
|
"learning_rate": 9.704284664418521e-06,
|
|
"loss": 1.4996,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.7728723404255319,
|
|
"grad_norm": 3.362236976623535,
|
|
"learning_rate": 9.703986611649024e-06,
|
|
"loss": 1.2661,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 0.7731382978723405,
|
|
"grad_norm": 3.2896718978881836,
|
|
"learning_rate": 9.70368841333264e-06,
|
|
"loss": 1.0865,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 0.7734042553191489,
|
|
"grad_norm": 3.662534475326538,
|
|
"learning_rate": 9.7033900694786e-06,
|
|
"loss": 1.223,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 0.7736702127659575,
|
|
"grad_norm": 3.7135627269744873,
|
|
"learning_rate": 9.703091580096132e-06,
|
|
"loss": 1.4123,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 0.773936170212766,
|
|
"grad_norm": 3.431130886077881,
|
|
"learning_rate": 9.702792945194475e-06,
|
|
"loss": 1.139,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.7742021276595744,
|
|
"grad_norm": 4.038398742675781,
|
|
"learning_rate": 9.702494164782866e-06,
|
|
"loss": 1.3352,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 0.774468085106383,
|
|
"grad_norm": 3.5457537174224854,
|
|
"learning_rate": 9.702195238870552e-06,
|
|
"loss": 1.2472,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 0.7747340425531914,
|
|
"grad_norm": 3.9684653282165527,
|
|
"learning_rate": 9.70189616746678e-06,
|
|
"loss": 1.2834,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 0.775,
|
|
"grad_norm": 3.520798683166504,
|
|
"learning_rate": 9.701596950580807e-06,
|
|
"loss": 1.1989,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 0.7752659574468085,
|
|
"grad_norm": 3.4203343391418457,
|
|
"learning_rate": 9.701297588221888e-06,
|
|
"loss": 1.2368,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.7755319148936171,
|
|
"grad_norm": 3.5501503944396973,
|
|
"learning_rate": 9.700998080399287e-06,
|
|
"loss": 1.2317,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 0.7757978723404255,
|
|
"grad_norm": 3.5603249073028564,
|
|
"learning_rate": 9.700698427122269e-06,
|
|
"loss": 1.2071,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 0.7760638297872341,
|
|
"grad_norm": 3.5951790809631348,
|
|
"learning_rate": 9.700398628400109e-06,
|
|
"loss": 1.1681,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 0.7763297872340426,
|
|
"grad_norm": 3.6561312675476074,
|
|
"learning_rate": 9.700098684242082e-06,
|
|
"loss": 1.3097,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 0.776595744680851,
|
|
"grad_norm": 3.628885269165039,
|
|
"learning_rate": 9.699798594657464e-06,
|
|
"loss": 1.2199,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.7768617021276596,
|
|
"grad_norm": 3.6864166259765625,
|
|
"learning_rate": 9.699498359655548e-06,
|
|
"loss": 1.2123,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 0.777127659574468,
|
|
"grad_norm": 4.034405708312988,
|
|
"learning_rate": 9.699197979245617e-06,
|
|
"loss": 1.3019,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 0.7773936170212766,
|
|
"grad_norm": 3.9352498054504395,
|
|
"learning_rate": 9.69889745343697e-06,
|
|
"loss": 1.4196,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 0.7776595744680851,
|
|
"grad_norm": 3.983980894088745,
|
|
"learning_rate": 9.698596782238904e-06,
|
|
"loss": 1.1829,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 0.7779255319148937,
|
|
"grad_norm": 3.4715261459350586,
|
|
"learning_rate": 9.698295965660721e-06,
|
|
"loss": 1.144,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.7781914893617021,
|
|
"grad_norm": 3.7768967151641846,
|
|
"learning_rate": 9.69799500371173e-06,
|
|
"loss": 1.2891,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 0.7784574468085106,
|
|
"grad_norm": 3.628307580947876,
|
|
"learning_rate": 9.697693896401239e-06,
|
|
"loss": 1.2956,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 0.7787234042553192,
|
|
"grad_norm": 3.601635456085205,
|
|
"learning_rate": 9.697392643738571e-06,
|
|
"loss": 1.2924,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 0.7789893617021276,
|
|
"grad_norm": 3.6882519721984863,
|
|
"learning_rate": 9.697091245733043e-06,
|
|
"loss": 1.2887,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 0.7792553191489362,
|
|
"grad_norm": 3.7858314514160156,
|
|
"learning_rate": 9.696789702393982e-06,
|
|
"loss": 1.3439,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.7795212765957447,
|
|
"grad_norm": 3.6974260807037354,
|
|
"learning_rate": 9.696488013730717e-06,
|
|
"loss": 1.2487,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 0.7797872340425532,
|
|
"grad_norm": 3.5106611251831055,
|
|
"learning_rate": 9.696186179752587e-06,
|
|
"loss": 1.1533,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 0.7800531914893617,
|
|
"grad_norm": 3.440690279006958,
|
|
"learning_rate": 9.695884200468923e-06,
|
|
"loss": 1.1004,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 0.7803191489361702,
|
|
"grad_norm": 3.43935227394104,
|
|
"learning_rate": 9.695582075889077e-06,
|
|
"loss": 1.192,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 0.7805851063829787,
|
|
"grad_norm": 3.6551554203033447,
|
|
"learning_rate": 9.695279806022391e-06,
|
|
"loss": 1.2693,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.7808510638297872,
|
|
"grad_norm": 3.6879799365997314,
|
|
"learning_rate": 9.694977390878219e-06,
|
|
"loss": 1.3101,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 0.7811170212765958,
|
|
"grad_norm": 3.6642568111419678,
|
|
"learning_rate": 9.69467483046592e-06,
|
|
"loss": 1.3313,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 0.7813829787234042,
|
|
"grad_norm": 3.6739001274108887,
|
|
"learning_rate": 9.694372124794855e-06,
|
|
"loss": 1.175,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 0.7816489361702128,
|
|
"grad_norm": 3.346895933151245,
|
|
"learning_rate": 9.69406927387439e-06,
|
|
"loss": 1.135,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 0.7819148936170213,
|
|
"grad_norm": 3.605050563812256,
|
|
"learning_rate": 9.693766277713893e-06,
|
|
"loss": 1.2365,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.7821808510638298,
|
|
"grad_norm": 3.56868839263916,
|
|
"learning_rate": 9.693463136322743e-06,
|
|
"loss": 1.2756,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 0.7824468085106383,
|
|
"grad_norm": 3.4643678665161133,
|
|
"learning_rate": 9.693159849710317e-06,
|
|
"loss": 1.1344,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 0.7827127659574468,
|
|
"grad_norm": 3.7843425273895264,
|
|
"learning_rate": 9.692856417885998e-06,
|
|
"loss": 1.2301,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 0.7829787234042553,
|
|
"grad_norm": 3.7226831912994385,
|
|
"learning_rate": 9.69255284085918e-06,
|
|
"loss": 1.2124,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 0.7832446808510638,
|
|
"grad_norm": 3.5860259532928467,
|
|
"learning_rate": 9.69224911863925e-06,
|
|
"loss": 1.2237,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.7835106382978724,
|
|
"grad_norm": 3.68369722366333,
|
|
"learning_rate": 9.691945251235608e-06,
|
|
"loss": 1.3566,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 0.7837765957446808,
|
|
"grad_norm": 3.778324842453003,
|
|
"learning_rate": 9.691641238657655e-06,
|
|
"loss": 1.2369,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 0.7840425531914894,
|
|
"grad_norm": 3.4326350688934326,
|
|
"learning_rate": 9.6913370809148e-06,
|
|
"loss": 1.0766,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 0.7843085106382979,
|
|
"grad_norm": 3.609269380569458,
|
|
"learning_rate": 9.691032778016452e-06,
|
|
"loss": 1.228,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 0.7845744680851063,
|
|
"grad_norm": 3.3350110054016113,
|
|
"learning_rate": 9.690728329972025e-06,
|
|
"loss": 1.1658,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.7848404255319149,
|
|
"grad_norm": 3.53971004486084,
|
|
"learning_rate": 9.690423736790944e-06,
|
|
"loss": 1.2674,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 0.7851063829787234,
|
|
"grad_norm": 3.3145904541015625,
|
|
"learning_rate": 9.690118998482628e-06,
|
|
"loss": 1.2601,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 0.785372340425532,
|
|
"grad_norm": 3.7415387630462646,
|
|
"learning_rate": 9.689814115056509e-06,
|
|
"loss": 1.3693,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 0.7856382978723404,
|
|
"grad_norm": 3.2443130016326904,
|
|
"learning_rate": 9.689509086522019e-06,
|
|
"loss": 1.1516,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 0.785904255319149,
|
|
"grad_norm": 3.4239816665649414,
|
|
"learning_rate": 9.689203912888597e-06,
|
|
"loss": 1.2722,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.7861702127659574,
|
|
"grad_norm": 3.5822324752807617,
|
|
"learning_rate": 9.688898594165685e-06,
|
|
"loss": 1.2253,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 0.7864361702127659,
|
|
"grad_norm": 3.2302675247192383,
|
|
"learning_rate": 9.688593130362731e-06,
|
|
"loss": 1.1031,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 0.7867021276595745,
|
|
"grad_norm": 3.6517271995544434,
|
|
"learning_rate": 9.688287521489184e-06,
|
|
"loss": 1.2459,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 0.7869680851063829,
|
|
"grad_norm": 3.772766351699829,
|
|
"learning_rate": 9.687981767554502e-06,
|
|
"loss": 1.2623,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 0.7872340425531915,
|
|
"grad_norm": 3.646852731704712,
|
|
"learning_rate": 9.687675868568145e-06,
|
|
"loss": 1.2951,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.7875,
|
|
"grad_norm": 3.738582134246826,
|
|
"learning_rate": 9.687369824539577e-06,
|
|
"loss": 1.3321,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 0.7877659574468086,
|
|
"grad_norm": 3.6618778705596924,
|
|
"learning_rate": 9.687063635478269e-06,
|
|
"loss": 1.3527,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 0.788031914893617,
|
|
"grad_norm": 3.6133735179901123,
|
|
"learning_rate": 9.686757301393693e-06,
|
|
"loss": 1.2852,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 0.7882978723404256,
|
|
"grad_norm": 3.7590041160583496,
|
|
"learning_rate": 9.686450822295327e-06,
|
|
"loss": 1.2057,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 0.788563829787234,
|
|
"grad_norm": 3.4455080032348633,
|
|
"learning_rate": 9.686144198192658e-06,
|
|
"loss": 1.2478,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.7888297872340425,
|
|
"grad_norm": 3.4166572093963623,
|
|
"learning_rate": 9.685837429095169e-06,
|
|
"loss": 1.2585,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 0.7890957446808511,
|
|
"grad_norm": 3.322124719619751,
|
|
"learning_rate": 9.685530515012352e-06,
|
|
"loss": 1.2452,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 0.7893617021276595,
|
|
"grad_norm": 3.493075132369995,
|
|
"learning_rate": 9.685223455953703e-06,
|
|
"loss": 1.1951,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 0.7896276595744681,
|
|
"grad_norm": 3.7366654872894287,
|
|
"learning_rate": 9.684916251928727e-06,
|
|
"loss": 1.4098,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 0.7898936170212766,
|
|
"grad_norm": 3.846484899520874,
|
|
"learning_rate": 9.684608902946926e-06,
|
|
"loss": 1.2726,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.7901595744680852,
|
|
"grad_norm": 3.382856607437134,
|
|
"learning_rate": 9.684301409017808e-06,
|
|
"loss": 1.2072,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 0.7904255319148936,
|
|
"grad_norm": 3.600064277648926,
|
|
"learning_rate": 9.68399377015089e-06,
|
|
"loss": 1.2991,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 0.7906914893617021,
|
|
"grad_norm": 3.4890823364257812,
|
|
"learning_rate": 9.683685986355692e-06,
|
|
"loss": 1.303,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 0.7909574468085107,
|
|
"grad_norm": 3.2720248699188232,
|
|
"learning_rate": 9.683378057641735e-06,
|
|
"loss": 1.305,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 0.7912234042553191,
|
|
"grad_norm": 3.3121964931488037,
|
|
"learning_rate": 9.683069984018545e-06,
|
|
"loss": 1.228,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.7914893617021277,
|
|
"grad_norm": 3.5907375812530518,
|
|
"learning_rate": 9.682761765495657e-06,
|
|
"loss": 1.3374,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 0.7917553191489362,
|
|
"grad_norm": 3.518444538116455,
|
|
"learning_rate": 9.682453402082607e-06,
|
|
"loss": 1.0759,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 0.7920212765957447,
|
|
"grad_norm": 3.7533528804779053,
|
|
"learning_rate": 9.682144893788934e-06,
|
|
"loss": 1.2666,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 0.7922872340425532,
|
|
"grad_norm": 3.877476453781128,
|
|
"learning_rate": 9.681836240624187e-06,
|
|
"loss": 1.2371,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 0.7925531914893617,
|
|
"grad_norm": 3.945760488510132,
|
|
"learning_rate": 9.681527442597916e-06,
|
|
"loss": 1.282,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.7928191489361702,
|
|
"grad_norm": 3.585514783859253,
|
|
"learning_rate": 9.681218499719673e-06,
|
|
"loss": 1.3038,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 0.7930851063829787,
|
|
"grad_norm": 4.198021411895752,
|
|
"learning_rate": 9.680909411999018e-06,
|
|
"loss": 1.4758,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 0.7933510638297873,
|
|
"grad_norm": 3.670048713684082,
|
|
"learning_rate": 9.680600179445514e-06,
|
|
"loss": 1.2579,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 0.7936170212765957,
|
|
"grad_norm": 3.6147031784057617,
|
|
"learning_rate": 9.68029080206873e-06,
|
|
"loss": 1.2565,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 0.7938829787234043,
|
|
"grad_norm": 3.589110851287842,
|
|
"learning_rate": 9.67998127987824e-06,
|
|
"loss": 1.2516,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.7941489361702128,
|
|
"grad_norm": 3.5315637588500977,
|
|
"learning_rate": 9.679671612883615e-06,
|
|
"loss": 1.2206,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 0.7944148936170212,
|
|
"grad_norm": 3.6465420722961426,
|
|
"learning_rate": 9.679361801094445e-06,
|
|
"loss": 1.2784,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 0.7946808510638298,
|
|
"grad_norm": 3.6671435832977295,
|
|
"learning_rate": 9.679051844520308e-06,
|
|
"loss": 1.4118,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 0.7949468085106383,
|
|
"grad_norm": 3.479151725769043,
|
|
"learning_rate": 9.6787417431708e-06,
|
|
"loss": 1.303,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 0.7952127659574468,
|
|
"grad_norm": 3.694517135620117,
|
|
"learning_rate": 9.678431497055515e-06,
|
|
"loss": 1.1658,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.7954787234042553,
|
|
"grad_norm": 3.453770637512207,
|
|
"learning_rate": 9.67812110618405e-06,
|
|
"loss": 1.2784,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 0.7957446808510639,
|
|
"grad_norm": 3.926161527633667,
|
|
"learning_rate": 9.677810570566011e-06,
|
|
"loss": 1.2926,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 0.7960106382978723,
|
|
"grad_norm": 3.6100566387176514,
|
|
"learning_rate": 9.677499890211005e-06,
|
|
"loss": 1.2504,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 0.7962765957446809,
|
|
"grad_norm": 3.496819019317627,
|
|
"learning_rate": 9.677189065128646e-06,
|
|
"loss": 1.1922,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 0.7965425531914894,
|
|
"grad_norm": 3.4073357582092285,
|
|
"learning_rate": 9.676878095328547e-06,
|
|
"loss": 1.1934,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.7968085106382978,
|
|
"grad_norm": 3.5559115409851074,
|
|
"learning_rate": 9.676566980820338e-06,
|
|
"loss": 1.3128,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 0.7970744680851064,
|
|
"grad_norm": 3.844743013381958,
|
|
"learning_rate": 9.676255721613639e-06,
|
|
"loss": 1.2881,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 0.7973404255319149,
|
|
"grad_norm": 3.2858474254608154,
|
|
"learning_rate": 9.675944317718083e-06,
|
|
"loss": 1.2103,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 0.7976063829787234,
|
|
"grad_norm": 3.7412915229797363,
|
|
"learning_rate": 9.675632769143303e-06,
|
|
"loss": 1.2254,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 0.7978723404255319,
|
|
"grad_norm": 4.140746116638184,
|
|
"learning_rate": 9.67532107589894e-06,
|
|
"loss": 1.2933,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.7978723404255319,
|
|
"eval_loss": 1.2683638334274292,
|
|
"eval_runtime": 12.6307,
|
|
"eval_samples_per_second": 31.669,
|
|
"eval_steps_per_second": 3.959,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.7981382978723405,
|
|
"grad_norm": 3.8456828594207764,
|
|
"learning_rate": 9.67500923799464e-06,
|
|
"loss": 1.3237,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 0.7984042553191489,
|
|
"grad_norm": 3.4592676162719727,
|
|
"learning_rate": 9.67469725544005e-06,
|
|
"loss": 1.0598,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 0.7986702127659574,
|
|
"grad_norm": 3.729926586151123,
|
|
"learning_rate": 9.674385128244823e-06,
|
|
"loss": 1.2681,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 0.798936170212766,
|
|
"grad_norm": 3.4208433628082275,
|
|
"learning_rate": 9.674072856418616e-06,
|
|
"loss": 1.3245,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 0.7992021276595744,
|
|
"grad_norm": 3.511957883834839,
|
|
"learning_rate": 9.673760439971091e-06,
|
|
"loss": 1.1623,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.799468085106383,
|
|
"grad_norm": 3.794137477874756,
|
|
"learning_rate": 9.673447878911916e-06,
|
|
"loss": 1.1303,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 0.7997340425531915,
|
|
"grad_norm": 3.826404571533203,
|
|
"learning_rate": 9.673135173250763e-06,
|
|
"loss": 1.3698,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 3.5505003929138184,
|
|
"learning_rate": 9.672822322997305e-06,
|
|
"loss": 1.257,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 0.8002659574468085,
|
|
"grad_norm": 3.616678237915039,
|
|
"learning_rate": 9.672509328161222e-06,
|
|
"loss": 1.263,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 0.800531914893617,
|
|
"grad_norm": 3.5338237285614014,
|
|
"learning_rate": 9.672196188752201e-06,
|
|
"loss": 1.2328,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.8007978723404255,
|
|
"grad_norm": 3.4037692546844482,
|
|
"learning_rate": 9.671882904779927e-06,
|
|
"loss": 1.1843,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 0.801063829787234,
|
|
"grad_norm": 3.918245315551758,
|
|
"learning_rate": 9.671569476254096e-06,
|
|
"loss": 1.3486,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 0.8013297872340426,
|
|
"grad_norm": 3.5351336002349854,
|
|
"learning_rate": 9.671255903184405e-06,
|
|
"loss": 1.3272,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 0.801595744680851,
|
|
"grad_norm": 3.9071462154388428,
|
|
"learning_rate": 9.670942185580557e-06,
|
|
"loss": 1.1649,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 0.8018617021276596,
|
|
"grad_norm": 3.493410110473633,
|
|
"learning_rate": 9.670628323452259e-06,
|
|
"loss": 1.1651,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.8021276595744681,
|
|
"grad_norm": 3.2986040115356445,
|
|
"learning_rate": 9.670314316809222e-06,
|
|
"loss": 1.2718,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 0.8023936170212767,
|
|
"grad_norm": 3.4360411167144775,
|
|
"learning_rate": 9.67000016566116e-06,
|
|
"loss": 1.1393,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 0.8026595744680851,
|
|
"grad_norm": 3.690444231033325,
|
|
"learning_rate": 9.669685870017795e-06,
|
|
"loss": 1.1887,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 0.8029255319148936,
|
|
"grad_norm": 3.58248233795166,
|
|
"learning_rate": 9.669371429888852e-06,
|
|
"loss": 1.3714,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 0.8031914893617021,
|
|
"grad_norm": 3.723407745361328,
|
|
"learning_rate": 9.66905684528406e-06,
|
|
"loss": 1.2999,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.8034574468085106,
|
|
"grad_norm": 3.7996089458465576,
|
|
"learning_rate": 9.66874211621315e-06,
|
|
"loss": 1.3091,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 0.8037234042553192,
|
|
"grad_norm": 3.741523265838623,
|
|
"learning_rate": 9.668427242685864e-06,
|
|
"loss": 1.261,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 0.8039893617021276,
|
|
"grad_norm": 3.6952426433563232,
|
|
"learning_rate": 9.668112224711941e-06,
|
|
"loss": 1.3148,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 0.8042553191489362,
|
|
"grad_norm": 3.728320837020874,
|
|
"learning_rate": 9.667797062301133e-06,
|
|
"loss": 1.2188,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 0.8045212765957447,
|
|
"grad_norm": 3.7836687564849854,
|
|
"learning_rate": 9.667481755463183e-06,
|
|
"loss": 1.3981,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.8047872340425531,
|
|
"grad_norm": 3.308515787124634,
|
|
"learning_rate": 9.667166304207856e-06,
|
|
"loss": 1.2107,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 0.8050531914893617,
|
|
"grad_norm": 3.5682644844055176,
|
|
"learning_rate": 9.666850708544907e-06,
|
|
"loss": 1.2288,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 0.8053191489361702,
|
|
"grad_norm": 3.817530632019043,
|
|
"learning_rate": 9.666534968484105e-06,
|
|
"loss": 1.2821,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 0.8055851063829788,
|
|
"grad_norm": 3.1704676151275635,
|
|
"learning_rate": 9.666219084035215e-06,
|
|
"loss": 1.1683,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 0.8058510638297872,
|
|
"grad_norm": 3.884427547454834,
|
|
"learning_rate": 9.665903055208013e-06,
|
|
"loss": 1.3448,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.8061170212765958,
|
|
"grad_norm": 3.8523178100585938,
|
|
"learning_rate": 9.665586882012278e-06,
|
|
"loss": 1.1827,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 0.8063829787234043,
|
|
"grad_norm": 3.217390298843384,
|
|
"learning_rate": 9.66527056445779e-06,
|
|
"loss": 1.1782,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 0.8066489361702127,
|
|
"grad_norm": 3.484069585800171,
|
|
"learning_rate": 9.66495410255434e-06,
|
|
"loss": 1.2279,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 0.8069148936170213,
|
|
"grad_norm": 3.62542724609375,
|
|
"learning_rate": 9.664637496311717e-06,
|
|
"loss": 1.232,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 0.8071808510638298,
|
|
"grad_norm": 3.6373066902160645,
|
|
"learning_rate": 9.664320745739717e-06,
|
|
"loss": 1.2463,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.8074468085106383,
|
|
"grad_norm": 3.3646364212036133,
|
|
"learning_rate": 9.664003850848142e-06,
|
|
"loss": 1.1543,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 0.8077127659574468,
|
|
"grad_norm": 3.772383689880371,
|
|
"learning_rate": 9.663686811646798e-06,
|
|
"loss": 1.3646,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 0.8079787234042554,
|
|
"grad_norm": 3.8896496295928955,
|
|
"learning_rate": 9.663369628145493e-06,
|
|
"loss": 1.2321,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 0.8082446808510638,
|
|
"grad_norm": 4.038544654846191,
|
|
"learning_rate": 9.66305230035404e-06,
|
|
"loss": 1.2345,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 0.8085106382978723,
|
|
"grad_norm": 3.7592129707336426,
|
|
"learning_rate": 9.662734828282258e-06,
|
|
"loss": 1.2879,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.8087765957446809,
|
|
"grad_norm": 3.3927769660949707,
|
|
"learning_rate": 9.662417211939974e-06,
|
|
"loss": 1.2495,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 0.8090425531914893,
|
|
"grad_norm": 3.7398223876953125,
|
|
"learning_rate": 9.662099451337009e-06,
|
|
"loss": 1.2328,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 0.8093085106382979,
|
|
"grad_norm": 3.697510004043579,
|
|
"learning_rate": 9.6617815464832e-06,
|
|
"loss": 1.2306,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 0.8095744680851064,
|
|
"grad_norm": 3.362252712249756,
|
|
"learning_rate": 9.66146349738838e-06,
|
|
"loss": 1.2598,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 0.8098404255319149,
|
|
"grad_norm": 3.629018783569336,
|
|
"learning_rate": 9.661145304062391e-06,
|
|
"loss": 1.2364,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.8101063829787234,
|
|
"grad_norm": 3.6889262199401855,
|
|
"learning_rate": 9.66082696651508e-06,
|
|
"loss": 1.2122,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 0.810372340425532,
|
|
"grad_norm": 3.6210176944732666,
|
|
"learning_rate": 9.660508484756295e-06,
|
|
"loss": 1.2425,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 0.8106382978723404,
|
|
"grad_norm": 3.52443528175354,
|
|
"learning_rate": 9.66018985879589e-06,
|
|
"loss": 1.1755,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 0.8109042553191489,
|
|
"grad_norm": 3.6943182945251465,
|
|
"learning_rate": 9.659871088643724e-06,
|
|
"loss": 1.2033,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 0.8111702127659575,
|
|
"grad_norm": 3.6708784103393555,
|
|
"learning_rate": 9.65955217430966e-06,
|
|
"loss": 1.2418,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.8114361702127659,
|
|
"grad_norm": 3.3263115882873535,
|
|
"learning_rate": 9.659233115803565e-06,
|
|
"loss": 1.133,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 0.8117021276595745,
|
|
"grad_norm": 3.9797048568725586,
|
|
"learning_rate": 9.658913913135314e-06,
|
|
"loss": 1.2549,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 0.811968085106383,
|
|
"grad_norm": 3.505920648574829,
|
|
"learning_rate": 9.658594566314781e-06,
|
|
"loss": 1.3769,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 0.8122340425531915,
|
|
"grad_norm": 3.466444492340088,
|
|
"learning_rate": 9.658275075351846e-06,
|
|
"loss": 1.2394,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 0.8125,
|
|
"grad_norm": 3.4919936656951904,
|
|
"learning_rate": 9.657955440256396e-06,
|
|
"loss": 1.1807,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.8127659574468085,
|
|
"grad_norm": 3.8641278743743896,
|
|
"learning_rate": 9.65763566103832e-06,
|
|
"loss": 1.2532,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 0.813031914893617,
|
|
"grad_norm": 3.5937435626983643,
|
|
"learning_rate": 9.657315737707514e-06,
|
|
"loss": 1.2234,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 0.8132978723404255,
|
|
"grad_norm": 3.8876571655273438,
|
|
"learning_rate": 9.656995670273877e-06,
|
|
"loss": 1.2057,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 0.8135638297872341,
|
|
"grad_norm": 3.532804012298584,
|
|
"learning_rate": 9.656675458747308e-06,
|
|
"loss": 1.2109,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 0.8138297872340425,
|
|
"grad_norm": 3.421060800552368,
|
|
"learning_rate": 9.65635510313772e-06,
|
|
"loss": 1.2677,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.8140957446808511,
|
|
"grad_norm": 3.599653720855713,
|
|
"learning_rate": 9.656034603455022e-06,
|
|
"loss": 1.2561,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 0.8143617021276596,
|
|
"grad_norm": 3.297154664993286,
|
|
"learning_rate": 9.655713959709133e-06,
|
|
"loss": 1.1693,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 0.814627659574468,
|
|
"grad_norm": 3.678478240966797,
|
|
"learning_rate": 9.65539317190997e-06,
|
|
"loss": 1.2403,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 0.8148936170212766,
|
|
"grad_norm": 3.6876394748687744,
|
|
"learning_rate": 9.655072240067464e-06,
|
|
"loss": 1.2774,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 0.8151595744680851,
|
|
"grad_norm": 3.6876394748687744,
|
|
"learning_rate": 9.65475116419154e-06,
|
|
"loss": 1.1866,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.8154255319148936,
|
|
"grad_norm": 4.459439277648926,
|
|
"learning_rate": 9.654429944292136e-06,
|
|
"loss": 1.255,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 0.8156914893617021,
|
|
"grad_norm": 3.636715888977051,
|
|
"learning_rate": 9.65410858037919e-06,
|
|
"loss": 1.4368,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 0.8159574468085107,
|
|
"grad_norm": 3.7368946075439453,
|
|
"learning_rate": 9.653787072462644e-06,
|
|
"loss": 1.3039,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 0.8162234042553191,
|
|
"grad_norm": 3.32794451713562,
|
|
"learning_rate": 9.653465420552445e-06,
|
|
"loss": 1.1366,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 0.8164893617021277,
|
|
"grad_norm": 3.3161087036132812,
|
|
"learning_rate": 9.65314362465855e-06,
|
|
"loss": 1.0602,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.8167553191489362,
|
|
"grad_norm": 3.6150729656219482,
|
|
"learning_rate": 9.652821684790912e-06,
|
|
"loss": 1.3939,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 0.8170212765957446,
|
|
"grad_norm": 3.7740049362182617,
|
|
"learning_rate": 9.652499600959493e-06,
|
|
"loss": 1.3626,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 0.8172872340425532,
|
|
"grad_norm": 3.8331871032714844,
|
|
"learning_rate": 9.65217737317426e-06,
|
|
"loss": 1.3151,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 0.8175531914893617,
|
|
"grad_norm": 3.3269927501678467,
|
|
"learning_rate": 9.65185500144518e-06,
|
|
"loss": 1.1879,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 0.8178191489361702,
|
|
"grad_norm": 3.318422555923462,
|
|
"learning_rate": 9.651532485782231e-06,
|
|
"loss": 1.2128,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.8180851063829787,
|
|
"grad_norm": 3.8798575401306152,
|
|
"learning_rate": 9.65120982619539e-06,
|
|
"loss": 1.2097,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 0.8183510638297873,
|
|
"grad_norm": 3.538886785507202,
|
|
"learning_rate": 9.650887022694639e-06,
|
|
"loss": 1.2558,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 0.8186170212765957,
|
|
"grad_norm": 3.8403117656707764,
|
|
"learning_rate": 9.65056407528997e-06,
|
|
"loss": 1.4618,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 0.8188829787234042,
|
|
"grad_norm": 3.731025218963623,
|
|
"learning_rate": 9.650240983991372e-06,
|
|
"loss": 1.2627,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 0.8191489361702128,
|
|
"grad_norm": 3.7986326217651367,
|
|
"learning_rate": 9.649917748808844e-06,
|
|
"loss": 1.2213,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.8194148936170212,
|
|
"grad_norm": 3.556394577026367,
|
|
"learning_rate": 9.649594369752384e-06,
|
|
"loss": 1.2093,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 0.8196808510638298,
|
|
"grad_norm": 3.989525318145752,
|
|
"learning_rate": 9.649270846832001e-06,
|
|
"loss": 1.4164,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 0.8199468085106383,
|
|
"grad_norm": 3.6029410362243652,
|
|
"learning_rate": 9.648947180057705e-06,
|
|
"loss": 1.315,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 0.8202127659574469,
|
|
"grad_norm": 3.677532196044922,
|
|
"learning_rate": 9.648623369439509e-06,
|
|
"loss": 1.3006,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 0.8204787234042553,
|
|
"grad_norm": 3.241009473800659,
|
|
"learning_rate": 9.648299414987434e-06,
|
|
"loss": 1.1637,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.8207446808510638,
|
|
"grad_norm": 3.470125198364258,
|
|
"learning_rate": 9.647975316711502e-06,
|
|
"loss": 1.1894,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 0.8210106382978724,
|
|
"grad_norm": 3.6613218784332275,
|
|
"learning_rate": 9.647651074621741e-06,
|
|
"loss": 1.2222,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 0.8212765957446808,
|
|
"grad_norm": 3.4483370780944824,
|
|
"learning_rate": 9.647326688728184e-06,
|
|
"loss": 1.1142,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 0.8215425531914894,
|
|
"grad_norm": 3.830843687057495,
|
|
"learning_rate": 9.647002159040868e-06,
|
|
"loss": 1.2923,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 0.8218085106382979,
|
|
"grad_norm": 3.445209264755249,
|
|
"learning_rate": 9.646677485569834e-06,
|
|
"loss": 1.2042,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.8220744680851064,
|
|
"grad_norm": 3.818505048751831,
|
|
"learning_rate": 9.646352668325128e-06,
|
|
"loss": 1.3102,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 0.8223404255319149,
|
|
"grad_norm": 3.4437718391418457,
|
|
"learning_rate": 9.646027707316798e-06,
|
|
"loss": 1.1836,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 0.8226063829787233,
|
|
"grad_norm": 3.690908670425415,
|
|
"learning_rate": 9.645702602554902e-06,
|
|
"loss": 1.1375,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 0.8228723404255319,
|
|
"grad_norm": 4.1998209953308105,
|
|
"learning_rate": 9.645377354049499e-06,
|
|
"loss": 1.3336,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 0.8231382978723404,
|
|
"grad_norm": 3.559067487716675,
|
|
"learning_rate": 9.64505196181065e-06,
|
|
"loss": 1.1967,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.823404255319149,
|
|
"grad_norm": 3.657874584197998,
|
|
"learning_rate": 9.644726425848425e-06,
|
|
"loss": 1.2603,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 0.8236702127659574,
|
|
"grad_norm": 3.2679355144500732,
|
|
"learning_rate": 9.644400746172896e-06,
|
|
"loss": 1.177,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 0.823936170212766,
|
|
"grad_norm": 3.9587206840515137,
|
|
"learning_rate": 9.644074922794139e-06,
|
|
"loss": 1.2768,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 0.8242021276595745,
|
|
"grad_norm": 3.2773869037628174,
|
|
"learning_rate": 9.643748955722238e-06,
|
|
"loss": 1.2397,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 0.824468085106383,
|
|
"grad_norm": 3.796388864517212,
|
|
"learning_rate": 9.643422844967274e-06,
|
|
"loss": 1.3281,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.8247340425531915,
|
|
"grad_norm": 3.6081080436706543,
|
|
"learning_rate": 9.643096590539343e-06,
|
|
"loss": 1.1514,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 0.825,
|
|
"grad_norm": 3.6461782455444336,
|
|
"learning_rate": 9.642770192448537e-06,
|
|
"loss": 1.3713,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 0.8252659574468085,
|
|
"grad_norm": 3.731442451477051,
|
|
"learning_rate": 9.642443650704954e-06,
|
|
"loss": 1.3621,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 0.825531914893617,
|
|
"grad_norm": 3.8544721603393555,
|
|
"learning_rate": 9.642116965318697e-06,
|
|
"loss": 1.2699,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 0.8257978723404256,
|
|
"grad_norm": 3.6057963371276855,
|
|
"learning_rate": 9.641790136299877e-06,
|
|
"loss": 1.1425,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.826063829787234,
|
|
"grad_norm": 3.618706226348877,
|
|
"learning_rate": 9.641463163658606e-06,
|
|
"loss": 1.309,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 0.8263297872340426,
|
|
"grad_norm": 3.2677018642425537,
|
|
"learning_rate": 9.641136047405e-06,
|
|
"loss": 1.221,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 0.8265957446808511,
|
|
"grad_norm": 3.311882734298706,
|
|
"learning_rate": 9.64080878754918e-06,
|
|
"loss": 1.2231,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 0.8268617021276595,
|
|
"grad_norm": 3.435105562210083,
|
|
"learning_rate": 9.640481384101273e-06,
|
|
"loss": 1.3697,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 0.8271276595744681,
|
|
"grad_norm": 3.77473783493042,
|
|
"learning_rate": 9.640153837071407e-06,
|
|
"loss": 1.4063,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.8273936170212766,
|
|
"grad_norm": 3.6035094261169434,
|
|
"learning_rate": 9.63982614646972e-06,
|
|
"loss": 1.3273,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 0.8276595744680851,
|
|
"grad_norm": 3.4138381481170654,
|
|
"learning_rate": 9.639498312306348e-06,
|
|
"loss": 1.1646,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 0.8279255319148936,
|
|
"grad_norm": 3.638125419616699,
|
|
"learning_rate": 9.639170334591437e-06,
|
|
"loss": 1.3288,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 0.8281914893617022,
|
|
"grad_norm": 3.917206287384033,
|
|
"learning_rate": 9.638842213335132e-06,
|
|
"loss": 1.3541,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 0.8284574468085106,
|
|
"grad_norm": 4.120351314544678,
|
|
"learning_rate": 9.63851394854759e-06,
|
|
"loss": 1.3473,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.8287234042553191,
|
|
"grad_norm": 3.6400179862976074,
|
|
"learning_rate": 9.638185540238963e-06,
|
|
"loss": 1.3199,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 0.8289893617021277,
|
|
"grad_norm": 3.4678385257720947,
|
|
"learning_rate": 9.637856988419413e-06,
|
|
"loss": 1.3348,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 0.8292553191489361,
|
|
"grad_norm": 3.490227460861206,
|
|
"learning_rate": 9.637528293099111e-06,
|
|
"loss": 1.2041,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 0.8295212765957447,
|
|
"grad_norm": 3.3085920810699463,
|
|
"learning_rate": 9.637199454288222e-06,
|
|
"loss": 1.2509,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 0.8297872340425532,
|
|
"grad_norm": 3.5364296436309814,
|
|
"learning_rate": 9.636870471996923e-06,
|
|
"loss": 1.3302,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.8300531914893617,
|
|
"grad_norm": 3.952470302581787,
|
|
"learning_rate": 9.636541346235392e-06,
|
|
"loss": 1.3387,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 0.8303191489361702,
|
|
"grad_norm": 3.678920269012451,
|
|
"learning_rate": 9.636212077013812e-06,
|
|
"loss": 1.2225,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 0.8305851063829788,
|
|
"grad_norm": 3.4960269927978516,
|
|
"learning_rate": 9.635882664342373e-06,
|
|
"loss": 1.1883,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 0.8308510638297872,
|
|
"grad_norm": 3.1453335285186768,
|
|
"learning_rate": 9.635553108231266e-06,
|
|
"loss": 1.0471,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 0.8311170212765957,
|
|
"grad_norm": 3.6323747634887695,
|
|
"learning_rate": 9.635223408690688e-06,
|
|
"loss": 1.1595,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.8313829787234043,
|
|
"grad_norm": 3.2408368587493896,
|
|
"learning_rate": 9.634893565730841e-06,
|
|
"loss": 1.2454,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 0.8316489361702127,
|
|
"grad_norm": 3.628117322921753,
|
|
"learning_rate": 9.63456357936193e-06,
|
|
"loss": 1.3161,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 0.8319148936170213,
|
|
"grad_norm": 3.896415948867798,
|
|
"learning_rate": 9.634233449594165e-06,
|
|
"loss": 1.29,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 0.8321808510638298,
|
|
"grad_norm": 3.3425135612487793,
|
|
"learning_rate": 9.63390317643776e-06,
|
|
"loss": 1.0845,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 0.8324468085106383,
|
|
"grad_norm": 3.593471050262451,
|
|
"learning_rate": 9.633572759902936e-06,
|
|
"loss": 1.1751,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.8327127659574468,
|
|
"grad_norm": 3.8105530738830566,
|
|
"learning_rate": 9.633242199999916e-06,
|
|
"loss": 1.2935,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 0.8329787234042553,
|
|
"grad_norm": 3.5633177757263184,
|
|
"learning_rate": 9.632911496738927e-06,
|
|
"loss": 1.2376,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 0.8332446808510638,
|
|
"grad_norm": 3.5305428504943848,
|
|
"learning_rate": 9.632580650130201e-06,
|
|
"loss": 1.2905,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 0.8335106382978723,
|
|
"grad_norm": 3.328059196472168,
|
|
"learning_rate": 9.632249660183977e-06,
|
|
"loss": 1.2773,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 0.8337765957446809,
|
|
"grad_norm": 3.8208043575286865,
|
|
"learning_rate": 9.631918526910493e-06,
|
|
"loss": 1.2472,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.8340425531914893,
|
|
"grad_norm": 3.6366043090820312,
|
|
"learning_rate": 9.631587250319998e-06,
|
|
"loss": 1.1361,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 0.8343085106382979,
|
|
"grad_norm": 3.3834152221679688,
|
|
"learning_rate": 9.631255830422739e-06,
|
|
"loss": 1.2766,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 0.8345744680851064,
|
|
"grad_norm": 3.6326873302459717,
|
|
"learning_rate": 9.630924267228973e-06,
|
|
"loss": 1.2792,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 0.8348404255319148,
|
|
"grad_norm": 3.720566749572754,
|
|
"learning_rate": 9.630592560748957e-06,
|
|
"loss": 1.113,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 0.8351063829787234,
|
|
"grad_norm": 3.732006549835205,
|
|
"learning_rate": 9.630260710992956e-06,
|
|
"loss": 1.1235,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.8353723404255319,
|
|
"grad_norm": 3.3565263748168945,
|
|
"learning_rate": 9.629928717971237e-06,
|
|
"loss": 1.1881,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 0.8356382978723405,
|
|
"grad_norm": 3.7368946075439453,
|
|
"learning_rate": 9.629596581694072e-06,
|
|
"loss": 1.2955,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 0.8359042553191489,
|
|
"grad_norm": 3.77895188331604,
|
|
"learning_rate": 9.629264302171739e-06,
|
|
"loss": 1.2691,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 0.8361702127659575,
|
|
"grad_norm": 3.6195473670959473,
|
|
"learning_rate": 9.628931879414519e-06,
|
|
"loss": 1.125,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 0.836436170212766,
|
|
"grad_norm": 3.4380621910095215,
|
|
"learning_rate": 9.628599313432694e-06,
|
|
"loss": 1.2379,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.8367021276595744,
|
|
"grad_norm": 3.972651958465576,
|
|
"learning_rate": 9.628266604236558e-06,
|
|
"loss": 1.2316,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 0.836968085106383,
|
|
"grad_norm": 3.770378351211548,
|
|
"learning_rate": 9.627933751836405e-06,
|
|
"loss": 1.4091,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 0.8372340425531914,
|
|
"grad_norm": 3.359567165374756,
|
|
"learning_rate": 9.627600756242532e-06,
|
|
"loss": 1.076,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 0.8375,
|
|
"grad_norm": 3.5449929237365723,
|
|
"learning_rate": 9.627267617465243e-06,
|
|
"loss": 1.1785,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 0.8377659574468085,
|
|
"grad_norm": 3.8262412548065186,
|
|
"learning_rate": 9.626934335514847e-06,
|
|
"loss": 1.1613,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.8380319148936171,
|
|
"grad_norm": 3.5842607021331787,
|
|
"learning_rate": 9.626600910401656e-06,
|
|
"loss": 1.4153,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 0.8382978723404255,
|
|
"grad_norm": 3.2474827766418457,
|
|
"learning_rate": 9.626267342135983e-06,
|
|
"loss": 1.1652,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 0.8385638297872341,
|
|
"grad_norm": 3.3414809703826904,
|
|
"learning_rate": 9.625933630728153e-06,
|
|
"loss": 1.062,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 0.8388297872340426,
|
|
"grad_norm": 3.496842384338379,
|
|
"learning_rate": 9.62559977618849e-06,
|
|
"loss": 1.255,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 0.839095744680851,
|
|
"grad_norm": 3.2567241191864014,
|
|
"learning_rate": 9.625265778527325e-06,
|
|
"loss": 1.1378,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.8393617021276596,
|
|
"grad_norm": 3.720892906188965,
|
|
"learning_rate": 9.62493163775499e-06,
|
|
"loss": 1.4717,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 0.839627659574468,
|
|
"grad_norm": 3.342963695526123,
|
|
"learning_rate": 9.624597353881827e-06,
|
|
"loss": 1.2974,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 0.8398936170212766,
|
|
"grad_norm": 3.3030459880828857,
|
|
"learning_rate": 9.624262926918174e-06,
|
|
"loss": 1.1823,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 0.8401595744680851,
|
|
"grad_norm": 3.4827306270599365,
|
|
"learning_rate": 9.623928356874384e-06,
|
|
"loss": 1.2282,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 0.8404255319148937,
|
|
"grad_norm": 3.247631311416626,
|
|
"learning_rate": 9.623593643760805e-06,
|
|
"loss": 1.2173,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.8406914893617021,
|
|
"grad_norm": 3.571974515914917,
|
|
"learning_rate": 9.623258787587795e-06,
|
|
"loss": 1.2277,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 0.8409574468085106,
|
|
"grad_norm": 3.5363829135894775,
|
|
"learning_rate": 9.622923788365716e-06,
|
|
"loss": 1.2212,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 0.8412234042553192,
|
|
"grad_norm": 3.816324234008789,
|
|
"learning_rate": 9.622588646104934e-06,
|
|
"loss": 1.3759,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 0.8414893617021276,
|
|
"grad_norm": 3.8033061027526855,
|
|
"learning_rate": 9.622253360815814e-06,
|
|
"loss": 1.1493,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 0.8417553191489362,
|
|
"grad_norm": 3.7425754070281982,
|
|
"learning_rate": 9.621917932508733e-06,
|
|
"loss": 1.1964,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.8420212765957447,
|
|
"grad_norm": 3.4991588592529297,
|
|
"learning_rate": 9.62158236119407e-06,
|
|
"loss": 1.2337,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 0.8422872340425532,
|
|
"grad_norm": 3.450436592102051,
|
|
"learning_rate": 9.621246646882209e-06,
|
|
"loss": 1.1413,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 0.8425531914893617,
|
|
"grad_norm": 3.449032783508301,
|
|
"learning_rate": 9.620910789583534e-06,
|
|
"loss": 1.269,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 0.8428191489361702,
|
|
"grad_norm": 3.609985589981079,
|
|
"learning_rate": 9.62057478930844e-06,
|
|
"loss": 1.2008,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 0.8430851063829787,
|
|
"grad_norm": 3.5072379112243652,
|
|
"learning_rate": 9.620238646067322e-06,
|
|
"loss": 1.2176,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.8433510638297872,
|
|
"grad_norm": 3.481480836868286,
|
|
"learning_rate": 9.619902359870579e-06,
|
|
"loss": 1.2152,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 0.8436170212765958,
|
|
"grad_norm": 3.640972852706909,
|
|
"learning_rate": 9.619565930728618e-06,
|
|
"loss": 1.4143,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 0.8438829787234042,
|
|
"grad_norm": 3.5323524475097656,
|
|
"learning_rate": 9.61922935865185e-06,
|
|
"loss": 1.1856,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 0.8441489361702128,
|
|
"grad_norm": 3.837163209915161,
|
|
"learning_rate": 9.618892643650686e-06,
|
|
"loss": 1.243,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 0.8444148936170213,
|
|
"grad_norm": 3.702387809753418,
|
|
"learning_rate": 9.618555785735546e-06,
|
|
"loss": 1.1177,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.8446808510638298,
|
|
"grad_norm": 3.696453094482422,
|
|
"learning_rate": 9.618218784916851e-06,
|
|
"loss": 1.2794,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 0.8449468085106383,
|
|
"grad_norm": 3.467315435409546,
|
|
"learning_rate": 9.617881641205032e-06,
|
|
"loss": 1.1261,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 0.8452127659574468,
|
|
"grad_norm": 3.392866849899292,
|
|
"learning_rate": 9.617544354610516e-06,
|
|
"loss": 1.3169,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 0.8454787234042553,
|
|
"grad_norm": 3.4695167541503906,
|
|
"learning_rate": 9.617206925143742e-06,
|
|
"loss": 1.3706,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 0.8457446808510638,
|
|
"grad_norm": 3.658966064453125,
|
|
"learning_rate": 9.61686935281515e-06,
|
|
"loss": 1.289,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.8460106382978724,
|
|
"grad_norm": 3.779771327972412,
|
|
"learning_rate": 9.616531637635183e-06,
|
|
"loss": 1.2999,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 0.8462765957446808,
|
|
"grad_norm": 3.8787152767181396,
|
|
"learning_rate": 9.616193779614294e-06,
|
|
"loss": 1.2876,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 0.8465425531914894,
|
|
"grad_norm": 3.5529751777648926,
|
|
"learning_rate": 9.615855778762933e-06,
|
|
"loss": 1.2511,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 0.8468085106382979,
|
|
"grad_norm": 4.681981563568115,
|
|
"learning_rate": 9.61551763509156e-06,
|
|
"loss": 1.3139,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 0.8470744680851063,
|
|
"grad_norm": 3.130150556564331,
|
|
"learning_rate": 9.615179348610638e-06,
|
|
"loss": 1.1744,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.8473404255319149,
|
|
"grad_norm": 3.374901056289673,
|
|
"learning_rate": 9.614840919330632e-06,
|
|
"loss": 1.0669,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 0.8476063829787234,
|
|
"grad_norm": 3.805163621902466,
|
|
"learning_rate": 9.614502347262015e-06,
|
|
"loss": 1.3958,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 0.847872340425532,
|
|
"grad_norm": 3.173311948776245,
|
|
"learning_rate": 9.614163632415265e-06,
|
|
"loss": 1.2402,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 0.8481382978723404,
|
|
"grad_norm": 3.7105321884155273,
|
|
"learning_rate": 9.613824774800857e-06,
|
|
"loss": 1.2364,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 0.848404255319149,
|
|
"grad_norm": 3.5191519260406494,
|
|
"learning_rate": 9.613485774429279e-06,
|
|
"loss": 1.3238,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.8486702127659574,
|
|
"grad_norm": 3.2969210147857666,
|
|
"learning_rate": 9.613146631311018e-06,
|
|
"loss": 1.2284,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 0.8489361702127659,
|
|
"grad_norm": 3.6637449264526367,
|
|
"learning_rate": 9.612807345456571e-06,
|
|
"loss": 1.1128,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 0.8492021276595745,
|
|
"grad_norm": 3.9408974647521973,
|
|
"learning_rate": 9.612467916876434e-06,
|
|
"loss": 1.171,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 0.8494680851063829,
|
|
"grad_norm": 3.3598899841308594,
|
|
"learning_rate": 9.612128345581108e-06,
|
|
"loss": 1.1941,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 0.8497340425531915,
|
|
"grad_norm": 3.5474600791931152,
|
|
"learning_rate": 9.6117886315811e-06,
|
|
"loss": 1.1679,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"grad_norm": 3.9404945373535156,
|
|
"learning_rate": 9.611448774886925e-06,
|
|
"loss": 1.3117,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 0.8502659574468086,
|
|
"grad_norm": 3.389488935470581,
|
|
"learning_rate": 9.611108775509093e-06,
|
|
"loss": 1.1708,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 0.850531914893617,
|
|
"grad_norm": 3.5706136226654053,
|
|
"learning_rate": 9.610768633458127e-06,
|
|
"loss": 1.249,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 0.8507978723404256,
|
|
"grad_norm": 3.899035930633545,
|
|
"learning_rate": 9.610428348744552e-06,
|
|
"loss": 1.2828,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 0.851063829787234,
|
|
"grad_norm": 3.648972511291504,
|
|
"learning_rate": 9.610087921378895e-06,
|
|
"loss": 1.2152,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.8513297872340425,
|
|
"grad_norm": 3.762350559234619,
|
|
"learning_rate": 9.60974735137169e-06,
|
|
"loss": 1.3663,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 0.8515957446808511,
|
|
"grad_norm": 3.8155291080474854,
|
|
"learning_rate": 9.609406638733474e-06,
|
|
"loss": 1.1777,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 0.8518617021276595,
|
|
"grad_norm": 3.5268514156341553,
|
|
"learning_rate": 9.609065783474792e-06,
|
|
"loss": 1.2634,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 0.8521276595744681,
|
|
"grad_norm": 3.3057730197906494,
|
|
"learning_rate": 9.608724785606186e-06,
|
|
"loss": 1.2208,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 0.8523936170212766,
|
|
"grad_norm": 3.9648935794830322,
|
|
"learning_rate": 9.60838364513821e-06,
|
|
"loss": 1.2936,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.8526595744680852,
|
|
"grad_norm": 3.8742856979370117,
|
|
"learning_rate": 9.608042362081418e-06,
|
|
"loss": 1.298,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 0.8529255319148936,
|
|
"grad_norm": 3.845383644104004,
|
|
"learning_rate": 9.60770093644637e-06,
|
|
"loss": 1.2274,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 0.8531914893617021,
|
|
"grad_norm": 3.532756805419922,
|
|
"learning_rate": 9.60735936824363e-06,
|
|
"loss": 1.339,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 0.8534574468085107,
|
|
"grad_norm": 3.7821319103240967,
|
|
"learning_rate": 9.607017657483768e-06,
|
|
"loss": 1.3414,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 0.8537234042553191,
|
|
"grad_norm": 3.5962960720062256,
|
|
"learning_rate": 9.606675804177355e-06,
|
|
"loss": 1.1815,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.8539893617021277,
|
|
"grad_norm": 3.8669700622558594,
|
|
"learning_rate": 9.606333808334966e-06,
|
|
"loss": 1.2821,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 0.8542553191489362,
|
|
"grad_norm": 3.288717269897461,
|
|
"learning_rate": 9.605991669967189e-06,
|
|
"loss": 1.1532,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 0.8545212765957447,
|
|
"grad_norm": 3.445049285888672,
|
|
"learning_rate": 9.605649389084605e-06,
|
|
"loss": 1.2534,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 0.8547872340425532,
|
|
"grad_norm": 3.075615644454956,
|
|
"learning_rate": 9.605306965697809e-06,
|
|
"loss": 1.0243,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 0.8550531914893617,
|
|
"grad_norm": 3.6676225662231445,
|
|
"learning_rate": 9.604964399817392e-06,
|
|
"loss": 1.2927,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.8553191489361702,
|
|
"grad_norm": 3.4644627571105957,
|
|
"learning_rate": 9.604621691453954e-06,
|
|
"loss": 1.2167,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 0.8555851063829787,
|
|
"grad_norm": 3.3108158111572266,
|
|
"learning_rate": 9.6042788406181e-06,
|
|
"loss": 1.2437,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 0.8558510638297873,
|
|
"grad_norm": 3.634568929672241,
|
|
"learning_rate": 9.603935847320437e-06,
|
|
"loss": 1.2587,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 0.8561170212765957,
|
|
"grad_norm": 3.472355365753174,
|
|
"learning_rate": 9.603592711571581e-06,
|
|
"loss": 1.1544,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 0.8563829787234043,
|
|
"grad_norm": 3.7467241287231445,
|
|
"learning_rate": 9.603249433382145e-06,
|
|
"loss": 1.1884,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.8566489361702128,
|
|
"grad_norm": 4.016312599182129,
|
|
"learning_rate": 9.60290601276275e-06,
|
|
"loss": 1.2884,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 0.8569148936170212,
|
|
"grad_norm": 3.432687282562256,
|
|
"learning_rate": 9.602562449724027e-06,
|
|
"loss": 1.2495,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 0.8571808510638298,
|
|
"grad_norm": 3.466148614883423,
|
|
"learning_rate": 9.6022187442766e-06,
|
|
"loss": 1.0967,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 0.8574468085106383,
|
|
"grad_norm": 3.7120723724365234,
|
|
"learning_rate": 9.60187489643111e-06,
|
|
"loss": 1.1666,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 0.8577127659574468,
|
|
"grad_norm": 3.6994261741638184,
|
|
"learning_rate": 9.60153090619819e-06,
|
|
"loss": 1.3106,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.8579787234042553,
|
|
"grad_norm": 3.481760025024414,
|
|
"learning_rate": 9.601186773588486e-06,
|
|
"loss": 1.2581,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 0.8582446808510639,
|
|
"grad_norm": 3.5702121257781982,
|
|
"learning_rate": 9.600842498612647e-06,
|
|
"loss": 1.3228,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 0.8585106382978723,
|
|
"grad_norm": 4.04725980758667,
|
|
"learning_rate": 9.600498081281324e-06,
|
|
"loss": 1.2431,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 0.8587765957446809,
|
|
"grad_norm": 3.632622480392456,
|
|
"learning_rate": 9.600153521605176e-06,
|
|
"loss": 1.1693,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 0.8590425531914894,
|
|
"grad_norm": 3.6271767616271973,
|
|
"learning_rate": 9.59980881959486e-06,
|
|
"loss": 1.2398,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.8593085106382978,
|
|
"grad_norm": 3.3347911834716797,
|
|
"learning_rate": 9.599463975261042e-06,
|
|
"loss": 1.1603,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 0.8595744680851064,
|
|
"grad_norm": 3.6934587955474854,
|
|
"learning_rate": 9.599118988614396e-06,
|
|
"loss": 1.305,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 0.8598404255319149,
|
|
"grad_norm": 3.461353063583374,
|
|
"learning_rate": 9.598773859665593e-06,
|
|
"loss": 1.2013,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 0.8601063829787234,
|
|
"grad_norm": 3.2839810848236084,
|
|
"learning_rate": 9.598428588425312e-06,
|
|
"loss": 1.1208,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 0.8603723404255319,
|
|
"grad_norm": 3.599320650100708,
|
|
"learning_rate": 9.598083174904235e-06,
|
|
"loss": 1.4372,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.8606382978723405,
|
|
"grad_norm": 3.540738105773926,
|
|
"learning_rate": 9.597737619113055e-06,
|
|
"loss": 1.0961,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 0.8609042553191489,
|
|
"grad_norm": 3.327744722366333,
|
|
"learning_rate": 9.597391921062457e-06,
|
|
"loss": 1.2087,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 0.8611702127659574,
|
|
"grad_norm": 3.619152545928955,
|
|
"learning_rate": 9.59704608076314e-06,
|
|
"loss": 1.3197,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 0.861436170212766,
|
|
"grad_norm": 3.381136178970337,
|
|
"learning_rate": 9.596700098225806e-06,
|
|
"loss": 1.258,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 0.8617021276595744,
|
|
"grad_norm": 3.6447596549987793,
|
|
"learning_rate": 9.59635397346116e-06,
|
|
"loss": 1.1877,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.861968085106383,
|
|
"grad_norm": 4.12053918838501,
|
|
"learning_rate": 9.596007706479908e-06,
|
|
"loss": 1.3712,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 0.8622340425531915,
|
|
"grad_norm": 3.1644914150238037,
|
|
"learning_rate": 9.595661297292768e-06,
|
|
"loss": 1.079,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 0.8625,
|
|
"grad_norm": 4.086709022521973,
|
|
"learning_rate": 9.595314745910455e-06,
|
|
"loss": 1.2766,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 0.8627659574468085,
|
|
"grad_norm": 4.086410999298096,
|
|
"learning_rate": 9.594968052343697e-06,
|
|
"loss": 1.2103,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 0.863031914893617,
|
|
"grad_norm": 3.550549030303955,
|
|
"learning_rate": 9.594621216603215e-06,
|
|
"loss": 1.3625,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.8632978723404255,
|
|
"grad_norm": 3.555739402770996,
|
|
"learning_rate": 9.594274238699744e-06,
|
|
"loss": 1.2163,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 0.863563829787234,
|
|
"grad_norm": 3.2902424335479736,
|
|
"learning_rate": 9.593927118644017e-06,
|
|
"loss": 0.9849,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 0.8638297872340426,
|
|
"grad_norm": 3.554675579071045,
|
|
"learning_rate": 9.593579856446778e-06,
|
|
"loss": 1.1437,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 0.864095744680851,
|
|
"grad_norm": 3.3788020610809326,
|
|
"learning_rate": 9.59323245211877e-06,
|
|
"loss": 1.2336,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 0.8643617021276596,
|
|
"grad_norm": 3.4318618774414062,
|
|
"learning_rate": 9.592884905670742e-06,
|
|
"loss": 1.2021,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.8646276595744681,
|
|
"grad_norm": 3.5366907119750977,
|
|
"learning_rate": 9.592537217113446e-06,
|
|
"loss": 1.3365,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 0.8648936170212767,
|
|
"grad_norm": 3.7782368659973145,
|
|
"learning_rate": 9.592189386457645e-06,
|
|
"loss": 1.3855,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 0.8651595744680851,
|
|
"grad_norm": 3.480111837387085,
|
|
"learning_rate": 9.591841413714094e-06,
|
|
"loss": 1.2029,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 0.8654255319148936,
|
|
"grad_norm": 3.305756092071533,
|
|
"learning_rate": 9.591493298893567e-06,
|
|
"loss": 1.1172,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 0.8656914893617021,
|
|
"grad_norm": 3.342085361480713,
|
|
"learning_rate": 9.591145042006829e-06,
|
|
"loss": 1.0662,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.8659574468085106,
|
|
"grad_norm": 3.6532325744628906,
|
|
"learning_rate": 9.590796643064658e-06,
|
|
"loss": 1.2083,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 0.8662234042553192,
|
|
"grad_norm": 3.8469889163970947,
|
|
"learning_rate": 9.590448102077835e-06,
|
|
"loss": 1.1185,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 0.8664893617021276,
|
|
"grad_norm": 3.6516644954681396,
|
|
"learning_rate": 9.590099419057142e-06,
|
|
"loss": 1.314,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 0.8667553191489362,
|
|
"grad_norm": 3.6090152263641357,
|
|
"learning_rate": 9.58975059401337e-06,
|
|
"loss": 1.2411,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 0.8670212765957447,
|
|
"grad_norm": 3.436042308807373,
|
|
"learning_rate": 9.589401626957309e-06,
|
|
"loss": 1.3095,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.8672872340425531,
|
|
"grad_norm": 3.2654285430908203,
|
|
"learning_rate": 9.589052517899759e-06,
|
|
"loss": 1.1265,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 0.8675531914893617,
|
|
"grad_norm": 3.6885263919830322,
|
|
"learning_rate": 9.588703266851523e-06,
|
|
"loss": 1.2568,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 0.8678191489361702,
|
|
"grad_norm": 3.9233293533325195,
|
|
"learning_rate": 9.588353873823404e-06,
|
|
"loss": 1.2273,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 0.8680851063829788,
|
|
"grad_norm": 3.254892349243164,
|
|
"learning_rate": 9.588004338826213e-06,
|
|
"loss": 1.0894,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 0.8683510638297872,
|
|
"grad_norm": 3.3320047855377197,
|
|
"learning_rate": 9.58765466187077e-06,
|
|
"loss": 1.3296,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.8686170212765958,
|
|
"grad_norm": 3.730386972427368,
|
|
"learning_rate": 9.587304842967887e-06,
|
|
"loss": 1.3909,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 0.8688829787234043,
|
|
"grad_norm": 3.557739734649658,
|
|
"learning_rate": 9.586954882128391e-06,
|
|
"loss": 1.2858,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 0.8691489361702127,
|
|
"grad_norm": 3.292858362197876,
|
|
"learning_rate": 9.58660477936311e-06,
|
|
"loss": 1.2351,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 0.8694148936170213,
|
|
"grad_norm": 3.87530255317688,
|
|
"learning_rate": 9.58625453468288e-06,
|
|
"loss": 1.1993,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 0.8696808510638298,
|
|
"grad_norm": 3.5502493381500244,
|
|
"learning_rate": 9.585904148098532e-06,
|
|
"loss": 1.2225,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.8699468085106383,
|
|
"grad_norm": 3.9256691932678223,
|
|
"learning_rate": 9.585553619620913e-06,
|
|
"loss": 1.4114,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 0.8702127659574468,
|
|
"grad_norm": 3.4120373725891113,
|
|
"learning_rate": 9.585202949260866e-06,
|
|
"loss": 1.1049,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 0.8704787234042554,
|
|
"grad_norm": 3.6664795875549316,
|
|
"learning_rate": 9.58485213702924e-06,
|
|
"loss": 1.1906,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 0.8707446808510638,
|
|
"grad_norm": 3.315964460372925,
|
|
"learning_rate": 9.584501182936891e-06,
|
|
"loss": 1.1104,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 0.8710106382978723,
|
|
"grad_norm": 3.3911890983581543,
|
|
"learning_rate": 9.584150086994678e-06,
|
|
"loss": 1.1979,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.8712765957446809,
|
|
"grad_norm": 3.3415443897247314,
|
|
"learning_rate": 9.583798849213467e-06,
|
|
"loss": 1.2044,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 0.8715425531914893,
|
|
"grad_norm": 3.4745638370513916,
|
|
"learning_rate": 9.58344746960412e-06,
|
|
"loss": 1.2126,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 0.8718085106382979,
|
|
"grad_norm": 3.358224868774414,
|
|
"learning_rate": 9.58309594817751e-06,
|
|
"loss": 1.2591,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 0.8720744680851064,
|
|
"grad_norm": 3.607102155685425,
|
|
"learning_rate": 9.582744284944519e-06,
|
|
"loss": 1.2529,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 0.8723404255319149,
|
|
"grad_norm": 3.4642441272735596,
|
|
"learning_rate": 9.582392479916023e-06,
|
|
"loss": 1.1749,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.8726063829787234,
|
|
"grad_norm": 3.5729122161865234,
|
|
"learning_rate": 9.582040533102908e-06,
|
|
"loss": 1.3488,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 0.872872340425532,
|
|
"grad_norm": 3.499811887741089,
|
|
"learning_rate": 9.581688444516064e-06,
|
|
"loss": 1.1714,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 0.8731382978723404,
|
|
"grad_norm": 3.7235212326049805,
|
|
"learning_rate": 9.581336214166386e-06,
|
|
"loss": 1.2336,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 0.8734042553191489,
|
|
"grad_norm": 3.3966002464294434,
|
|
"learning_rate": 9.580983842064772e-06,
|
|
"loss": 1.2197,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 0.8736702127659575,
|
|
"grad_norm": 3.7711052894592285,
|
|
"learning_rate": 9.580631328222124e-06,
|
|
"loss": 1.3275,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.8739361702127659,
|
|
"grad_norm": 3.6308035850524902,
|
|
"learning_rate": 9.58027867264935e-06,
|
|
"loss": 1.1036,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 0.8742021276595745,
|
|
"grad_norm": 3.5871105194091797,
|
|
"learning_rate": 9.579925875357361e-06,
|
|
"loss": 1.2099,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 0.874468085106383,
|
|
"grad_norm": 3.3607616424560547,
|
|
"learning_rate": 9.579572936357073e-06,
|
|
"loss": 1.3576,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 0.8747340425531915,
|
|
"grad_norm": 3.5098683834075928,
|
|
"learning_rate": 9.579219855659407e-06,
|
|
"loss": 1.1218,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 0.875,
|
|
"grad_norm": 3.2693376541137695,
|
|
"learning_rate": 9.578866633275289e-06,
|
|
"loss": 1.2022,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.8752659574468085,
|
|
"grad_norm": 3.9929087162017822,
|
|
"learning_rate": 9.578513269215643e-06,
|
|
"loss": 1.2267,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 0.875531914893617,
|
|
"grad_norm": 3.7925865650177,
|
|
"learning_rate": 9.578159763491408e-06,
|
|
"loss": 1.3087,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 0.8757978723404255,
|
|
"grad_norm": 3.5196733474731445,
|
|
"learning_rate": 9.577806116113519e-06,
|
|
"loss": 1.2655,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 0.8760638297872341,
|
|
"grad_norm": 3.529148578643799,
|
|
"learning_rate": 9.57745232709292e-06,
|
|
"loss": 1.1591,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 0.8763297872340425,
|
|
"grad_norm": 3.423691987991333,
|
|
"learning_rate": 9.577098396440557e-06,
|
|
"loss": 1.2312,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.8765957446808511,
|
|
"grad_norm": 3.6896872520446777,
|
|
"learning_rate": 9.57674432416738e-06,
|
|
"loss": 1.3319,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 0.8768617021276596,
|
|
"grad_norm": 3.2412073612213135,
|
|
"learning_rate": 9.576390110284343e-06,
|
|
"loss": 1.1944,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 0.877127659574468,
|
|
"grad_norm": 3.716688871383667,
|
|
"learning_rate": 9.576035754802411e-06,
|
|
"loss": 1.1713,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 0.8773936170212766,
|
|
"grad_norm": 3.721823215484619,
|
|
"learning_rate": 9.575681257732546e-06,
|
|
"loss": 1.2639,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 0.8776595744680851,
|
|
"grad_norm": 3.4668095111846924,
|
|
"learning_rate": 9.575326619085713e-06,
|
|
"loss": 1.2198,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.8779255319148936,
|
|
"grad_norm": 3.647254467010498,
|
|
"learning_rate": 9.574971838872889e-06,
|
|
"loss": 1.2587,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 0.8781914893617021,
|
|
"grad_norm": 3.563108205795288,
|
|
"learning_rate": 9.574616917105049e-06,
|
|
"loss": 1.2173,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 0.8784574468085107,
|
|
"grad_norm": 5.121861457824707,
|
|
"learning_rate": 9.574261853793176e-06,
|
|
"loss": 1.2889,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 0.8787234042553191,
|
|
"grad_norm": 3.9446914196014404,
|
|
"learning_rate": 9.573906648948256e-06,
|
|
"loss": 1.4498,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 0.8789893617021277,
|
|
"grad_norm": 3.368877649307251,
|
|
"learning_rate": 9.573551302581279e-06,
|
|
"loss": 1.1592,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.8792553191489362,
|
|
"grad_norm": 3.4360673427581787,
|
|
"learning_rate": 9.57319581470324e-06,
|
|
"loss": 1.2784,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 0.8795212765957446,
|
|
"grad_norm": 3.9499571323394775,
|
|
"learning_rate": 9.572840185325139e-06,
|
|
"loss": 1.2127,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 0.8797872340425532,
|
|
"grad_norm": 3.3917598724365234,
|
|
"learning_rate": 9.572484414457976e-06,
|
|
"loss": 1.1193,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 0.8800531914893617,
|
|
"grad_norm": 3.3946712017059326,
|
|
"learning_rate": 9.572128502112765e-06,
|
|
"loss": 1.2026,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 0.8803191489361702,
|
|
"grad_norm": 3.7101964950561523,
|
|
"learning_rate": 9.571772448300514e-06,
|
|
"loss": 1.2095,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.8805851063829787,
|
|
"grad_norm": 3.727922201156616,
|
|
"learning_rate": 9.571416253032241e-06,
|
|
"loss": 1.4194,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 0.8808510638297873,
|
|
"grad_norm": 3.457578182220459,
|
|
"learning_rate": 9.571059916318967e-06,
|
|
"loss": 1.26,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 0.8811170212765957,
|
|
"grad_norm": 3.6214683055877686,
|
|
"learning_rate": 9.570703438171717e-06,
|
|
"loss": 1.3319,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 0.8813829787234042,
|
|
"grad_norm": 3.4604907035827637,
|
|
"learning_rate": 9.570346818601522e-06,
|
|
"loss": 1.1988,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 0.8816489361702128,
|
|
"grad_norm": 3.6304855346679688,
|
|
"learning_rate": 9.569990057619414e-06,
|
|
"loss": 1.3127,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.8819148936170212,
|
|
"grad_norm": 3.6774277687072754,
|
|
"learning_rate": 9.569633155236436e-06,
|
|
"loss": 1.1874,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 0.8821808510638298,
|
|
"grad_norm": 3.3065695762634277,
|
|
"learning_rate": 9.569276111463626e-06,
|
|
"loss": 1.2098,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 0.8824468085106383,
|
|
"grad_norm": 3.712066650390625,
|
|
"learning_rate": 9.568918926312033e-06,
|
|
"loss": 1.2148,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 0.8827127659574469,
|
|
"grad_norm": 3.215933084487915,
|
|
"learning_rate": 9.568561599792709e-06,
|
|
"loss": 1.2424,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 0.8829787234042553,
|
|
"grad_norm": 3.317523717880249,
|
|
"learning_rate": 9.568204131916712e-06,
|
|
"loss": 1.1701,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.8832446808510638,
|
|
"grad_norm": 4.0422749519348145,
|
|
"learning_rate": 9.5678465226951e-06,
|
|
"loss": 1.3527,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 0.8835106382978724,
|
|
"grad_norm": 3.700969934463501,
|
|
"learning_rate": 9.56748877213894e-06,
|
|
"loss": 1.243,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 0.8837765957446808,
|
|
"grad_norm": 3.6172409057617188,
|
|
"learning_rate": 9.567130880259296e-06,
|
|
"loss": 1.3409,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 0.8840425531914894,
|
|
"grad_norm": 3.587956190109253,
|
|
"learning_rate": 9.56677284706725e-06,
|
|
"loss": 1.327,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 0.8843085106382979,
|
|
"grad_norm": 3.8839058876037598,
|
|
"learning_rate": 9.566414672573873e-06,
|
|
"loss": 1.2556,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.8845744680851064,
|
|
"grad_norm": 3.610464572906494,
|
|
"learning_rate": 9.56605635679025e-06,
|
|
"loss": 1.2233,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 0.8848404255319149,
|
|
"grad_norm": 3.350374221801758,
|
|
"learning_rate": 9.565697899727466e-06,
|
|
"loss": 1.1454,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 0.8851063829787233,
|
|
"grad_norm": 3.175729513168335,
|
|
"learning_rate": 9.565339301396616e-06,
|
|
"loss": 1.1474,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 0.8853723404255319,
|
|
"grad_norm": 3.39150333404541,
|
|
"learning_rate": 9.564980561808793e-06,
|
|
"loss": 1.1578,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 0.8856382978723404,
|
|
"grad_norm": 4.003450393676758,
|
|
"learning_rate": 9.564621680975095e-06,
|
|
"loss": 1.3537,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.885904255319149,
|
|
"grad_norm": 3.366062879562378,
|
|
"learning_rate": 9.564262658906628e-06,
|
|
"loss": 1.2119,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 0.8861702127659574,
|
|
"grad_norm": 4.014388084411621,
|
|
"learning_rate": 9.563903495614503e-06,
|
|
"loss": 1.3046,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 0.886436170212766,
|
|
"grad_norm": 3.3641979694366455,
|
|
"learning_rate": 9.563544191109828e-06,
|
|
"loss": 1.1204,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 0.8867021276595745,
|
|
"grad_norm": 3.584113836288452,
|
|
"learning_rate": 9.563184745403725e-06,
|
|
"loss": 1.1223,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 0.886968085106383,
|
|
"grad_norm": 3.905111312866211,
|
|
"learning_rate": 9.562825158507311e-06,
|
|
"loss": 1.2031,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.8872340425531915,
|
|
"grad_norm": 3.787869453430176,
|
|
"learning_rate": 9.562465430431716e-06,
|
|
"loss": 1.1798,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 0.8875,
|
|
"grad_norm": 3.336646795272827,
|
|
"learning_rate": 9.562105561188069e-06,
|
|
"loss": 1.0405,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 0.8877659574468085,
|
|
"grad_norm": 3.7780652046203613,
|
|
"learning_rate": 9.561745550787504e-06,
|
|
"loss": 1.1147,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 0.888031914893617,
|
|
"grad_norm": 3.8940999507904053,
|
|
"learning_rate": 9.561385399241164e-06,
|
|
"loss": 1.371,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 0.8882978723404256,
|
|
"grad_norm": 3.7703256607055664,
|
|
"learning_rate": 9.561025106560184e-06,
|
|
"loss": 1.2073,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.888563829787234,
|
|
"grad_norm": 3.8208539485931396,
|
|
"learning_rate": 9.560664672755721e-06,
|
|
"loss": 1.3914,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 0.8888297872340426,
|
|
"grad_norm": 3.8787341117858887,
|
|
"learning_rate": 9.560304097838922e-06,
|
|
"loss": 1.2999,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 0.8890957446808511,
|
|
"grad_norm": 3.4178457260131836,
|
|
"learning_rate": 9.559943381820947e-06,
|
|
"loss": 1.2978,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 0.8893617021276595,
|
|
"grad_norm": 3.7168829441070557,
|
|
"learning_rate": 9.559582524712953e-06,
|
|
"loss": 1.2428,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 0.8896276595744681,
|
|
"grad_norm": 3.8447728157043457,
|
|
"learning_rate": 9.55922152652611e-06,
|
|
"loss": 1.3121,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.8898936170212766,
|
|
"grad_norm": 3.5572218894958496,
|
|
"learning_rate": 9.558860387271583e-06,
|
|
"loss": 1.3853,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 0.8901595744680851,
|
|
"grad_norm": 3.461214780807495,
|
|
"learning_rate": 9.558499106960548e-06,
|
|
"loss": 1.2634,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 0.8904255319148936,
|
|
"grad_norm": 3.4366822242736816,
|
|
"learning_rate": 9.558137685604184e-06,
|
|
"loss": 1.322,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 0.8906914893617022,
|
|
"grad_norm": 3.7072808742523193,
|
|
"learning_rate": 9.557776123213673e-06,
|
|
"loss": 1.2393,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 0.8909574468085106,
|
|
"grad_norm": 3.6192643642425537,
|
|
"learning_rate": 9.557414419800204e-06,
|
|
"loss": 1.2106,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.8912234042553191,
|
|
"grad_norm": 3.3502161502838135,
|
|
"learning_rate": 9.557052575374967e-06,
|
|
"loss": 1.1333,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 0.8914893617021277,
|
|
"grad_norm": 3.4909167289733887,
|
|
"learning_rate": 9.556690589949158e-06,
|
|
"loss": 1.2107,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 0.8917553191489361,
|
|
"grad_norm": 3.3816614151000977,
|
|
"learning_rate": 9.556328463533976e-06,
|
|
"loss": 1.217,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 0.8920212765957447,
|
|
"grad_norm": 3.6492433547973633,
|
|
"learning_rate": 9.55596619614063e-06,
|
|
"loss": 1.1954,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 0.8922872340425532,
|
|
"grad_norm": 3.4829185009002686,
|
|
"learning_rate": 9.555603787780321e-06,
|
|
"loss": 1.1374,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.8925531914893617,
|
|
"grad_norm": 3.2989566326141357,
|
|
"learning_rate": 9.555241238464271e-06,
|
|
"loss": 1.2678,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 0.8928191489361702,
|
|
"grad_norm": 3.325765609741211,
|
|
"learning_rate": 9.554878548203695e-06,
|
|
"loss": 1.1352,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 0.8930851063829788,
|
|
"grad_norm": 3.680143356323242,
|
|
"learning_rate": 9.55451571700981e-06,
|
|
"loss": 1.1376,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 0.8933510638297872,
|
|
"grad_norm": 3.4539363384246826,
|
|
"learning_rate": 9.554152744893848e-06,
|
|
"loss": 1.2099,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 0.8936170212765957,
|
|
"grad_norm": 3.541053295135498,
|
|
"learning_rate": 9.553789631867039e-06,
|
|
"loss": 1.2115,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.8938829787234043,
|
|
"grad_norm": 3.2321863174438477,
|
|
"learning_rate": 9.553426377940618e-06,
|
|
"loss": 1.2008,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 0.8941489361702127,
|
|
"grad_norm": 4.26365852355957,
|
|
"learning_rate": 9.553062983125822e-06,
|
|
"loss": 1.3757,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 0.8944148936170213,
|
|
"grad_norm": 3.7996468544006348,
|
|
"learning_rate": 9.552699447433899e-06,
|
|
"loss": 1.3071,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 0.8946808510638298,
|
|
"grad_norm": 3.2904140949249268,
|
|
"learning_rate": 9.552335770876094e-06,
|
|
"loss": 1.0914,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 0.8949468085106383,
|
|
"grad_norm": 3.48201584815979,
|
|
"learning_rate": 9.551971953463659e-06,
|
|
"loss": 1.1438,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.8952127659574468,
|
|
"grad_norm": 3.721348285675049,
|
|
"learning_rate": 9.551607995207854e-06,
|
|
"loss": 1.1116,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 0.8954787234042553,
|
|
"grad_norm": 3.6480965614318848,
|
|
"learning_rate": 9.551243896119938e-06,
|
|
"loss": 1.1571,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 0.8957446808510638,
|
|
"grad_norm": 3.7615323066711426,
|
|
"learning_rate": 9.550879656211179e-06,
|
|
"loss": 1.4653,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 0.8960106382978723,
|
|
"grad_norm": 3.1234636306762695,
|
|
"learning_rate": 9.550515275492843e-06,
|
|
"loss": 1.1518,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 0.8962765957446809,
|
|
"grad_norm": 3.5595285892486572,
|
|
"learning_rate": 9.550150753976209e-06,
|
|
"loss": 1.213,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.8965425531914893,
|
|
"grad_norm": 3.4824399948120117,
|
|
"learning_rate": 9.549786091672553e-06,
|
|
"loss": 1.1228,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 0.8968085106382979,
|
|
"grad_norm": 3.6110517978668213,
|
|
"learning_rate": 9.549421288593157e-06,
|
|
"loss": 1.3169,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 0.8970744680851064,
|
|
"grad_norm": 4.197827339172363,
|
|
"learning_rate": 9.549056344749312e-06,
|
|
"loss": 1.4542,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 0.8973404255319148,
|
|
"grad_norm": 3.3921542167663574,
|
|
"learning_rate": 9.548691260152308e-06,
|
|
"loss": 1.236,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 0.8976063829787234,
|
|
"grad_norm": 3.5142951011657715,
|
|
"learning_rate": 9.54832603481344e-06,
|
|
"loss": 1.2546,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.8978723404255319,
|
|
"grad_norm": 3.390557050704956,
|
|
"learning_rate": 9.547960668744009e-06,
|
|
"loss": 1.2041,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 0.8981382978723405,
|
|
"grad_norm": 3.5497653484344482,
|
|
"learning_rate": 9.547595161955321e-06,
|
|
"loss": 1.2139,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 0.8984042553191489,
|
|
"grad_norm": 3.379268169403076,
|
|
"learning_rate": 9.547229514458684e-06,
|
|
"loss": 1.1503,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 0.8986702127659575,
|
|
"grad_norm": 3.826500177383423,
|
|
"learning_rate": 9.546863726265414e-06,
|
|
"loss": 1.2808,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 0.898936170212766,
|
|
"grad_norm": 3.121777296066284,
|
|
"learning_rate": 9.546497797386824e-06,
|
|
"loss": 1.1966,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.8992021276595744,
|
|
"grad_norm": 3.6707565784454346,
|
|
"learning_rate": 9.546131727834242e-06,
|
|
"loss": 1.33,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 0.899468085106383,
|
|
"grad_norm": 3.555612325668335,
|
|
"learning_rate": 9.545765517618992e-06,
|
|
"loss": 1.1858,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 0.8997340425531914,
|
|
"grad_norm": 3.481360912322998,
|
|
"learning_rate": 9.545399166752402e-06,
|
|
"loss": 1.4109,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"grad_norm": 3.1930184364318848,
|
|
"learning_rate": 9.545032675245814e-06,
|
|
"loss": 1.1161,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 0.9002659574468085,
|
|
"grad_norm": 3.5262556076049805,
|
|
"learning_rate": 9.544666043110562e-06,
|
|
"loss": 1.2255,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.9005319148936171,
|
|
"grad_norm": 3.4826877117156982,
|
|
"learning_rate": 9.544299270357992e-06,
|
|
"loss": 1.2001,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 0.9007978723404255,
|
|
"grad_norm": 3.602201223373413,
|
|
"learning_rate": 9.543932356999452e-06,
|
|
"loss": 1.2133,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 0.9010638297872341,
|
|
"grad_norm": 3.6607158184051514,
|
|
"learning_rate": 9.543565303046297e-06,
|
|
"loss": 1.1962,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 0.9013297872340426,
|
|
"grad_norm": 3.664412260055542,
|
|
"learning_rate": 9.543198108509879e-06,
|
|
"loss": 1.2857,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 0.901595744680851,
|
|
"grad_norm": 3.5442616939544678,
|
|
"learning_rate": 9.542830773401564e-06,
|
|
"loss": 1.2096,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.9018617021276596,
|
|
"grad_norm": 4.058464527130127,
|
|
"learning_rate": 9.542463297732716e-06,
|
|
"loss": 1.4371,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 0.902127659574468,
|
|
"grad_norm": 3.6064326763153076,
|
|
"learning_rate": 9.542095681514708e-06,
|
|
"loss": 1.2809,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 0.9023936170212766,
|
|
"grad_norm": 3.585545301437378,
|
|
"learning_rate": 9.541727924758907e-06,
|
|
"loss": 1.3174,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 0.9026595744680851,
|
|
"grad_norm": 3.465228319168091,
|
|
"learning_rate": 9.5413600274767e-06,
|
|
"loss": 1.2042,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 0.9029255319148937,
|
|
"grad_norm": 3.581475019454956,
|
|
"learning_rate": 9.540991989679468e-06,
|
|
"loss": 1.3837,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.9031914893617021,
|
|
"grad_norm": 3.4275171756744385,
|
|
"learning_rate": 9.540623811378597e-06,
|
|
"loss": 1.209,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 0.9034574468085106,
|
|
"grad_norm": 3.159125328063965,
|
|
"learning_rate": 9.540255492585478e-06,
|
|
"loss": 1.2519,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 0.9037234042553192,
|
|
"grad_norm": 3.7644615173339844,
|
|
"learning_rate": 9.53988703331151e-06,
|
|
"loss": 1.2965,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 0.9039893617021276,
|
|
"grad_norm": 3.519270896911621,
|
|
"learning_rate": 9.53951843356809e-06,
|
|
"loss": 1.2125,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 0.9042553191489362,
|
|
"grad_norm": 3.7408711910247803,
|
|
"learning_rate": 9.539149693366628e-06,
|
|
"loss": 1.3432,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.9045212765957447,
|
|
"grad_norm": 3.343994617462158,
|
|
"learning_rate": 9.538780812718527e-06,
|
|
"loss": 1.2149,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 0.9047872340425532,
|
|
"grad_norm": 3.3215134143829346,
|
|
"learning_rate": 9.538411791635205e-06,
|
|
"loss": 1.2844,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 0.9050531914893617,
|
|
"grad_norm": 3.9590845108032227,
|
|
"learning_rate": 9.53804263012808e-06,
|
|
"loss": 1.289,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 0.9053191489361702,
|
|
"grad_norm": 3.299415349960327,
|
|
"learning_rate": 9.537673328208572e-06,
|
|
"loss": 1.0875,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 0.9055851063829787,
|
|
"grad_norm": 3.5640780925750732,
|
|
"learning_rate": 9.53730388588811e-06,
|
|
"loss": 1.2735,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.9058510638297872,
|
|
"grad_norm": 3.2300360202789307,
|
|
"learning_rate": 9.536934303178123e-06,
|
|
"loss": 1.3574,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 0.9061170212765958,
|
|
"grad_norm": 3.6983630657196045,
|
|
"learning_rate": 9.536564580090046e-06,
|
|
"loss": 1.2751,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 0.9063829787234042,
|
|
"grad_norm": 3.740288257598877,
|
|
"learning_rate": 9.536194716635322e-06,
|
|
"loss": 1.25,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 0.9066489361702128,
|
|
"grad_norm": 3.6063649654388428,
|
|
"learning_rate": 9.535824712825393e-06,
|
|
"loss": 1.1656,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 0.9069148936170213,
|
|
"grad_norm": 3.738442897796631,
|
|
"learning_rate": 9.535454568671705e-06,
|
|
"loss": 1.3204,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.9071808510638298,
|
|
"grad_norm": 3.7406976222991943,
|
|
"learning_rate": 9.535084284185714e-06,
|
|
"loss": 1.2681,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 0.9074468085106383,
|
|
"grad_norm": 3.7773613929748535,
|
|
"learning_rate": 9.534713859378875e-06,
|
|
"loss": 1.2303,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 0.9077127659574468,
|
|
"grad_norm": 3.531691312789917,
|
|
"learning_rate": 9.53434329426265e-06,
|
|
"loss": 1.1495,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 0.9079787234042553,
|
|
"grad_norm": 3.730365514755249,
|
|
"learning_rate": 9.533972588848507e-06,
|
|
"loss": 1.1998,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 0.9082446808510638,
|
|
"grad_norm": 4.04153299331665,
|
|
"learning_rate": 9.533601743147911e-06,
|
|
"loss": 1.2527,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.9085106382978724,
|
|
"grad_norm": 3.547910451889038,
|
|
"learning_rate": 9.53323075717234e-06,
|
|
"loss": 1.3033,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 0.9087765957446808,
|
|
"grad_norm": 3.444802761077881,
|
|
"learning_rate": 9.532859630933276e-06,
|
|
"loss": 1.2513,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 0.9090425531914894,
|
|
"grad_norm": 3.7553112506866455,
|
|
"learning_rate": 9.532488364442195e-06,
|
|
"loss": 1.1689,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 0.9093085106382979,
|
|
"grad_norm": 3.748389959335327,
|
|
"learning_rate": 9.532116957710587e-06,
|
|
"loss": 1.2341,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 0.9095744680851063,
|
|
"grad_norm": 3.5497937202453613,
|
|
"learning_rate": 9.531745410749946e-06,
|
|
"loss": 1.198,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.9098404255319149,
|
|
"grad_norm": 3.540468692779541,
|
|
"learning_rate": 9.531373723571765e-06,
|
|
"loss": 1.3774,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 0.9101063829787234,
|
|
"grad_norm": 3.332838535308838,
|
|
"learning_rate": 9.531001896187548e-06,
|
|
"loss": 1.3205,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 0.910372340425532,
|
|
"grad_norm": 3.7700576782226562,
|
|
"learning_rate": 9.530629928608797e-06,
|
|
"loss": 1.0956,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 0.9106382978723404,
|
|
"grad_norm": 3.387652635574341,
|
|
"learning_rate": 9.530257820847022e-06,
|
|
"loss": 1.1835,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 0.910904255319149,
|
|
"grad_norm": 3.9318602085113525,
|
|
"learning_rate": 9.529885572913735e-06,
|
|
"loss": 1.3197,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.9111702127659574,
|
|
"grad_norm": 3.158997058868408,
|
|
"learning_rate": 9.529513184820458e-06,
|
|
"loss": 1.2074,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 0.9114361702127659,
|
|
"grad_norm": 3.5039327144622803,
|
|
"learning_rate": 9.529140656578707e-06,
|
|
"loss": 1.3652,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 0.9117021276595745,
|
|
"grad_norm": 3.682145118713379,
|
|
"learning_rate": 9.528767988200015e-06,
|
|
"loss": 1.1703,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 0.9119680851063829,
|
|
"grad_norm": 3.6255364418029785,
|
|
"learning_rate": 9.528395179695907e-06,
|
|
"loss": 1.269,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 0.9122340425531915,
|
|
"grad_norm": 3.666750907897949,
|
|
"learning_rate": 9.528022231077921e-06,
|
|
"loss": 1.4003,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.9125,
|
|
"grad_norm": 3.167771816253662,
|
|
"learning_rate": 9.527649142357596e-06,
|
|
"loss": 1.1409,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 0.9127659574468086,
|
|
"grad_norm": 3.6556570529937744,
|
|
"learning_rate": 9.527275913546475e-06,
|
|
"loss": 1.3847,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 0.913031914893617,
|
|
"grad_norm": 3.794574737548828,
|
|
"learning_rate": 9.526902544656108e-06,
|
|
"loss": 1.3673,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 0.9132978723404256,
|
|
"grad_norm": 3.597594976425171,
|
|
"learning_rate": 9.526529035698046e-06,
|
|
"loss": 1.068,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 0.913563829787234,
|
|
"grad_norm": 3.1316208839416504,
|
|
"learning_rate": 9.526155386683848e-06,
|
|
"loss": 1.1379,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.9138297872340425,
|
|
"grad_norm": 3.3742425441741943,
|
|
"learning_rate": 9.525781597625073e-06,
|
|
"loss": 1.2233,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 0.9140957446808511,
|
|
"grad_norm": 3.6747100353240967,
|
|
"learning_rate": 9.525407668533286e-06,
|
|
"loss": 1.3035,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 0.9143617021276595,
|
|
"grad_norm": 3.4809205532073975,
|
|
"learning_rate": 9.525033599420058e-06,
|
|
"loss": 1.1033,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 0.9146276595744681,
|
|
"grad_norm": 3.575571298599243,
|
|
"learning_rate": 9.524659390296961e-06,
|
|
"loss": 1.222,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 0.9148936170212766,
|
|
"grad_norm": 3.502336263656616,
|
|
"learning_rate": 9.524285041175578e-06,
|
|
"loss": 1.1575,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.9151595744680852,
|
|
"grad_norm": 3.6172244548797607,
|
|
"learning_rate": 9.523910552067489e-06,
|
|
"loss": 1.1852,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 0.9154255319148936,
|
|
"grad_norm": 3.6247096061706543,
|
|
"learning_rate": 9.523535922984281e-06,
|
|
"loss": 1.4405,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 0.9156914893617021,
|
|
"grad_norm": 3.5026776790618896,
|
|
"learning_rate": 9.523161153937546e-06,
|
|
"loss": 1.2206,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 0.9159574468085107,
|
|
"grad_norm": 3.7139501571655273,
|
|
"learning_rate": 9.522786244938877e-06,
|
|
"loss": 1.3555,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 0.9162234042553191,
|
|
"grad_norm": 3.3043665885925293,
|
|
"learning_rate": 9.522411195999879e-06,
|
|
"loss": 1.0747,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.9164893617021277,
|
|
"grad_norm": 3.3844451904296875,
|
|
"learning_rate": 9.522036007132154e-06,
|
|
"loss": 1.2419,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 0.9167553191489362,
|
|
"grad_norm": 3.499330520629883,
|
|
"learning_rate": 9.521660678347311e-06,
|
|
"loss": 1.2287,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 0.9170212765957447,
|
|
"grad_norm": 3.4153192043304443,
|
|
"learning_rate": 9.521285209656964e-06,
|
|
"loss": 1.2425,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 0.9172872340425532,
|
|
"grad_norm": 3.838230848312378,
|
|
"learning_rate": 9.520909601072726e-06,
|
|
"loss": 1.2476,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 0.9175531914893617,
|
|
"grad_norm": 3.879303455352783,
|
|
"learning_rate": 9.520533852606226e-06,
|
|
"loss": 1.2743,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.9178191489361702,
|
|
"grad_norm": 3.2687835693359375,
|
|
"learning_rate": 9.520157964269083e-06,
|
|
"loss": 1.0722,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 0.9180851063829787,
|
|
"grad_norm": 3.6070616245269775,
|
|
"learning_rate": 9.519781936072933e-06,
|
|
"loss": 1.2863,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 0.9183510638297873,
|
|
"grad_norm": 3.410642623901367,
|
|
"learning_rate": 9.519405768029408e-06,
|
|
"loss": 1.2184,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 0.9186170212765957,
|
|
"grad_norm": 3.642425775527954,
|
|
"learning_rate": 9.519029460150148e-06,
|
|
"loss": 1.2836,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 0.9188829787234043,
|
|
"grad_norm": 3.6479597091674805,
|
|
"learning_rate": 9.518653012446794e-06,
|
|
"loss": 1.3349,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.9191489361702128,
|
|
"grad_norm": 3.2941248416900635,
|
|
"learning_rate": 9.518276424931e-06,
|
|
"loss": 1.1445,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 0.9194148936170212,
|
|
"grad_norm": 3.3414933681488037,
|
|
"learning_rate": 9.51789969761441e-06,
|
|
"loss": 1.3321,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 0.9196808510638298,
|
|
"grad_norm": 3.39167857170105,
|
|
"learning_rate": 9.517522830508685e-06,
|
|
"loss": 1.222,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 0.9199468085106383,
|
|
"grad_norm": 3.520202875137329,
|
|
"learning_rate": 9.517145823625485e-06,
|
|
"loss": 1.2299,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 0.9202127659574468,
|
|
"grad_norm": 3.953166961669922,
|
|
"learning_rate": 9.516768676976476e-06,
|
|
"loss": 1.3692,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.9204787234042553,
|
|
"grad_norm": 3.654834032058716,
|
|
"learning_rate": 9.516391390573326e-06,
|
|
"loss": 1.1788,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 0.9207446808510639,
|
|
"grad_norm": 4.268529415130615,
|
|
"learning_rate": 9.516013964427708e-06,
|
|
"loss": 1.3661,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 0.9210106382978723,
|
|
"grad_norm": 3.7426726818084717,
|
|
"learning_rate": 9.515636398551302e-06,
|
|
"loss": 1.3322,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 0.9212765957446809,
|
|
"grad_norm": 3.7757678031921387,
|
|
"learning_rate": 9.515258692955788e-06,
|
|
"loss": 1.2663,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 0.9215425531914894,
|
|
"grad_norm": 3.2425293922424316,
|
|
"learning_rate": 9.514880847652855e-06,
|
|
"loss": 1.1537,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.9218085106382978,
|
|
"grad_norm": 3.891484498977661,
|
|
"learning_rate": 9.514502862654192e-06,
|
|
"loss": 1.3394,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 0.9220744680851064,
|
|
"grad_norm": 3.499422788619995,
|
|
"learning_rate": 9.514124737971495e-06,
|
|
"loss": 1.3386,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 0.9223404255319149,
|
|
"grad_norm": 3.8201444149017334,
|
|
"learning_rate": 9.513746473616466e-06,
|
|
"loss": 1.2374,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 0.9226063829787234,
|
|
"grad_norm": 3.488330841064453,
|
|
"learning_rate": 9.513368069600806e-06,
|
|
"loss": 1.1239,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 0.9228723404255319,
|
|
"grad_norm": 3.2124156951904297,
|
|
"learning_rate": 9.512989525936223e-06,
|
|
"loss": 1.2058,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.9231382978723405,
|
|
"grad_norm": 3.4447717666625977,
|
|
"learning_rate": 9.512610842634432e-06,
|
|
"loss": 1.1785,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 0.9234042553191489,
|
|
"grad_norm": 3.3703794479370117,
|
|
"learning_rate": 9.512232019707148e-06,
|
|
"loss": 1.3696,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 0.9236702127659574,
|
|
"grad_norm": 3.2821013927459717,
|
|
"learning_rate": 9.511853057166094e-06,
|
|
"loss": 1.181,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 0.923936170212766,
|
|
"grad_norm": 3.2314436435699463,
|
|
"learning_rate": 9.511473955022992e-06,
|
|
"loss": 1.2571,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 0.9242021276595744,
|
|
"grad_norm": 3.635651111602783,
|
|
"learning_rate": 9.511094713289575e-06,
|
|
"loss": 1.2779,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.924468085106383,
|
|
"grad_norm": 3.7356226444244385,
|
|
"learning_rate": 9.510715331977579e-06,
|
|
"loss": 1.3406,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 0.9247340425531915,
|
|
"grad_norm": 3.5567257404327393,
|
|
"learning_rate": 9.510335811098737e-06,
|
|
"loss": 1.2792,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 0.925,
|
|
"grad_norm": 3.603287696838379,
|
|
"learning_rate": 9.509956150664796e-06,
|
|
"loss": 1.1966,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 0.9252659574468085,
|
|
"grad_norm": 3.915576219558716,
|
|
"learning_rate": 9.509576350687502e-06,
|
|
"loss": 1.2955,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 0.925531914893617,
|
|
"grad_norm": 3.7345378398895264,
|
|
"learning_rate": 9.509196411178605e-06,
|
|
"loss": 1.1994,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.9257978723404255,
|
|
"grad_norm": 3.4640583992004395,
|
|
"learning_rate": 9.508816332149862e-06,
|
|
"loss": 1.1937,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 0.926063829787234,
|
|
"grad_norm": 3.5885074138641357,
|
|
"learning_rate": 9.508436113613036e-06,
|
|
"loss": 1.2895,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 0.9263297872340426,
|
|
"grad_norm": 3.241925001144409,
|
|
"learning_rate": 9.508055755579886e-06,
|
|
"loss": 1.1693,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 0.926595744680851,
|
|
"grad_norm": 3.664020538330078,
|
|
"learning_rate": 9.507675258062183e-06,
|
|
"loss": 1.2333,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 0.9268617021276596,
|
|
"grad_norm": 3.365907669067383,
|
|
"learning_rate": 9.507294621071702e-06,
|
|
"loss": 1.1572,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.9271276595744681,
|
|
"grad_norm": 3.634084939956665,
|
|
"learning_rate": 9.506913844620217e-06,
|
|
"loss": 1.1676,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 0.9273936170212767,
|
|
"grad_norm": 3.2822062969207764,
|
|
"learning_rate": 9.506532928719514e-06,
|
|
"loss": 1.2271,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 0.9276595744680851,
|
|
"grad_norm": 3.920335292816162,
|
|
"learning_rate": 9.506151873381376e-06,
|
|
"loss": 1.3218,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 0.9279255319148936,
|
|
"grad_norm": 3.8373231887817383,
|
|
"learning_rate": 9.505770678617592e-06,
|
|
"loss": 1.2391,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 0.9281914893617021,
|
|
"grad_norm": 3.5426108837127686,
|
|
"learning_rate": 9.50538934443996e-06,
|
|
"loss": 1.2676,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.9284574468085106,
|
|
"grad_norm": 3.550251007080078,
|
|
"learning_rate": 9.505007870860276e-06,
|
|
"loss": 1.2651,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 0.9287234042553192,
|
|
"grad_norm": 3.3801169395446777,
|
|
"learning_rate": 9.504626257890345e-06,
|
|
"loss": 1.1764,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 0.9289893617021276,
|
|
"grad_norm": 4.002630233764648,
|
|
"learning_rate": 9.504244505541974e-06,
|
|
"loss": 1.2602,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 0.9292553191489362,
|
|
"grad_norm": 3.6300952434539795,
|
|
"learning_rate": 9.503862613826976e-06,
|
|
"loss": 1.1864,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 0.9295212765957447,
|
|
"grad_norm": 3.574536085128784,
|
|
"learning_rate": 9.503480582757163e-06,
|
|
"loss": 1.3364,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.9297872340425531,
|
|
"grad_norm": 3.6244354248046875,
|
|
"learning_rate": 9.50309841234436e-06,
|
|
"loss": 1.1998,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 0.9300531914893617,
|
|
"grad_norm": 3.826706886291504,
|
|
"learning_rate": 9.502716102600393e-06,
|
|
"loss": 1.1791,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 0.9303191489361702,
|
|
"grad_norm": 3.3346476554870605,
|
|
"learning_rate": 9.502333653537085e-06,
|
|
"loss": 1.1943,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 0.9305851063829788,
|
|
"grad_norm": 3.4599905014038086,
|
|
"learning_rate": 9.501951065166276e-06,
|
|
"loss": 1.2966,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 0.9308510638297872,
|
|
"grad_norm": 3.6470425128936768,
|
|
"learning_rate": 9.501568337499798e-06,
|
|
"loss": 1.2633,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.9308510638297872,
|
|
"eval_loss": 1.2690000534057617,
|
|
"eval_runtime": 12.8787,
|
|
"eval_samples_per_second": 31.059,
|
|
"eval_steps_per_second": 3.882,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.9311170212765958,
|
|
"grad_norm": 3.7849044799804688,
|
|
"learning_rate": 9.501185470549496e-06,
|
|
"loss": 1.2158,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 0.9313829787234043,
|
|
"grad_norm": 3.3262534141540527,
|
|
"learning_rate": 9.500802464327217e-06,
|
|
"loss": 1.2429,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 0.9316489361702127,
|
|
"grad_norm": 3.458172559738159,
|
|
"learning_rate": 9.500419318844811e-06,
|
|
"loss": 1.2177,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 0.9319148936170213,
|
|
"grad_norm": 3.7243428230285645,
|
|
"learning_rate": 9.500036034114132e-06,
|
|
"loss": 1.2877,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 0.9321808510638298,
|
|
"grad_norm": 3.6194655895233154,
|
|
"learning_rate": 9.49965261014704e-06,
|
|
"loss": 1.3507,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.9324468085106383,
|
|
"grad_norm": 3.4799468517303467,
|
|
"learning_rate": 9.499269046955398e-06,
|
|
"loss": 1.2658,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 0.9327127659574468,
|
|
"grad_norm": 3.6711440086364746,
|
|
"learning_rate": 9.498885344551077e-06,
|
|
"loss": 1.1922,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 0.9329787234042554,
|
|
"grad_norm": 3.7202506065368652,
|
|
"learning_rate": 9.498501502945943e-06,
|
|
"loss": 1.1922,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 0.9332446808510638,
|
|
"grad_norm": 3.440639019012451,
|
|
"learning_rate": 9.498117522151878e-06,
|
|
"loss": 1.1795,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 0.9335106382978723,
|
|
"grad_norm": 3.513429880142212,
|
|
"learning_rate": 9.497733402180761e-06,
|
|
"loss": 1.2098,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.9337765957446809,
|
|
"grad_norm": 3.599651575088501,
|
|
"learning_rate": 9.497349143044478e-06,
|
|
"loss": 1.2052,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 0.9340425531914893,
|
|
"grad_norm": 4.015235900878906,
|
|
"learning_rate": 9.496964744754915e-06,
|
|
"loss": 1.233,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 0.9343085106382979,
|
|
"grad_norm": 3.3815979957580566,
|
|
"learning_rate": 9.49658020732397e-06,
|
|
"loss": 1.1291,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 0.9345744680851064,
|
|
"grad_norm": 3.3032724857330322,
|
|
"learning_rate": 9.49619553076354e-06,
|
|
"loss": 1.2174,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 0.9348404255319149,
|
|
"grad_norm": 3.571817398071289,
|
|
"learning_rate": 9.495810715085526e-06,
|
|
"loss": 1.3212,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.9351063829787234,
|
|
"grad_norm": 3.5486996173858643,
|
|
"learning_rate": 9.495425760301836e-06,
|
|
"loss": 1.1428,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 0.935372340425532,
|
|
"grad_norm": 3.3801069259643555,
|
|
"learning_rate": 9.495040666424378e-06,
|
|
"loss": 1.1673,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 0.9356382978723404,
|
|
"grad_norm": 3.6057615280151367,
|
|
"learning_rate": 9.494655433465071e-06,
|
|
"loss": 1.1342,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 0.9359042553191489,
|
|
"grad_norm": 3.6146769523620605,
|
|
"learning_rate": 9.494270061435834e-06,
|
|
"loss": 1.4436,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 0.9361702127659575,
|
|
"grad_norm": 3.200052499771118,
|
|
"learning_rate": 9.493884550348589e-06,
|
|
"loss": 1.1598,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.9364361702127659,
|
|
"grad_norm": 3.6785783767700195,
|
|
"learning_rate": 9.493498900215265e-06,
|
|
"loss": 1.2838,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 0.9367021276595745,
|
|
"grad_norm": 3.905540943145752,
|
|
"learning_rate": 9.493113111047794e-06,
|
|
"loss": 1.2665,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 0.936968085106383,
|
|
"grad_norm": 3.300579786300659,
|
|
"learning_rate": 9.492727182858115e-06,
|
|
"loss": 1.2111,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 0.9372340425531915,
|
|
"grad_norm": 3.8752784729003906,
|
|
"learning_rate": 9.492341115658167e-06,
|
|
"loss": 1.2444,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 3.561800241470337,
|
|
"learning_rate": 9.491954909459895e-06,
|
|
"loss": 1.2224,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.9377659574468085,
|
|
"grad_norm": 3.434983730316162,
|
|
"learning_rate": 9.491568564275252e-06,
|
|
"loss": 1.2249,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 0.938031914893617,
|
|
"grad_norm": 3.5711958408355713,
|
|
"learning_rate": 9.491182080116185e-06,
|
|
"loss": 1.3134,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 0.9382978723404255,
|
|
"grad_norm": 3.2614593505859375,
|
|
"learning_rate": 9.490795456994658e-06,
|
|
"loss": 1.1418,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 0.9385638297872341,
|
|
"grad_norm": 3.7001163959503174,
|
|
"learning_rate": 9.490408694922635e-06,
|
|
"loss": 1.2611,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 0.9388297872340425,
|
|
"grad_norm": 3.287165880203247,
|
|
"learning_rate": 9.490021793912079e-06,
|
|
"loss": 1.1458,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.9390957446808511,
|
|
"grad_norm": 3.9669268131256104,
|
|
"learning_rate": 9.489634753974961e-06,
|
|
"loss": 1.1978,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 0.9393617021276596,
|
|
"grad_norm": 3.8696441650390625,
|
|
"learning_rate": 9.48924757512326e-06,
|
|
"loss": 1.3488,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 0.939627659574468,
|
|
"grad_norm": 3.8109893798828125,
|
|
"learning_rate": 9.48886025736895e-06,
|
|
"loss": 1.2341,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 0.9398936170212766,
|
|
"grad_norm": 3.3541629314422607,
|
|
"learning_rate": 9.488472800724022e-06,
|
|
"loss": 1.1629,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 0.9401595744680851,
|
|
"grad_norm": 3.4784152507781982,
|
|
"learning_rate": 9.48808520520046e-06,
|
|
"loss": 1.3021,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.9404255319148936,
|
|
"grad_norm": 3.4299418926239014,
|
|
"learning_rate": 9.487697470810257e-06,
|
|
"loss": 1.1674,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 0.9406914893617021,
|
|
"grad_norm": 3.467414617538452,
|
|
"learning_rate": 9.487309597565413e-06,
|
|
"loss": 1.1953,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 0.9409574468085107,
|
|
"grad_norm": 3.263312816619873,
|
|
"learning_rate": 9.486921585477924e-06,
|
|
"loss": 1.1662,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 0.9412234042553191,
|
|
"grad_norm": 3.3032853603363037,
|
|
"learning_rate": 9.486533434559801e-06,
|
|
"loss": 1.2386,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 0.9414893617021277,
|
|
"grad_norm": 3.641338348388672,
|
|
"learning_rate": 9.48614514482305e-06,
|
|
"loss": 1.25,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.9417553191489362,
|
|
"grad_norm": 3.5189712047576904,
|
|
"learning_rate": 9.485756716279686e-06,
|
|
"loss": 1.2763,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 0.9420212765957446,
|
|
"grad_norm": 3.464155912399292,
|
|
"learning_rate": 9.485368148941728e-06,
|
|
"loss": 1.278,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 0.9422872340425532,
|
|
"grad_norm": 3.5938682556152344,
|
|
"learning_rate": 9.484979442821199e-06,
|
|
"loss": 1.1817,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 0.9425531914893617,
|
|
"grad_norm": 3.399099588394165,
|
|
"learning_rate": 9.484590597930125e-06,
|
|
"loss": 1.3007,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 0.9428191489361702,
|
|
"grad_norm": 3.681652545928955,
|
|
"learning_rate": 9.484201614280539e-06,
|
|
"loss": 1.1233,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.9430851063829787,
|
|
"grad_norm": 3.4110119342803955,
|
|
"learning_rate": 9.483812491884475e-06,
|
|
"loss": 1.3159,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 0.9433510638297873,
|
|
"grad_norm": 3.347201347351074,
|
|
"learning_rate": 9.483423230753975e-06,
|
|
"loss": 1.2668,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 0.9436170212765957,
|
|
"grad_norm": 3.551835775375366,
|
|
"learning_rate": 9.48303383090108e-06,
|
|
"loss": 1.2695,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 0.9438829787234042,
|
|
"grad_norm": 7.742011547088623,
|
|
"learning_rate": 9.48264429233784e-06,
|
|
"loss": 1.3468,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 0.9441489361702128,
|
|
"grad_norm": 3.5810296535491943,
|
|
"learning_rate": 9.482254615076307e-06,
|
|
"loss": 1.2088,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.9444148936170212,
|
|
"grad_norm": 3.6081788539886475,
|
|
"learning_rate": 9.481864799128541e-06,
|
|
"loss": 1.199,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 0.9446808510638298,
|
|
"grad_norm": 3.4480881690979004,
|
|
"learning_rate": 9.481474844506602e-06,
|
|
"loss": 1.2016,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 0.9449468085106383,
|
|
"grad_norm": 3.4126522541046143,
|
|
"learning_rate": 9.481084751222553e-06,
|
|
"loss": 1.0633,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 0.9452127659574469,
|
|
"grad_norm": 3.731552839279175,
|
|
"learning_rate": 9.480694519288467e-06,
|
|
"loss": 1.3171,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 0.9454787234042553,
|
|
"grad_norm": 3.7800607681274414,
|
|
"learning_rate": 9.480304148716418e-06,
|
|
"loss": 1.4008,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.9457446808510638,
|
|
"grad_norm": 3.509230375289917,
|
|
"learning_rate": 9.47991363951848e-06,
|
|
"loss": 1.2949,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 0.9460106382978724,
|
|
"grad_norm": 3.7124991416931152,
|
|
"learning_rate": 9.479522991706744e-06,
|
|
"loss": 1.1951,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 0.9462765957446808,
|
|
"grad_norm": 3.6707465648651123,
|
|
"learning_rate": 9.479132205293291e-06,
|
|
"loss": 1.1625,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 0.9465425531914894,
|
|
"grad_norm": 3.456841468811035,
|
|
"learning_rate": 9.478741280290214e-06,
|
|
"loss": 1.1969,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 0.9468085106382979,
|
|
"grad_norm": 4.189627170562744,
|
|
"learning_rate": 9.478350216709609e-06,
|
|
"loss": 1.4571,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.9470744680851064,
|
|
"grad_norm": 3.5188887119293213,
|
|
"learning_rate": 9.477959014563575e-06,
|
|
"loss": 1.2589,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 0.9473404255319149,
|
|
"grad_norm": 3.594780206680298,
|
|
"learning_rate": 9.477567673864217e-06,
|
|
"loss": 1.2652,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 0.9476063829787233,
|
|
"grad_norm": 3.3485286235809326,
|
|
"learning_rate": 9.477176194623644e-06,
|
|
"loss": 1.2256,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 0.9478723404255319,
|
|
"grad_norm": 3.549306631088257,
|
|
"learning_rate": 9.476784576853967e-06,
|
|
"loss": 1.2868,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 0.9481382978723404,
|
|
"grad_norm": 3.50877046585083,
|
|
"learning_rate": 9.476392820567306e-06,
|
|
"loss": 1.0912,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.948404255319149,
|
|
"grad_norm": 3.3570492267608643,
|
|
"learning_rate": 9.476000925775782e-06,
|
|
"loss": 1.2827,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 0.9486702127659574,
|
|
"grad_norm": 3.3039703369140625,
|
|
"learning_rate": 9.475608892491516e-06,
|
|
"loss": 1.1552,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 0.948936170212766,
|
|
"grad_norm": 3.559574604034424,
|
|
"learning_rate": 9.475216720726644e-06,
|
|
"loss": 1.1988,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 0.9492021276595745,
|
|
"grad_norm": 3.8060848712921143,
|
|
"learning_rate": 9.474824410493298e-06,
|
|
"loss": 1.3264,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 0.949468085106383,
|
|
"grad_norm": 3.3232123851776123,
|
|
"learning_rate": 9.474431961803615e-06,
|
|
"loss": 1.1884,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.9497340425531915,
|
|
"grad_norm": 3.821077346801758,
|
|
"learning_rate": 9.47403937466974e-06,
|
|
"loss": 1.3414,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"grad_norm": 3.464698076248169,
|
|
"learning_rate": 9.473646649103819e-06,
|
|
"loss": 1.1284,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 0.9502659574468085,
|
|
"grad_norm": 3.464268922805786,
|
|
"learning_rate": 9.473253785118003e-06,
|
|
"loss": 1.3262,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 0.950531914893617,
|
|
"grad_norm": 3.7841787338256836,
|
|
"learning_rate": 9.472860782724448e-06,
|
|
"loss": 1.1169,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 0.9507978723404256,
|
|
"grad_norm": 3.278888463973999,
|
|
"learning_rate": 9.472467641935314e-06,
|
|
"loss": 1.1413,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.951063829787234,
|
|
"grad_norm": 3.321603536605835,
|
|
"learning_rate": 9.472074362762767e-06,
|
|
"loss": 1.0513,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 0.9513297872340426,
|
|
"grad_norm": 3.8839926719665527,
|
|
"learning_rate": 9.471680945218973e-06,
|
|
"loss": 1.2412,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 0.9515957446808511,
|
|
"grad_norm": 3.5885181427001953,
|
|
"learning_rate": 9.471287389316107e-06,
|
|
"loss": 1.1092,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 0.9518617021276595,
|
|
"grad_norm": 3.592010498046875,
|
|
"learning_rate": 9.470893695066345e-06,
|
|
"loss": 1.275,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 0.9521276595744681,
|
|
"grad_norm": 3.785581111907959,
|
|
"learning_rate": 9.470499862481867e-06,
|
|
"loss": 1.3256,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.9523936170212766,
|
|
"grad_norm": 3.41489315032959,
|
|
"learning_rate": 9.47010589157486e-06,
|
|
"loss": 1.2419,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 0.9526595744680851,
|
|
"grad_norm": 3.4412648677825928,
|
|
"learning_rate": 9.469711782357513e-06,
|
|
"loss": 1.3029,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 0.9529255319148936,
|
|
"grad_norm": 3.6879758834838867,
|
|
"learning_rate": 9.469317534842025e-06,
|
|
"loss": 1.217,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 0.9531914893617022,
|
|
"grad_norm": 3.8642208576202393,
|
|
"learning_rate": 9.468923149040587e-06,
|
|
"loss": 1.3035,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 0.9534574468085106,
|
|
"grad_norm": 3.9491965770721436,
|
|
"learning_rate": 9.468528624965406e-06,
|
|
"loss": 1.3494,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.9537234042553191,
|
|
"grad_norm": 3.6963748931884766,
|
|
"learning_rate": 9.468133962628688e-06,
|
|
"loss": 1.1793,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 0.9539893617021277,
|
|
"grad_norm": 3.4110567569732666,
|
|
"learning_rate": 9.467739162042643e-06,
|
|
"loss": 1.1798,
|
|
"step": 3587
|
|
},
|
|
{
|
|
"epoch": 0.9542553191489361,
|
|
"grad_norm": 3.718494176864624,
|
|
"learning_rate": 9.46734422321949e-06,
|
|
"loss": 1.3528,
|
|
"step": 3588
|
|
},
|
|
{
|
|
"epoch": 0.9545212765957447,
|
|
"grad_norm": 3.9455974102020264,
|
|
"learning_rate": 9.466949146171449e-06,
|
|
"loss": 1.341,
|
|
"step": 3589
|
|
},
|
|
{
|
|
"epoch": 0.9547872340425532,
|
|
"grad_norm": 3.668195962905884,
|
|
"learning_rate": 9.46655393091074e-06,
|
|
"loss": 1.1503,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.9550531914893617,
|
|
"grad_norm": 3.662208080291748,
|
|
"learning_rate": 9.466158577449593e-06,
|
|
"loss": 1.3243,
|
|
"step": 3591
|
|
},
|
|
{
|
|
"epoch": 0.9553191489361702,
|
|
"grad_norm": 3.463543176651001,
|
|
"learning_rate": 9.465763085800244e-06,
|
|
"loss": 1.187,
|
|
"step": 3592
|
|
},
|
|
{
|
|
"epoch": 0.9555851063829788,
|
|
"grad_norm": 3.6207196712493896,
|
|
"learning_rate": 9.465367455974926e-06,
|
|
"loss": 1.2523,
|
|
"step": 3593
|
|
},
|
|
{
|
|
"epoch": 0.9558510638297872,
|
|
"grad_norm": 3.3348443508148193,
|
|
"learning_rate": 9.46497168798588e-06,
|
|
"loss": 1.2145,
|
|
"step": 3594
|
|
},
|
|
{
|
|
"epoch": 0.9561170212765957,
|
|
"grad_norm": 4.174299240112305,
|
|
"learning_rate": 9.464575781845355e-06,
|
|
"loss": 1.4818,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.9563829787234043,
|
|
"grad_norm": 3.3657476902008057,
|
|
"learning_rate": 9.464179737565598e-06,
|
|
"loss": 1.2587,
|
|
"step": 3596
|
|
},
|
|
{
|
|
"epoch": 0.9566489361702127,
|
|
"grad_norm": 3.697920560836792,
|
|
"learning_rate": 9.463783555158866e-06,
|
|
"loss": 1.36,
|
|
"step": 3597
|
|
},
|
|
{
|
|
"epoch": 0.9569148936170213,
|
|
"grad_norm": 3.825244903564453,
|
|
"learning_rate": 9.463387234637413e-06,
|
|
"loss": 1.2879,
|
|
"step": 3598
|
|
},
|
|
{
|
|
"epoch": 0.9571808510638298,
|
|
"grad_norm": 3.5759551525115967,
|
|
"learning_rate": 9.462990776013504e-06,
|
|
"loss": 1.4189,
|
|
"step": 3599
|
|
},
|
|
{
|
|
"epoch": 0.9574468085106383,
|
|
"grad_norm": 3.6317455768585205,
|
|
"learning_rate": 9.462594179299408e-06,
|
|
"loss": 1.3723,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.9577127659574468,
|
|
"grad_norm": 3.254585027694702,
|
|
"learning_rate": 9.46219744450739e-06,
|
|
"loss": 1.1231,
|
|
"step": 3601
|
|
},
|
|
{
|
|
"epoch": 0.9579787234042553,
|
|
"grad_norm": 3.0535624027252197,
|
|
"learning_rate": 9.461800571649734e-06,
|
|
"loss": 1.0536,
|
|
"step": 3602
|
|
},
|
|
{
|
|
"epoch": 0.9582446808510638,
|
|
"grad_norm": 3.603959798812866,
|
|
"learning_rate": 9.461403560738713e-06,
|
|
"loss": 1.254,
|
|
"step": 3603
|
|
},
|
|
{
|
|
"epoch": 0.9585106382978723,
|
|
"grad_norm": 3.4408342838287354,
|
|
"learning_rate": 9.461006411786613e-06,
|
|
"loss": 1.2253,
|
|
"step": 3604
|
|
},
|
|
{
|
|
"epoch": 0.9587765957446809,
|
|
"grad_norm": 3.6801369190216064,
|
|
"learning_rate": 9.460609124805724e-06,
|
|
"loss": 1.2253,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.9590425531914893,
|
|
"grad_norm": 3.968122959136963,
|
|
"learning_rate": 9.460211699808334e-06,
|
|
"loss": 1.2456,
|
|
"step": 3606
|
|
},
|
|
{
|
|
"epoch": 0.9593085106382979,
|
|
"grad_norm": 3.602989912033081,
|
|
"learning_rate": 9.459814136806746e-06,
|
|
"loss": 1.2261,
|
|
"step": 3607
|
|
},
|
|
{
|
|
"epoch": 0.9595744680851064,
|
|
"grad_norm": 3.5720174312591553,
|
|
"learning_rate": 9.459416435813258e-06,
|
|
"loss": 1.1869,
|
|
"step": 3608
|
|
},
|
|
{
|
|
"epoch": 0.9598404255319148,
|
|
"grad_norm": 3.626312732696533,
|
|
"learning_rate": 9.459018596840173e-06,
|
|
"loss": 1.3385,
|
|
"step": 3609
|
|
},
|
|
{
|
|
"epoch": 0.9601063829787234,
|
|
"grad_norm": 3.5388100147247314,
|
|
"learning_rate": 9.458620619899803e-06,
|
|
"loss": 1.2523,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.9603723404255319,
|
|
"grad_norm": 3.8266894817352295,
|
|
"learning_rate": 9.458222505004462e-06,
|
|
"loss": 1.4002,
|
|
"step": 3611
|
|
},
|
|
{
|
|
"epoch": 0.9606382978723405,
|
|
"grad_norm": 3.576223373413086,
|
|
"learning_rate": 9.457824252166467e-06,
|
|
"loss": 1.2669,
|
|
"step": 3612
|
|
},
|
|
{
|
|
"epoch": 0.9609042553191489,
|
|
"grad_norm": 3.5163745880126953,
|
|
"learning_rate": 9.457425861398144e-06,
|
|
"loss": 1.1806,
|
|
"step": 3613
|
|
},
|
|
{
|
|
"epoch": 0.9611702127659575,
|
|
"grad_norm": 3.586691379547119,
|
|
"learning_rate": 9.457027332711814e-06,
|
|
"loss": 1.3403,
|
|
"step": 3614
|
|
},
|
|
{
|
|
"epoch": 0.961436170212766,
|
|
"grad_norm": 3.5483405590057373,
|
|
"learning_rate": 9.456628666119812e-06,
|
|
"loss": 1.2426,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.9617021276595744,
|
|
"grad_norm": 3.600684881210327,
|
|
"learning_rate": 9.456229861634471e-06,
|
|
"loss": 1.2333,
|
|
"step": 3616
|
|
},
|
|
{
|
|
"epoch": 0.961968085106383,
|
|
"grad_norm": 3.446035385131836,
|
|
"learning_rate": 9.455830919268134e-06,
|
|
"loss": 1.161,
|
|
"step": 3617
|
|
},
|
|
{
|
|
"epoch": 0.9622340425531914,
|
|
"grad_norm": 3.329267978668213,
|
|
"learning_rate": 9.45543183903314e-06,
|
|
"loss": 1.1162,
|
|
"step": 3618
|
|
},
|
|
{
|
|
"epoch": 0.9625,
|
|
"grad_norm": 3.4342401027679443,
|
|
"learning_rate": 9.45503262094184e-06,
|
|
"loss": 1.3068,
|
|
"step": 3619
|
|
},
|
|
{
|
|
"epoch": 0.9627659574468085,
|
|
"grad_norm": 3.230329751968384,
|
|
"learning_rate": 9.454633265006585e-06,
|
|
"loss": 1.1398,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.9630319148936171,
|
|
"grad_norm": 3.3767967224121094,
|
|
"learning_rate": 9.454233771239733e-06,
|
|
"loss": 1.3104,
|
|
"step": 3621
|
|
},
|
|
{
|
|
"epoch": 0.9632978723404255,
|
|
"grad_norm": 3.2001163959503174,
|
|
"learning_rate": 9.453834139653643e-06,
|
|
"loss": 1.1632,
|
|
"step": 3622
|
|
},
|
|
{
|
|
"epoch": 0.9635638297872341,
|
|
"grad_norm": 3.9331612586975098,
|
|
"learning_rate": 9.453434370260683e-06,
|
|
"loss": 1.3891,
|
|
"step": 3623
|
|
},
|
|
{
|
|
"epoch": 0.9638297872340426,
|
|
"grad_norm": 4.0084052085876465,
|
|
"learning_rate": 9.453034463073218e-06,
|
|
"loss": 1.4323,
|
|
"step": 3624
|
|
},
|
|
{
|
|
"epoch": 0.964095744680851,
|
|
"grad_norm": 3.2673776149749756,
|
|
"learning_rate": 9.452634418103626e-06,
|
|
"loss": 1.0984,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.9643617021276596,
|
|
"grad_norm": 3.2544898986816406,
|
|
"learning_rate": 9.45223423536428e-06,
|
|
"loss": 1.2681,
|
|
"step": 3626
|
|
},
|
|
{
|
|
"epoch": 0.964627659574468,
|
|
"grad_norm": 3.625535488128662,
|
|
"learning_rate": 9.451833914867567e-06,
|
|
"loss": 1.258,
|
|
"step": 3627
|
|
},
|
|
{
|
|
"epoch": 0.9648936170212766,
|
|
"grad_norm": 3.048551082611084,
|
|
"learning_rate": 9.451433456625871e-06,
|
|
"loss": 1.207,
|
|
"step": 3628
|
|
},
|
|
{
|
|
"epoch": 0.9651595744680851,
|
|
"grad_norm": 3.567139148712158,
|
|
"learning_rate": 9.451032860651583e-06,
|
|
"loss": 1.2771,
|
|
"step": 3629
|
|
},
|
|
{
|
|
"epoch": 0.9654255319148937,
|
|
"grad_norm": 3.618807077407837,
|
|
"learning_rate": 9.450632126957098e-06,
|
|
"loss": 1.2666,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.9656914893617021,
|
|
"grad_norm": 3.4883675575256348,
|
|
"learning_rate": 9.450231255554814e-06,
|
|
"loss": 1.1142,
|
|
"step": 3631
|
|
},
|
|
{
|
|
"epoch": 0.9659574468085106,
|
|
"grad_norm": 3.687424898147583,
|
|
"learning_rate": 9.449830246457136e-06,
|
|
"loss": 1.1745,
|
|
"step": 3632
|
|
},
|
|
{
|
|
"epoch": 0.9662234042553192,
|
|
"grad_norm": 3.457051992416382,
|
|
"learning_rate": 9.44942909967647e-06,
|
|
"loss": 1.1846,
|
|
"step": 3633
|
|
},
|
|
{
|
|
"epoch": 0.9664893617021276,
|
|
"grad_norm": 3.5090994834899902,
|
|
"learning_rate": 9.449027815225231e-06,
|
|
"loss": 1.3255,
|
|
"step": 3634
|
|
},
|
|
{
|
|
"epoch": 0.9667553191489362,
|
|
"grad_norm": 3.2658236026763916,
|
|
"learning_rate": 9.448626393115833e-06,
|
|
"loss": 1.0964,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.9670212765957447,
|
|
"grad_norm": 3.7192766666412354,
|
|
"learning_rate": 9.448224833360695e-06,
|
|
"loss": 1.3171,
|
|
"step": 3636
|
|
},
|
|
{
|
|
"epoch": 0.9672872340425532,
|
|
"grad_norm": 3.891343355178833,
|
|
"learning_rate": 9.447823135972247e-06,
|
|
"loss": 1.206,
|
|
"step": 3637
|
|
},
|
|
{
|
|
"epoch": 0.9675531914893617,
|
|
"grad_norm": 3.7228803634643555,
|
|
"learning_rate": 9.447421300962911e-06,
|
|
"loss": 1.2032,
|
|
"step": 3638
|
|
},
|
|
{
|
|
"epoch": 0.9678191489361702,
|
|
"grad_norm": 3.348090171813965,
|
|
"learning_rate": 9.447019328345125e-06,
|
|
"loss": 1.2437,
|
|
"step": 3639
|
|
},
|
|
{
|
|
"epoch": 0.9680851063829787,
|
|
"grad_norm": 3.3824315071105957,
|
|
"learning_rate": 9.446617218131326e-06,
|
|
"loss": 1.1005,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.9683510638297872,
|
|
"grad_norm": 4.107891082763672,
|
|
"learning_rate": 9.446214970333954e-06,
|
|
"loss": 1.3365,
|
|
"step": 3641
|
|
},
|
|
{
|
|
"epoch": 0.9686170212765958,
|
|
"grad_norm": 3.609551191329956,
|
|
"learning_rate": 9.445812584965458e-06,
|
|
"loss": 1.2756,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 0.9688829787234042,
|
|
"grad_norm": 3.625800371170044,
|
|
"learning_rate": 9.445410062038284e-06,
|
|
"loss": 1.2114,
|
|
"step": 3643
|
|
},
|
|
{
|
|
"epoch": 0.9691489361702128,
|
|
"grad_norm": 3.605753183364868,
|
|
"learning_rate": 9.445007401564889e-06,
|
|
"loss": 1.3025,
|
|
"step": 3644
|
|
},
|
|
{
|
|
"epoch": 0.9694148936170213,
|
|
"grad_norm": 3.2446835041046143,
|
|
"learning_rate": 9.444604603557733e-06,
|
|
"loss": 1.2037,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.9696808510638298,
|
|
"grad_norm": 3.478797674179077,
|
|
"learning_rate": 9.444201668029278e-06,
|
|
"loss": 1.2862,
|
|
"step": 3646
|
|
},
|
|
{
|
|
"epoch": 0.9699468085106383,
|
|
"grad_norm": 3.33634352684021,
|
|
"learning_rate": 9.443798594991989e-06,
|
|
"loss": 1.1298,
|
|
"step": 3647
|
|
},
|
|
{
|
|
"epoch": 0.9702127659574468,
|
|
"grad_norm": 3.82041597366333,
|
|
"learning_rate": 9.44339538445834e-06,
|
|
"loss": 1.2301,
|
|
"step": 3648
|
|
},
|
|
{
|
|
"epoch": 0.9704787234042553,
|
|
"grad_norm": 3.5176687240600586,
|
|
"learning_rate": 9.442992036440808e-06,
|
|
"loss": 1.1489,
|
|
"step": 3649
|
|
},
|
|
{
|
|
"epoch": 0.9707446808510638,
|
|
"grad_norm": 3.265772819519043,
|
|
"learning_rate": 9.44258855095187e-06,
|
|
"loss": 1.1147,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.9710106382978724,
|
|
"grad_norm": 3.5735883712768555,
|
|
"learning_rate": 9.442184928004012e-06,
|
|
"loss": 1.2768,
|
|
"step": 3651
|
|
},
|
|
{
|
|
"epoch": 0.9712765957446808,
|
|
"grad_norm": 3.6002047061920166,
|
|
"learning_rate": 9.441781167609722e-06,
|
|
"loss": 1.3395,
|
|
"step": 3652
|
|
},
|
|
{
|
|
"epoch": 0.9715425531914894,
|
|
"grad_norm": 3.8888189792633057,
|
|
"learning_rate": 9.441377269781496e-06,
|
|
"loss": 1.2223,
|
|
"step": 3653
|
|
},
|
|
{
|
|
"epoch": 0.9718085106382979,
|
|
"grad_norm": 3.6971378326416016,
|
|
"learning_rate": 9.440973234531825e-06,
|
|
"loss": 1.1813,
|
|
"step": 3654
|
|
},
|
|
{
|
|
"epoch": 0.9720744680851063,
|
|
"grad_norm": 3.6079912185668945,
|
|
"learning_rate": 9.440569061873213e-06,
|
|
"loss": 1.1156,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.9723404255319149,
|
|
"grad_norm": 3.839540481567383,
|
|
"learning_rate": 9.440164751818168e-06,
|
|
"loss": 1.4711,
|
|
"step": 3656
|
|
},
|
|
{
|
|
"epoch": 0.9726063829787234,
|
|
"grad_norm": 3.7191896438598633,
|
|
"learning_rate": 9.439760304379197e-06,
|
|
"loss": 1.2351,
|
|
"step": 3657
|
|
},
|
|
{
|
|
"epoch": 0.972872340425532,
|
|
"grad_norm": 3.902529001235962,
|
|
"learning_rate": 9.439355719568817e-06,
|
|
"loss": 1.3487,
|
|
"step": 3658
|
|
},
|
|
{
|
|
"epoch": 0.9731382978723404,
|
|
"grad_norm": 3.389925241470337,
|
|
"learning_rate": 9.438950997399543e-06,
|
|
"loss": 1.1905,
|
|
"step": 3659
|
|
},
|
|
{
|
|
"epoch": 0.973404255319149,
|
|
"grad_norm": 3.6134610176086426,
|
|
"learning_rate": 9.438546137883898e-06,
|
|
"loss": 1.2323,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.9736702127659574,
|
|
"grad_norm": 4.062784671783447,
|
|
"learning_rate": 9.438141141034409e-06,
|
|
"loss": 1.2437,
|
|
"step": 3661
|
|
},
|
|
{
|
|
"epoch": 0.9739361702127659,
|
|
"grad_norm": 3.6207644939422607,
|
|
"learning_rate": 9.437736006863611e-06,
|
|
"loss": 1.2922,
|
|
"step": 3662
|
|
},
|
|
{
|
|
"epoch": 0.9742021276595745,
|
|
"grad_norm": 3.2939248085021973,
|
|
"learning_rate": 9.437330735384034e-06,
|
|
"loss": 1.2348,
|
|
"step": 3663
|
|
},
|
|
{
|
|
"epoch": 0.9744680851063829,
|
|
"grad_norm": 3.6209723949432373,
|
|
"learning_rate": 9.43692532660822e-06,
|
|
"loss": 1.2698,
|
|
"step": 3664
|
|
},
|
|
{
|
|
"epoch": 0.9747340425531915,
|
|
"grad_norm": 3.766961097717285,
|
|
"learning_rate": 9.436519780548712e-06,
|
|
"loss": 1.3306,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.975,
|
|
"grad_norm": 3.1702146530151367,
|
|
"learning_rate": 9.43611409721806e-06,
|
|
"loss": 1.2877,
|
|
"step": 3666
|
|
},
|
|
{
|
|
"epoch": 0.9752659574468086,
|
|
"grad_norm": 3.411604642868042,
|
|
"learning_rate": 9.435708276628814e-06,
|
|
"loss": 1.1874,
|
|
"step": 3667
|
|
},
|
|
{
|
|
"epoch": 0.975531914893617,
|
|
"grad_norm": 3.3507773876190186,
|
|
"learning_rate": 9.435302318793533e-06,
|
|
"loss": 1.1614,
|
|
"step": 3668
|
|
},
|
|
{
|
|
"epoch": 0.9757978723404256,
|
|
"grad_norm": 3.42853045463562,
|
|
"learning_rate": 9.434896223724774e-06,
|
|
"loss": 1.128,
|
|
"step": 3669
|
|
},
|
|
{
|
|
"epoch": 0.976063829787234,
|
|
"grad_norm": 3.5911173820495605,
|
|
"learning_rate": 9.434489991435106e-06,
|
|
"loss": 1.2216,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.9763297872340425,
|
|
"grad_norm": 3.4679529666900635,
|
|
"learning_rate": 9.434083621937096e-06,
|
|
"loss": 1.1932,
|
|
"step": 3671
|
|
},
|
|
{
|
|
"epoch": 0.9765957446808511,
|
|
"grad_norm": 3.4107143878936768,
|
|
"learning_rate": 9.433677115243318e-06,
|
|
"loss": 1.1279,
|
|
"step": 3672
|
|
},
|
|
{
|
|
"epoch": 0.9768617021276595,
|
|
"grad_norm": 3.5593109130859375,
|
|
"learning_rate": 9.433270471366352e-06,
|
|
"loss": 1.1996,
|
|
"step": 3673
|
|
},
|
|
{
|
|
"epoch": 0.9771276595744681,
|
|
"grad_norm": 3.193164110183716,
|
|
"learning_rate": 9.432863690318777e-06,
|
|
"loss": 1.103,
|
|
"step": 3674
|
|
},
|
|
{
|
|
"epoch": 0.9773936170212766,
|
|
"grad_norm": 3.5351223945617676,
|
|
"learning_rate": 9.432456772113179e-06,
|
|
"loss": 1.2212,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.9776595744680852,
|
|
"grad_norm": 3.4629955291748047,
|
|
"learning_rate": 9.432049716762151e-06,
|
|
"loss": 1.2055,
|
|
"step": 3676
|
|
},
|
|
{
|
|
"epoch": 0.9779255319148936,
|
|
"grad_norm": 3.661907196044922,
|
|
"learning_rate": 9.431642524278286e-06,
|
|
"loss": 1.3389,
|
|
"step": 3677
|
|
},
|
|
{
|
|
"epoch": 0.9781914893617021,
|
|
"grad_norm": 3.140364408493042,
|
|
"learning_rate": 9.431235194674185e-06,
|
|
"loss": 1.2099,
|
|
"step": 3678
|
|
},
|
|
{
|
|
"epoch": 0.9784574468085107,
|
|
"grad_norm": 3.7145817279815674,
|
|
"learning_rate": 9.43082772796245e-06,
|
|
"loss": 1.49,
|
|
"step": 3679
|
|
},
|
|
{
|
|
"epoch": 0.9787234042553191,
|
|
"grad_norm": 3.3982760906219482,
|
|
"learning_rate": 9.430420124155687e-06,
|
|
"loss": 1.2001,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.9789893617021277,
|
|
"grad_norm": 3.7518324851989746,
|
|
"learning_rate": 9.43001238326651e-06,
|
|
"loss": 1.4143,
|
|
"step": 3681
|
|
},
|
|
{
|
|
"epoch": 0.9792553191489362,
|
|
"grad_norm": 3.708822250366211,
|
|
"learning_rate": 9.429604505307535e-06,
|
|
"loss": 1.2038,
|
|
"step": 3682
|
|
},
|
|
{
|
|
"epoch": 0.9795212765957447,
|
|
"grad_norm": 3.5261037349700928,
|
|
"learning_rate": 9.42919649029138e-06,
|
|
"loss": 1.2233,
|
|
"step": 3683
|
|
},
|
|
{
|
|
"epoch": 0.9797872340425532,
|
|
"grad_norm": 3.842564582824707,
|
|
"learning_rate": 9.428788338230672e-06,
|
|
"loss": 1.3385,
|
|
"step": 3684
|
|
},
|
|
{
|
|
"epoch": 0.9800531914893617,
|
|
"grad_norm": 3.688267230987549,
|
|
"learning_rate": 9.428380049138038e-06,
|
|
"loss": 1.2034,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.9803191489361702,
|
|
"grad_norm": 3.877396583557129,
|
|
"learning_rate": 9.42797162302611e-06,
|
|
"loss": 1.2775,
|
|
"step": 3686
|
|
},
|
|
{
|
|
"epoch": 0.9805851063829787,
|
|
"grad_norm": 3.4748518466949463,
|
|
"learning_rate": 9.427563059907528e-06,
|
|
"loss": 1.4141,
|
|
"step": 3687
|
|
},
|
|
{
|
|
"epoch": 0.9808510638297873,
|
|
"grad_norm": 3.0281589031219482,
|
|
"learning_rate": 9.427154359794931e-06,
|
|
"loss": 1.2591,
|
|
"step": 3688
|
|
},
|
|
{
|
|
"epoch": 0.9811170212765957,
|
|
"grad_norm": 3.5246212482452393,
|
|
"learning_rate": 9.42674552270097e-06,
|
|
"loss": 1.1775,
|
|
"step": 3689
|
|
},
|
|
{
|
|
"epoch": 0.9813829787234043,
|
|
"grad_norm": 3.599862813949585,
|
|
"learning_rate": 9.426336548638287e-06,
|
|
"loss": 1.187,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.9816489361702128,
|
|
"grad_norm": 3.8031740188598633,
|
|
"learning_rate": 9.42592743761954e-06,
|
|
"loss": 1.3704,
|
|
"step": 3691
|
|
},
|
|
{
|
|
"epoch": 0.9819148936170212,
|
|
"grad_norm": 3.708652973175049,
|
|
"learning_rate": 9.425518189657388e-06,
|
|
"loss": 1.2567,
|
|
"step": 3692
|
|
},
|
|
{
|
|
"epoch": 0.9821808510638298,
|
|
"grad_norm": 3.341240882873535,
|
|
"learning_rate": 9.425108804764493e-06,
|
|
"loss": 1.4062,
|
|
"step": 3693
|
|
},
|
|
{
|
|
"epoch": 0.9824468085106383,
|
|
"grad_norm": 3.5106687545776367,
|
|
"learning_rate": 9.42469928295352e-06,
|
|
"loss": 1.1759,
|
|
"step": 3694
|
|
},
|
|
{
|
|
"epoch": 0.9827127659574468,
|
|
"grad_norm": 3.153082847595215,
|
|
"learning_rate": 9.424289624237143e-06,
|
|
"loss": 1.1955,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.9829787234042553,
|
|
"grad_norm": 3.4173176288604736,
|
|
"learning_rate": 9.423879828628038e-06,
|
|
"loss": 1.3188,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 0.9832446808510639,
|
|
"grad_norm": 3.5854523181915283,
|
|
"learning_rate": 9.42346989613888e-06,
|
|
"loss": 1.2425,
|
|
"step": 3697
|
|
},
|
|
{
|
|
"epoch": 0.9835106382978723,
|
|
"grad_norm": 3.536123752593994,
|
|
"learning_rate": 9.423059826782355e-06,
|
|
"loss": 1.2088,
|
|
"step": 3698
|
|
},
|
|
{
|
|
"epoch": 0.9837765957446809,
|
|
"grad_norm": 3.5280613899230957,
|
|
"learning_rate": 9.422649620571155e-06,
|
|
"loss": 1.4956,
|
|
"step": 3699
|
|
},
|
|
{
|
|
"epoch": 0.9840425531914894,
|
|
"grad_norm": 3.896684169769287,
|
|
"learning_rate": 9.422239277517964e-06,
|
|
"loss": 1.3236,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.9843085106382978,
|
|
"grad_norm": 3.417961597442627,
|
|
"learning_rate": 9.421828797635487e-06,
|
|
"loss": 1.2044,
|
|
"step": 3701
|
|
},
|
|
{
|
|
"epoch": 0.9845744680851064,
|
|
"grad_norm": 3.4376044273376465,
|
|
"learning_rate": 9.421418180936419e-06,
|
|
"loss": 1.2657,
|
|
"step": 3702
|
|
},
|
|
{
|
|
"epoch": 0.9848404255319149,
|
|
"grad_norm": 3.8742475509643555,
|
|
"learning_rate": 9.421007427433467e-06,
|
|
"loss": 1.2526,
|
|
"step": 3703
|
|
},
|
|
{
|
|
"epoch": 0.9851063829787234,
|
|
"grad_norm": 4.002706527709961,
|
|
"learning_rate": 9.42059653713934e-06,
|
|
"loss": 1.446,
|
|
"step": 3704
|
|
},
|
|
{
|
|
"epoch": 0.9853723404255319,
|
|
"grad_norm": 3.462308883666992,
|
|
"learning_rate": 9.420185510066753e-06,
|
|
"loss": 1.2338,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.9856382978723405,
|
|
"grad_norm": 3.684730291366577,
|
|
"learning_rate": 9.41977434622842e-06,
|
|
"loss": 1.2417,
|
|
"step": 3706
|
|
},
|
|
{
|
|
"epoch": 0.9859042553191489,
|
|
"grad_norm": 3.5235018730163574,
|
|
"learning_rate": 9.419363045637067e-06,
|
|
"loss": 1.3775,
|
|
"step": 3707
|
|
},
|
|
{
|
|
"epoch": 0.9861702127659574,
|
|
"grad_norm": 3.2986860275268555,
|
|
"learning_rate": 9.418951608305417e-06,
|
|
"loss": 1.1967,
|
|
"step": 3708
|
|
},
|
|
{
|
|
"epoch": 0.986436170212766,
|
|
"grad_norm": 3.2341742515563965,
|
|
"learning_rate": 9.418540034246202e-06,
|
|
"loss": 1.1223,
|
|
"step": 3709
|
|
},
|
|
{
|
|
"epoch": 0.9867021276595744,
|
|
"grad_norm": 3.5601837635040283,
|
|
"learning_rate": 9.418128323472157e-06,
|
|
"loss": 1.2934,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.986968085106383,
|
|
"grad_norm": 4.002072811126709,
|
|
"learning_rate": 9.41771647599602e-06,
|
|
"loss": 1.2226,
|
|
"step": 3711
|
|
},
|
|
{
|
|
"epoch": 0.9872340425531915,
|
|
"grad_norm": 3.6095480918884277,
|
|
"learning_rate": 9.417304491830533e-06,
|
|
"loss": 1.2332,
|
|
"step": 3712
|
|
},
|
|
{
|
|
"epoch": 0.9875,
|
|
"grad_norm": 3.7682595252990723,
|
|
"learning_rate": 9.416892370988445e-06,
|
|
"loss": 1.1929,
|
|
"step": 3713
|
|
},
|
|
{
|
|
"epoch": 0.9877659574468085,
|
|
"grad_norm": 3.4983551502227783,
|
|
"learning_rate": 9.416480113482505e-06,
|
|
"loss": 1.2426,
|
|
"step": 3714
|
|
},
|
|
{
|
|
"epoch": 0.988031914893617,
|
|
"grad_norm": 3.490725040435791,
|
|
"learning_rate": 9.416067719325472e-06,
|
|
"loss": 1.2009,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.9882978723404255,
|
|
"grad_norm": 3.564605474472046,
|
|
"learning_rate": 9.415655188530104e-06,
|
|
"loss": 1.2105,
|
|
"step": 3716
|
|
},
|
|
{
|
|
"epoch": 0.988563829787234,
|
|
"grad_norm": 3.5361475944519043,
|
|
"learning_rate": 9.415242521109166e-06,
|
|
"loss": 1.3389,
|
|
"step": 3717
|
|
},
|
|
{
|
|
"epoch": 0.9888297872340426,
|
|
"grad_norm": 3.3671114444732666,
|
|
"learning_rate": 9.414829717075426e-06,
|
|
"loss": 1.3157,
|
|
"step": 3718
|
|
},
|
|
{
|
|
"epoch": 0.989095744680851,
|
|
"grad_norm": 3.7442715167999268,
|
|
"learning_rate": 9.414416776441656e-06,
|
|
"loss": 1.1551,
|
|
"step": 3719
|
|
},
|
|
{
|
|
"epoch": 0.9893617021276596,
|
|
"grad_norm": 3.4414875507354736,
|
|
"learning_rate": 9.414003699220636e-06,
|
|
"loss": 1.2135,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.9896276595744681,
|
|
"grad_norm": 4.052205562591553,
|
|
"learning_rate": 9.413590485425143e-06,
|
|
"loss": 1.3299,
|
|
"step": 3721
|
|
},
|
|
{
|
|
"epoch": 0.9898936170212767,
|
|
"grad_norm": 3.0953876972198486,
|
|
"learning_rate": 9.413177135067964e-06,
|
|
"loss": 1.1183,
|
|
"step": 3722
|
|
},
|
|
{
|
|
"epoch": 0.9901595744680851,
|
|
"grad_norm": 3.767108678817749,
|
|
"learning_rate": 9.41276364816189e-06,
|
|
"loss": 1.325,
|
|
"step": 3723
|
|
},
|
|
{
|
|
"epoch": 0.9904255319148936,
|
|
"grad_norm": 3.3017489910125732,
|
|
"learning_rate": 9.412350024719713e-06,
|
|
"loss": 1.2328,
|
|
"step": 3724
|
|
},
|
|
{
|
|
"epoch": 0.9906914893617021,
|
|
"grad_norm": 3.5287554264068604,
|
|
"learning_rate": 9.41193626475423e-06,
|
|
"loss": 1.2442,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.9909574468085106,
|
|
"grad_norm": 3.6898324489593506,
|
|
"learning_rate": 9.411522368278243e-06,
|
|
"loss": 1.2682,
|
|
"step": 3726
|
|
},
|
|
{
|
|
"epoch": 0.9912234042553192,
|
|
"grad_norm": 3.9228873252868652,
|
|
"learning_rate": 9.411108335304562e-06,
|
|
"loss": 1.3415,
|
|
"step": 3727
|
|
},
|
|
{
|
|
"epoch": 0.9914893617021276,
|
|
"grad_norm": 3.9011435508728027,
|
|
"learning_rate": 9.410694165845996e-06,
|
|
"loss": 1.2388,
|
|
"step": 3728
|
|
},
|
|
{
|
|
"epoch": 0.9917553191489362,
|
|
"grad_norm": 3.714230537414551,
|
|
"learning_rate": 9.41027985991536e-06,
|
|
"loss": 1.2085,
|
|
"step": 3729
|
|
},
|
|
{
|
|
"epoch": 0.9920212765957447,
|
|
"grad_norm": 3.627887010574341,
|
|
"learning_rate": 9.409865417525473e-06,
|
|
"loss": 1.2682,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.9922872340425531,
|
|
"grad_norm": 3.4126439094543457,
|
|
"learning_rate": 9.409450838689156e-06,
|
|
"loss": 1.2089,
|
|
"step": 3731
|
|
},
|
|
{
|
|
"epoch": 0.9925531914893617,
|
|
"grad_norm": 3.5555756092071533,
|
|
"learning_rate": 9.409036123419239e-06,
|
|
"loss": 1.2066,
|
|
"step": 3732
|
|
},
|
|
{
|
|
"epoch": 0.9928191489361702,
|
|
"grad_norm": 3.5292632579803467,
|
|
"learning_rate": 9.408621271728555e-06,
|
|
"loss": 1.1913,
|
|
"step": 3733
|
|
},
|
|
{
|
|
"epoch": 0.9930851063829788,
|
|
"grad_norm": 3.5443150997161865,
|
|
"learning_rate": 9.408206283629937e-06,
|
|
"loss": 1.2293,
|
|
"step": 3734
|
|
},
|
|
{
|
|
"epoch": 0.9933510638297872,
|
|
"grad_norm": 3.8415119647979736,
|
|
"learning_rate": 9.407791159136226e-06,
|
|
"loss": 1.496,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.9936170212765958,
|
|
"grad_norm": 3.647085189819336,
|
|
"learning_rate": 9.407375898260267e-06,
|
|
"loss": 1.1983,
|
|
"step": 3736
|
|
},
|
|
{
|
|
"epoch": 0.9938829787234043,
|
|
"grad_norm": 3.2950799465179443,
|
|
"learning_rate": 9.40696050101491e-06,
|
|
"loss": 1.1298,
|
|
"step": 3737
|
|
},
|
|
{
|
|
"epoch": 0.9941489361702127,
|
|
"grad_norm": 3.837249517440796,
|
|
"learning_rate": 9.406544967413008e-06,
|
|
"loss": 1.2763,
|
|
"step": 3738
|
|
},
|
|
{
|
|
"epoch": 0.9944148936170213,
|
|
"grad_norm": 3.437069892883301,
|
|
"learning_rate": 9.406129297467414e-06,
|
|
"loss": 1.1689,
|
|
"step": 3739
|
|
},
|
|
{
|
|
"epoch": 0.9946808510638298,
|
|
"grad_norm": 3.7600064277648926,
|
|
"learning_rate": 9.405713491190992e-06,
|
|
"loss": 1.4092,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.9949468085106383,
|
|
"grad_norm": 3.547830104827881,
|
|
"learning_rate": 9.405297548596607e-06,
|
|
"loss": 1.3794,
|
|
"step": 3741
|
|
},
|
|
{
|
|
"epoch": 0.9952127659574468,
|
|
"grad_norm": 3.673377752304077,
|
|
"learning_rate": 9.404881469697132e-06,
|
|
"loss": 1.1934,
|
|
"step": 3742
|
|
},
|
|
{
|
|
"epoch": 0.9954787234042554,
|
|
"grad_norm": 3.6018290519714355,
|
|
"learning_rate": 9.404465254505435e-06,
|
|
"loss": 1.2228,
|
|
"step": 3743
|
|
},
|
|
{
|
|
"epoch": 0.9957446808510638,
|
|
"grad_norm": 3.5014569759368896,
|
|
"learning_rate": 9.4040489030344e-06,
|
|
"loss": 1.1731,
|
|
"step": 3744
|
|
},
|
|
{
|
|
"epoch": 0.9960106382978723,
|
|
"grad_norm": 3.6044108867645264,
|
|
"learning_rate": 9.403632415296907e-06,
|
|
"loss": 1.2917,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.9962765957446809,
|
|
"grad_norm": 3.626147985458374,
|
|
"learning_rate": 9.40321579130584e-06,
|
|
"loss": 1.2297,
|
|
"step": 3746
|
|
},
|
|
{
|
|
"epoch": 0.9965425531914893,
|
|
"grad_norm": 3.5548157691955566,
|
|
"learning_rate": 9.402799031074095e-06,
|
|
"loss": 1.2096,
|
|
"step": 3747
|
|
},
|
|
{
|
|
"epoch": 0.9968085106382979,
|
|
"grad_norm": 4.016201019287109,
|
|
"learning_rate": 9.402382134614563e-06,
|
|
"loss": 1.2461,
|
|
"step": 3748
|
|
},
|
|
{
|
|
"epoch": 0.9970744680851064,
|
|
"grad_norm": 3.2637929916381836,
|
|
"learning_rate": 9.401965101940144e-06,
|
|
"loss": 1.1531,
|
|
"step": 3749
|
|
},
|
|
{
|
|
"epoch": 0.9973404255319149,
|
|
"grad_norm": 3.330240249633789,
|
|
"learning_rate": 9.40154793306374e-06,
|
|
"loss": 1.1598,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.9976063829787234,
|
|
"grad_norm": 3.522907257080078,
|
|
"learning_rate": 9.401130627998265e-06,
|
|
"loss": 1.1563,
|
|
"step": 3751
|
|
},
|
|
{
|
|
"epoch": 0.997872340425532,
|
|
"grad_norm": 3.462400197982788,
|
|
"learning_rate": 9.400713186756625e-06,
|
|
"loss": 1.0948,
|
|
"step": 3752
|
|
},
|
|
{
|
|
"epoch": 0.9981382978723404,
|
|
"grad_norm": 3.6393964290618896,
|
|
"learning_rate": 9.400295609351738e-06,
|
|
"loss": 1.2499,
|
|
"step": 3753
|
|
},
|
|
{
|
|
"epoch": 0.9984042553191489,
|
|
"grad_norm": 3.4382801055908203,
|
|
"learning_rate": 9.399877895796526e-06,
|
|
"loss": 1.2587,
|
|
"step": 3754
|
|
},
|
|
{
|
|
"epoch": 0.9986702127659575,
|
|
"grad_norm": 3.769301414489746,
|
|
"learning_rate": 9.399460046103908e-06,
|
|
"loss": 1.283,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.9989361702127659,
|
|
"grad_norm": 3.3904542922973633,
|
|
"learning_rate": 9.399042060286819e-06,
|
|
"loss": 1.3667,
|
|
"step": 3756
|
|
},
|
|
{
|
|
"epoch": 0.9992021276595745,
|
|
"grad_norm": 3.413027763366699,
|
|
"learning_rate": 9.398623938358188e-06,
|
|
"loss": 1.1575,
|
|
"step": 3757
|
|
},
|
|
{
|
|
"epoch": 0.999468085106383,
|
|
"grad_norm": 3.8313398361206055,
|
|
"learning_rate": 9.398205680330954e-06,
|
|
"loss": 1.1665,
|
|
"step": 3758
|
|
},
|
|
{
|
|
"epoch": 0.9997340425531915,
|
|
"grad_norm": 3.5040853023529053,
|
|
"learning_rate": 9.397787286218058e-06,
|
|
"loss": 1.3182,
|
|
"step": 3759
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 3.6746809482574463,
|
|
"learning_rate": 9.397368756032445e-06,
|
|
"loss": 1.2287,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 1.0002659574468085,
|
|
"grad_norm": 3.308379650115967,
|
|
"learning_rate": 9.396950089787066e-06,
|
|
"loss": 0.8299,
|
|
"step": 3761
|
|
},
|
|
{
|
|
"epoch": 1.000531914893617,
|
|
"grad_norm": 3.8195013999938965,
|
|
"learning_rate": 9.396531287494877e-06,
|
|
"loss": 0.8431,
|
|
"step": 3762
|
|
},
|
|
{
|
|
"epoch": 1.0007978723404256,
|
|
"grad_norm": 3.317417621612549,
|
|
"learning_rate": 9.396112349168832e-06,
|
|
"loss": 0.9087,
|
|
"step": 3763
|
|
},
|
|
{
|
|
"epoch": 1.001063829787234,
|
|
"grad_norm": 3.6359126567840576,
|
|
"learning_rate": 9.395693274821893e-06,
|
|
"loss": 0.8605,
|
|
"step": 3764
|
|
},
|
|
{
|
|
"epoch": 1.0013297872340425,
|
|
"grad_norm": 3.3946707248687744,
|
|
"learning_rate": 9.39527406446703e-06,
|
|
"loss": 0.9424,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 1.001595744680851,
|
|
"grad_norm": 3.7910523414611816,
|
|
"learning_rate": 9.394854718117214e-06,
|
|
"loss": 0.7635,
|
|
"step": 3766
|
|
},
|
|
{
|
|
"epoch": 1.0018617021276597,
|
|
"grad_norm": 3.847181558609009,
|
|
"learning_rate": 9.394435235785417e-06,
|
|
"loss": 0.8419,
|
|
"step": 3767
|
|
},
|
|
{
|
|
"epoch": 1.0021276595744681,
|
|
"grad_norm": 3.5999948978424072,
|
|
"learning_rate": 9.394015617484621e-06,
|
|
"loss": 0.7906,
|
|
"step": 3768
|
|
},
|
|
{
|
|
"epoch": 1.0023936170212766,
|
|
"grad_norm": 3.53528094291687,
|
|
"learning_rate": 9.393595863227808e-06,
|
|
"loss": 0.7652,
|
|
"step": 3769
|
|
},
|
|
{
|
|
"epoch": 1.002659574468085,
|
|
"grad_norm": 4.102449417114258,
|
|
"learning_rate": 9.393175973027967e-06,
|
|
"loss": 0.837,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 1.0029255319148935,
|
|
"grad_norm": 4.625784397125244,
|
|
"learning_rate": 9.392755946898087e-06,
|
|
"loss": 0.8694,
|
|
"step": 3771
|
|
},
|
|
{
|
|
"epoch": 1.0031914893617022,
|
|
"grad_norm": 3.7955758571624756,
|
|
"learning_rate": 9.392335784851168e-06,
|
|
"loss": 0.7127,
|
|
"step": 3772
|
|
},
|
|
{
|
|
"epoch": 1.0034574468085107,
|
|
"grad_norm": 4.6287970542907715,
|
|
"learning_rate": 9.39191548690021e-06,
|
|
"loss": 0.6634,
|
|
"step": 3773
|
|
},
|
|
{
|
|
"epoch": 1.0037234042553191,
|
|
"grad_norm": 4.188403129577637,
|
|
"learning_rate": 9.391495053058213e-06,
|
|
"loss": 0.7676,
|
|
"step": 3774
|
|
},
|
|
{
|
|
"epoch": 1.0039893617021276,
|
|
"grad_norm": 4.061558723449707,
|
|
"learning_rate": 9.39107448333819e-06,
|
|
"loss": 0.6863,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 1.004255319148936,
|
|
"grad_norm": 3.9614672660827637,
|
|
"learning_rate": 9.390653777753151e-06,
|
|
"loss": 0.8902,
|
|
"step": 3776
|
|
},
|
|
{
|
|
"epoch": 1.0045212765957447,
|
|
"grad_norm": 3.7978405952453613,
|
|
"learning_rate": 9.390232936316116e-06,
|
|
"loss": 0.8576,
|
|
"step": 3777
|
|
},
|
|
{
|
|
"epoch": 1.0047872340425532,
|
|
"grad_norm": 4.081401348114014,
|
|
"learning_rate": 9.389811959040106e-06,
|
|
"loss": 0.9293,
|
|
"step": 3778
|
|
},
|
|
{
|
|
"epoch": 1.0050531914893617,
|
|
"grad_norm": 4.4708123207092285,
|
|
"learning_rate": 9.389390845938147e-06,
|
|
"loss": 0.7971,
|
|
"step": 3779
|
|
},
|
|
{
|
|
"epoch": 1.0053191489361701,
|
|
"grad_norm": 3.670398235321045,
|
|
"learning_rate": 9.388969597023265e-06,
|
|
"loss": 0.7746,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 1.0055851063829788,
|
|
"grad_norm": 3.678659200668335,
|
|
"learning_rate": 9.388548212308496e-06,
|
|
"loss": 0.7505,
|
|
"step": 3781
|
|
},
|
|
{
|
|
"epoch": 1.0058510638297873,
|
|
"grad_norm": 3.943781614303589,
|
|
"learning_rate": 9.388126691806879e-06,
|
|
"loss": 0.7205,
|
|
"step": 3782
|
|
},
|
|
{
|
|
"epoch": 1.0061170212765957,
|
|
"grad_norm": 3.976630926132202,
|
|
"learning_rate": 9.387705035531455e-06,
|
|
"loss": 0.8597,
|
|
"step": 3783
|
|
},
|
|
{
|
|
"epoch": 1.0063829787234042,
|
|
"grad_norm": 3.6376004219055176,
|
|
"learning_rate": 9.387283243495273e-06,
|
|
"loss": 0.7911,
|
|
"step": 3784
|
|
},
|
|
{
|
|
"epoch": 1.0066489361702127,
|
|
"grad_norm": 3.698863983154297,
|
|
"learning_rate": 9.386861315711382e-06,
|
|
"loss": 0.7718,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 1.0069148936170214,
|
|
"grad_norm": 3.553309679031372,
|
|
"learning_rate": 9.386439252192836e-06,
|
|
"loss": 0.8233,
|
|
"step": 3786
|
|
},
|
|
{
|
|
"epoch": 1.0071808510638298,
|
|
"grad_norm": 3.588423252105713,
|
|
"learning_rate": 9.386017052952694e-06,
|
|
"loss": 0.782,
|
|
"step": 3787
|
|
},
|
|
{
|
|
"epoch": 1.0074468085106383,
|
|
"grad_norm": 3.5977461338043213,
|
|
"learning_rate": 9.385594718004023e-06,
|
|
"loss": 0.8548,
|
|
"step": 3788
|
|
},
|
|
{
|
|
"epoch": 1.0077127659574467,
|
|
"grad_norm": 4.447713375091553,
|
|
"learning_rate": 9.385172247359887e-06,
|
|
"loss": 0.833,
|
|
"step": 3789
|
|
},
|
|
{
|
|
"epoch": 1.0079787234042554,
|
|
"grad_norm": 3.6044774055480957,
|
|
"learning_rate": 9.384749641033358e-06,
|
|
"loss": 0.8453,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 1.008244680851064,
|
|
"grad_norm": 3.4909749031066895,
|
|
"learning_rate": 9.384326899037515e-06,
|
|
"loss": 0.7723,
|
|
"step": 3791
|
|
},
|
|
{
|
|
"epoch": 1.0085106382978724,
|
|
"grad_norm": 3.8825156688690186,
|
|
"learning_rate": 9.383904021385433e-06,
|
|
"loss": 0.7219,
|
|
"step": 3792
|
|
},
|
|
{
|
|
"epoch": 1.0087765957446808,
|
|
"grad_norm": 4.605208396911621,
|
|
"learning_rate": 9.3834810080902e-06,
|
|
"loss": 0.8625,
|
|
"step": 3793
|
|
},
|
|
{
|
|
"epoch": 1.0090425531914893,
|
|
"grad_norm": 3.8827695846557617,
|
|
"learning_rate": 9.383057859164904e-06,
|
|
"loss": 0.7579,
|
|
"step": 3794
|
|
},
|
|
{
|
|
"epoch": 1.009308510638298,
|
|
"grad_norm": 3.8152899742126465,
|
|
"learning_rate": 9.382634574622637e-06,
|
|
"loss": 0.7785,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 1.0095744680851064,
|
|
"grad_norm": 3.9749300479888916,
|
|
"learning_rate": 9.382211154476497e-06,
|
|
"loss": 0.7768,
|
|
"step": 3796
|
|
},
|
|
{
|
|
"epoch": 1.0098404255319149,
|
|
"grad_norm": 3.9352428913116455,
|
|
"learning_rate": 9.381787598739586e-06,
|
|
"loss": 0.9265,
|
|
"step": 3797
|
|
},
|
|
{
|
|
"epoch": 1.0101063829787233,
|
|
"grad_norm": 3.8235480785369873,
|
|
"learning_rate": 9.381363907425006e-06,
|
|
"loss": 0.7915,
|
|
"step": 3798
|
|
},
|
|
{
|
|
"epoch": 1.0103723404255318,
|
|
"grad_norm": 4.1063103675842285,
|
|
"learning_rate": 9.380940080545869e-06,
|
|
"loss": 0.8271,
|
|
"step": 3799
|
|
},
|
|
{
|
|
"epoch": 1.0106382978723405,
|
|
"grad_norm": 3.7685892581939697,
|
|
"learning_rate": 9.380516118115287e-06,
|
|
"loss": 0.7611,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 1.010904255319149,
|
|
"grad_norm": 3.679269790649414,
|
|
"learning_rate": 9.380092020146379e-06,
|
|
"loss": 0.7943,
|
|
"step": 3801
|
|
},
|
|
{
|
|
"epoch": 1.0111702127659574,
|
|
"grad_norm": 3.7096617221832275,
|
|
"learning_rate": 9.379667786652267e-06,
|
|
"loss": 0.8254,
|
|
"step": 3802
|
|
},
|
|
{
|
|
"epoch": 1.0114361702127659,
|
|
"grad_norm": 3.4425570964813232,
|
|
"learning_rate": 9.379243417646077e-06,
|
|
"loss": 0.7538,
|
|
"step": 3803
|
|
},
|
|
{
|
|
"epoch": 1.0117021276595746,
|
|
"grad_norm": 3.324869155883789,
|
|
"learning_rate": 9.378818913140941e-06,
|
|
"loss": 0.6687,
|
|
"step": 3804
|
|
},
|
|
{
|
|
"epoch": 1.011968085106383,
|
|
"grad_norm": 3.6117424964904785,
|
|
"learning_rate": 9.378394273149992e-06,
|
|
"loss": 0.8059,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 1.0122340425531915,
|
|
"grad_norm": 3.843747615814209,
|
|
"learning_rate": 9.377969497686369e-06,
|
|
"loss": 0.7257,
|
|
"step": 3806
|
|
},
|
|
{
|
|
"epoch": 1.0125,
|
|
"grad_norm": 3.997349977493286,
|
|
"learning_rate": 9.377544586763216e-06,
|
|
"loss": 0.837,
|
|
"step": 3807
|
|
},
|
|
{
|
|
"epoch": 1.0127659574468084,
|
|
"grad_norm": 3.5746796131134033,
|
|
"learning_rate": 9.377119540393677e-06,
|
|
"loss": 0.7891,
|
|
"step": 3808
|
|
},
|
|
{
|
|
"epoch": 1.013031914893617,
|
|
"grad_norm": 3.7787206172943115,
|
|
"learning_rate": 9.37669435859091e-06,
|
|
"loss": 0.7984,
|
|
"step": 3809
|
|
},
|
|
{
|
|
"epoch": 1.0132978723404256,
|
|
"grad_norm": 4.2211174964904785,
|
|
"learning_rate": 9.376269041368063e-06,
|
|
"loss": 0.7274,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 1.013563829787234,
|
|
"grad_norm": 3.591057300567627,
|
|
"learning_rate": 9.375843588738302e-06,
|
|
"loss": 0.807,
|
|
"step": 3811
|
|
},
|
|
{
|
|
"epoch": 1.0138297872340425,
|
|
"grad_norm": 3.5017266273498535,
|
|
"learning_rate": 9.375418000714787e-06,
|
|
"loss": 0.7173,
|
|
"step": 3812
|
|
},
|
|
{
|
|
"epoch": 1.014095744680851,
|
|
"grad_norm": 4.4692487716674805,
|
|
"learning_rate": 9.374992277310688e-06,
|
|
"loss": 0.7584,
|
|
"step": 3813
|
|
},
|
|
{
|
|
"epoch": 1.0143617021276596,
|
|
"grad_norm": 4.453067302703857,
|
|
"learning_rate": 9.374566418539178e-06,
|
|
"loss": 0.8444,
|
|
"step": 3814
|
|
},
|
|
{
|
|
"epoch": 1.014627659574468,
|
|
"grad_norm": 4.007133483886719,
|
|
"learning_rate": 9.37414042441343e-06,
|
|
"loss": 0.7163,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 1.0148936170212766,
|
|
"grad_norm": 3.714021682739258,
|
|
"learning_rate": 9.37371429494663e-06,
|
|
"loss": 0.7979,
|
|
"step": 3816
|
|
},
|
|
{
|
|
"epoch": 1.015159574468085,
|
|
"grad_norm": 4.196898460388184,
|
|
"learning_rate": 9.37328803015196e-06,
|
|
"loss": 0.8057,
|
|
"step": 3817
|
|
},
|
|
{
|
|
"epoch": 1.0154255319148937,
|
|
"grad_norm": 3.6794686317443848,
|
|
"learning_rate": 9.37286163004261e-06,
|
|
"loss": 0.8608,
|
|
"step": 3818
|
|
},
|
|
{
|
|
"epoch": 1.0156914893617022,
|
|
"grad_norm": 4.034078121185303,
|
|
"learning_rate": 9.37243509463177e-06,
|
|
"loss": 0.8794,
|
|
"step": 3819
|
|
},
|
|
{
|
|
"epoch": 1.0159574468085106,
|
|
"grad_norm": 3.671816110610962,
|
|
"learning_rate": 9.37200842393264e-06,
|
|
"loss": 0.755,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 1.016223404255319,
|
|
"grad_norm": 3.6856508255004883,
|
|
"learning_rate": 9.371581617958424e-06,
|
|
"loss": 0.7839,
|
|
"step": 3821
|
|
},
|
|
{
|
|
"epoch": 1.0164893617021276,
|
|
"grad_norm": 4.332293510437012,
|
|
"learning_rate": 9.371154676722326e-06,
|
|
"loss": 0.8305,
|
|
"step": 3822
|
|
},
|
|
{
|
|
"epoch": 1.0167553191489362,
|
|
"grad_norm": 4.032402038574219,
|
|
"learning_rate": 9.370727600237557e-06,
|
|
"loss": 0.8552,
|
|
"step": 3823
|
|
},
|
|
{
|
|
"epoch": 1.0170212765957447,
|
|
"grad_norm": 4.2808756828308105,
|
|
"learning_rate": 9.370300388517329e-06,
|
|
"loss": 0.8609,
|
|
"step": 3824
|
|
},
|
|
{
|
|
"epoch": 1.0172872340425532,
|
|
"grad_norm": 3.675684690475464,
|
|
"learning_rate": 9.36987304157486e-06,
|
|
"loss": 0.7307,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 1.0175531914893616,
|
|
"grad_norm": 3.6821727752685547,
|
|
"learning_rate": 9.369445559423376e-06,
|
|
"loss": 0.8393,
|
|
"step": 3826
|
|
},
|
|
{
|
|
"epoch": 1.0178191489361703,
|
|
"grad_norm": 4.112141132354736,
|
|
"learning_rate": 9.369017942076101e-06,
|
|
"loss": 0.8027,
|
|
"step": 3827
|
|
},
|
|
{
|
|
"epoch": 1.0180851063829788,
|
|
"grad_norm": 3.8829188346862793,
|
|
"learning_rate": 9.368590189546268e-06,
|
|
"loss": 0.8558,
|
|
"step": 3828
|
|
},
|
|
{
|
|
"epoch": 1.0183510638297872,
|
|
"grad_norm": 4.182821750640869,
|
|
"learning_rate": 9.368162301847112e-06,
|
|
"loss": 0.9872,
|
|
"step": 3829
|
|
},
|
|
{
|
|
"epoch": 1.0186170212765957,
|
|
"grad_norm": 4.043810844421387,
|
|
"learning_rate": 9.36773427899187e-06,
|
|
"loss": 0.731,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 1.0188829787234042,
|
|
"grad_norm": 3.6814448833465576,
|
|
"learning_rate": 9.367306120993787e-06,
|
|
"loss": 0.7434,
|
|
"step": 3831
|
|
},
|
|
{
|
|
"epoch": 1.0191489361702128,
|
|
"grad_norm": 3.823333978652954,
|
|
"learning_rate": 9.366877827866112e-06,
|
|
"loss": 0.7962,
|
|
"step": 3832
|
|
},
|
|
{
|
|
"epoch": 1.0194148936170213,
|
|
"grad_norm": 4.10197639465332,
|
|
"learning_rate": 9.366449399622092e-06,
|
|
"loss": 0.8655,
|
|
"step": 3833
|
|
},
|
|
{
|
|
"epoch": 1.0196808510638298,
|
|
"grad_norm": 3.4033734798431396,
|
|
"learning_rate": 9.366020836274991e-06,
|
|
"loss": 0.6871,
|
|
"step": 3834
|
|
},
|
|
{
|
|
"epoch": 1.0199468085106382,
|
|
"grad_norm": 3.9210493564605713,
|
|
"learning_rate": 9.365592137838063e-06,
|
|
"loss": 0.8913,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 1.0202127659574467,
|
|
"grad_norm": 3.972930431365967,
|
|
"learning_rate": 9.365163304324576e-06,
|
|
"loss": 0.7394,
|
|
"step": 3836
|
|
},
|
|
{
|
|
"epoch": 1.0204787234042554,
|
|
"grad_norm": 3.603489875793457,
|
|
"learning_rate": 9.364734335747795e-06,
|
|
"loss": 0.6501,
|
|
"step": 3837
|
|
},
|
|
{
|
|
"epoch": 1.0207446808510638,
|
|
"grad_norm": 3.678868532180786,
|
|
"learning_rate": 9.364305232120997e-06,
|
|
"loss": 0.7685,
|
|
"step": 3838
|
|
},
|
|
{
|
|
"epoch": 1.0210106382978723,
|
|
"grad_norm": 4.074692726135254,
|
|
"learning_rate": 9.363875993457454e-06,
|
|
"loss": 0.8085,
|
|
"step": 3839
|
|
},
|
|
{
|
|
"epoch": 1.0212765957446808,
|
|
"grad_norm": 3.683279514312744,
|
|
"learning_rate": 9.363446619770452e-06,
|
|
"loss": 0.7703,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 1.0215425531914895,
|
|
"grad_norm": 3.837007999420166,
|
|
"learning_rate": 9.363017111073273e-06,
|
|
"loss": 0.8403,
|
|
"step": 3841
|
|
},
|
|
{
|
|
"epoch": 1.021808510638298,
|
|
"grad_norm": 4.0264973640441895,
|
|
"learning_rate": 9.362587467379208e-06,
|
|
"loss": 0.8001,
|
|
"step": 3842
|
|
},
|
|
{
|
|
"epoch": 1.0220744680851064,
|
|
"grad_norm": 3.9169387817382812,
|
|
"learning_rate": 9.362157688701551e-06,
|
|
"loss": 0.7603,
|
|
"step": 3843
|
|
},
|
|
{
|
|
"epoch": 1.0223404255319148,
|
|
"grad_norm": 3.4985976219177246,
|
|
"learning_rate": 9.3617277750536e-06,
|
|
"loss": 0.6856,
|
|
"step": 3844
|
|
},
|
|
{
|
|
"epoch": 1.0226063829787233,
|
|
"grad_norm": 3.9737682342529297,
|
|
"learning_rate": 9.361297726448656e-06,
|
|
"loss": 0.8021,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 1.022872340425532,
|
|
"grad_norm": 4.206306457519531,
|
|
"learning_rate": 9.360867542900023e-06,
|
|
"loss": 0.7726,
|
|
"step": 3846
|
|
},
|
|
{
|
|
"epoch": 1.0231382978723405,
|
|
"grad_norm": 3.5013468265533447,
|
|
"learning_rate": 9.360437224421017e-06,
|
|
"loss": 0.7046,
|
|
"step": 3847
|
|
},
|
|
{
|
|
"epoch": 1.023404255319149,
|
|
"grad_norm": 4.186954021453857,
|
|
"learning_rate": 9.360006771024947e-06,
|
|
"loss": 0.8574,
|
|
"step": 3848
|
|
},
|
|
{
|
|
"epoch": 1.0236702127659574,
|
|
"grad_norm": 3.8380942344665527,
|
|
"learning_rate": 9.359576182725136e-06,
|
|
"loss": 0.8463,
|
|
"step": 3849
|
|
},
|
|
{
|
|
"epoch": 1.023936170212766,
|
|
"grad_norm": 4.439043998718262,
|
|
"learning_rate": 9.359145459534906e-06,
|
|
"loss": 0.868,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 1.0242021276595745,
|
|
"grad_norm": 3.555283546447754,
|
|
"learning_rate": 9.358714601467581e-06,
|
|
"loss": 0.7842,
|
|
"step": 3851
|
|
},
|
|
{
|
|
"epoch": 1.024468085106383,
|
|
"grad_norm": 3.4938576221466064,
|
|
"learning_rate": 9.358283608536498e-06,
|
|
"loss": 0.8562,
|
|
"step": 3852
|
|
},
|
|
{
|
|
"epoch": 1.0247340425531914,
|
|
"grad_norm": 3.709388256072998,
|
|
"learning_rate": 9.357852480754985e-06,
|
|
"loss": 0.7753,
|
|
"step": 3853
|
|
},
|
|
{
|
|
"epoch": 1.025,
|
|
"grad_norm": 3.594524621963501,
|
|
"learning_rate": 9.357421218136387e-06,
|
|
"loss": 0.9016,
|
|
"step": 3854
|
|
},
|
|
{
|
|
"epoch": 1.0252659574468086,
|
|
"grad_norm": 3.8423714637756348,
|
|
"learning_rate": 9.356989820694046e-06,
|
|
"loss": 0.918,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 1.025531914893617,
|
|
"grad_norm": 4.120334625244141,
|
|
"learning_rate": 9.356558288441312e-06,
|
|
"loss": 0.8276,
|
|
"step": 3856
|
|
},
|
|
{
|
|
"epoch": 1.0257978723404255,
|
|
"grad_norm": 3.7441205978393555,
|
|
"learning_rate": 9.356126621391532e-06,
|
|
"loss": 0.6485,
|
|
"step": 3857
|
|
},
|
|
{
|
|
"epoch": 1.026063829787234,
|
|
"grad_norm": 3.652815341949463,
|
|
"learning_rate": 9.35569481955807e-06,
|
|
"loss": 0.8443,
|
|
"step": 3858
|
|
},
|
|
{
|
|
"epoch": 1.0263297872340424,
|
|
"grad_norm": 3.8127315044403076,
|
|
"learning_rate": 9.355262882954277e-06,
|
|
"loss": 0.8928,
|
|
"step": 3859
|
|
},
|
|
{
|
|
"epoch": 1.0265957446808511,
|
|
"grad_norm": 4.254662036895752,
|
|
"learning_rate": 9.354830811593527e-06,
|
|
"loss": 0.7228,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 1.0268617021276596,
|
|
"grad_norm": 3.737208366394043,
|
|
"learning_rate": 9.354398605489182e-06,
|
|
"loss": 0.7144,
|
|
"step": 3861
|
|
},
|
|
{
|
|
"epoch": 1.027127659574468,
|
|
"grad_norm": 4.630359172821045,
|
|
"learning_rate": 9.353966264654619e-06,
|
|
"loss": 1.0136,
|
|
"step": 3862
|
|
},
|
|
{
|
|
"epoch": 1.0273936170212765,
|
|
"grad_norm": 4.139670372009277,
|
|
"learning_rate": 9.353533789103213e-06,
|
|
"loss": 0.7467,
|
|
"step": 3863
|
|
},
|
|
{
|
|
"epoch": 1.0276595744680852,
|
|
"grad_norm": 3.5735762119293213,
|
|
"learning_rate": 9.353101178848345e-06,
|
|
"loss": 0.6863,
|
|
"step": 3864
|
|
},
|
|
{
|
|
"epoch": 1.0279255319148937,
|
|
"grad_norm": 4.091590881347656,
|
|
"learning_rate": 9.352668433903402e-06,
|
|
"loss": 0.9083,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 1.0281914893617021,
|
|
"grad_norm": 4.462408065795898,
|
|
"learning_rate": 9.352235554281775e-06,
|
|
"loss": 0.8134,
|
|
"step": 3866
|
|
},
|
|
{
|
|
"epoch": 1.0284574468085106,
|
|
"grad_norm": 4.514068603515625,
|
|
"learning_rate": 9.351802539996853e-06,
|
|
"loss": 0.8516,
|
|
"step": 3867
|
|
},
|
|
{
|
|
"epoch": 1.028723404255319,
|
|
"grad_norm": 4.771678447723389,
|
|
"learning_rate": 9.351369391062037e-06,
|
|
"loss": 0.8317,
|
|
"step": 3868
|
|
},
|
|
{
|
|
"epoch": 1.0289893617021277,
|
|
"grad_norm": 3.9608962535858154,
|
|
"learning_rate": 9.350936107490731e-06,
|
|
"loss": 0.7668,
|
|
"step": 3869
|
|
},
|
|
{
|
|
"epoch": 1.0292553191489362,
|
|
"grad_norm": 3.6606082916259766,
|
|
"learning_rate": 9.350502689296337e-06,
|
|
"loss": 0.8021,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 1.0295212765957447,
|
|
"grad_norm": 3.395991563796997,
|
|
"learning_rate": 9.35006913649227e-06,
|
|
"loss": 0.7561,
|
|
"step": 3871
|
|
},
|
|
{
|
|
"epoch": 1.0297872340425531,
|
|
"grad_norm": 3.9416377544403076,
|
|
"learning_rate": 9.34963544909194e-06,
|
|
"loss": 0.6551,
|
|
"step": 3872
|
|
},
|
|
{
|
|
"epoch": 1.0300531914893618,
|
|
"grad_norm": 3.8515100479125977,
|
|
"learning_rate": 9.34920162710877e-06,
|
|
"loss": 0.9596,
|
|
"step": 3873
|
|
},
|
|
{
|
|
"epoch": 1.0303191489361703,
|
|
"grad_norm": 3.532066583633423,
|
|
"learning_rate": 9.34876767055618e-06,
|
|
"loss": 0.7312,
|
|
"step": 3874
|
|
},
|
|
{
|
|
"epoch": 1.0305851063829787,
|
|
"grad_norm": 3.523547887802124,
|
|
"learning_rate": 9.3483335794476e-06,
|
|
"loss": 0.9029,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 1.0308510638297872,
|
|
"grad_norm": 3.8942482471466064,
|
|
"learning_rate": 9.347899353796456e-06,
|
|
"loss": 0.852,
|
|
"step": 3876
|
|
},
|
|
{
|
|
"epoch": 1.0311170212765957,
|
|
"grad_norm": 3.8025577068328857,
|
|
"learning_rate": 9.347464993616191e-06,
|
|
"loss": 0.7704,
|
|
"step": 3877
|
|
},
|
|
{
|
|
"epoch": 1.0313829787234043,
|
|
"grad_norm": 3.5986201763153076,
|
|
"learning_rate": 9.347030498920239e-06,
|
|
"loss": 0.8289,
|
|
"step": 3878
|
|
},
|
|
{
|
|
"epoch": 1.0316489361702128,
|
|
"grad_norm": 4.27517032623291,
|
|
"learning_rate": 9.346595869722044e-06,
|
|
"loss": 0.9252,
|
|
"step": 3879
|
|
},
|
|
{
|
|
"epoch": 1.0319148936170213,
|
|
"grad_norm": 3.845385789871216,
|
|
"learning_rate": 9.346161106035056e-06,
|
|
"loss": 0.7372,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 1.0321808510638297,
|
|
"grad_norm": 3.875645875930786,
|
|
"learning_rate": 9.345726207872728e-06,
|
|
"loss": 0.9036,
|
|
"step": 3881
|
|
},
|
|
{
|
|
"epoch": 1.0324468085106382,
|
|
"grad_norm": 4.004083156585693,
|
|
"learning_rate": 9.345291175248514e-06,
|
|
"loss": 0.8,
|
|
"step": 3882
|
|
},
|
|
{
|
|
"epoch": 1.0327127659574469,
|
|
"grad_norm": 4.025826930999756,
|
|
"learning_rate": 9.344856008175874e-06,
|
|
"loss": 0.8063,
|
|
"step": 3883
|
|
},
|
|
{
|
|
"epoch": 1.0329787234042553,
|
|
"grad_norm": 4.168485641479492,
|
|
"learning_rate": 9.344420706668274e-06,
|
|
"loss": 0.8712,
|
|
"step": 3884
|
|
},
|
|
{
|
|
"epoch": 1.0332446808510638,
|
|
"grad_norm": 3.7525241374969482,
|
|
"learning_rate": 9.343985270739184e-06,
|
|
"loss": 0.8075,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 1.0335106382978723,
|
|
"grad_norm": 4.079540729522705,
|
|
"learning_rate": 9.343549700402073e-06,
|
|
"loss": 0.7574,
|
|
"step": 3886
|
|
},
|
|
{
|
|
"epoch": 1.033776595744681,
|
|
"grad_norm": 3.5480105876922607,
|
|
"learning_rate": 9.34311399567042e-06,
|
|
"loss": 0.8544,
|
|
"step": 3887
|
|
},
|
|
{
|
|
"epoch": 1.0340425531914894,
|
|
"grad_norm": 3.6420836448669434,
|
|
"learning_rate": 9.342678156557709e-06,
|
|
"loss": 0.8279,
|
|
"step": 3888
|
|
},
|
|
{
|
|
"epoch": 1.0343085106382979,
|
|
"grad_norm": 3.8541533946990967,
|
|
"learning_rate": 9.342242183077422e-06,
|
|
"loss": 0.8794,
|
|
"step": 3889
|
|
},
|
|
{
|
|
"epoch": 1.0345744680851063,
|
|
"grad_norm": 3.5861008167266846,
|
|
"learning_rate": 9.341806075243049e-06,
|
|
"loss": 0.7949,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 1.0348404255319148,
|
|
"grad_norm": 4.284236431121826,
|
|
"learning_rate": 9.341369833068086e-06,
|
|
"loss": 0.7882,
|
|
"step": 3891
|
|
},
|
|
{
|
|
"epoch": 1.0351063829787235,
|
|
"grad_norm": 4.239330768585205,
|
|
"learning_rate": 9.340933456566028e-06,
|
|
"loss": 0.8299,
|
|
"step": 3892
|
|
},
|
|
{
|
|
"epoch": 1.035372340425532,
|
|
"grad_norm": 4.633347988128662,
|
|
"learning_rate": 9.340496945750377e-06,
|
|
"loss": 0.9297,
|
|
"step": 3893
|
|
},
|
|
{
|
|
"epoch": 1.0356382978723404,
|
|
"grad_norm": 4.2658538818359375,
|
|
"learning_rate": 9.340060300634642e-06,
|
|
"loss": 0.7928,
|
|
"step": 3894
|
|
},
|
|
{
|
|
"epoch": 1.0359042553191489,
|
|
"grad_norm": 3.876652717590332,
|
|
"learning_rate": 9.33962352123233e-06,
|
|
"loss": 0.7742,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 1.0361702127659576,
|
|
"grad_norm": 3.939422130584717,
|
|
"learning_rate": 9.339186607556959e-06,
|
|
"loss": 0.7676,
|
|
"step": 3896
|
|
},
|
|
{
|
|
"epoch": 1.036436170212766,
|
|
"grad_norm": 3.9666736125946045,
|
|
"learning_rate": 9.338749559622042e-06,
|
|
"loss": 0.8759,
|
|
"step": 3897
|
|
},
|
|
{
|
|
"epoch": 1.0367021276595745,
|
|
"grad_norm": 3.6032910346984863,
|
|
"learning_rate": 9.338312377441108e-06,
|
|
"loss": 0.6806,
|
|
"step": 3898
|
|
},
|
|
{
|
|
"epoch": 1.036968085106383,
|
|
"grad_norm": 3.6236395835876465,
|
|
"learning_rate": 9.337875061027681e-06,
|
|
"loss": 0.8275,
|
|
"step": 3899
|
|
},
|
|
{
|
|
"epoch": 1.0372340425531914,
|
|
"grad_norm": 4.132247447967529,
|
|
"learning_rate": 9.337437610395292e-06,
|
|
"loss": 0.8429,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 1.0375,
|
|
"grad_norm": 3.7111639976501465,
|
|
"learning_rate": 9.337000025557477e-06,
|
|
"loss": 0.9638,
|
|
"step": 3901
|
|
},
|
|
{
|
|
"epoch": 1.0377659574468086,
|
|
"grad_norm": 3.9870896339416504,
|
|
"learning_rate": 9.336562306527775e-06,
|
|
"loss": 0.7931,
|
|
"step": 3902
|
|
},
|
|
{
|
|
"epoch": 1.038031914893617,
|
|
"grad_norm": 3.9265518188476562,
|
|
"learning_rate": 9.336124453319729e-06,
|
|
"loss": 0.7928,
|
|
"step": 3903
|
|
},
|
|
{
|
|
"epoch": 1.0382978723404255,
|
|
"grad_norm": 3.5974245071411133,
|
|
"learning_rate": 9.335686465946888e-06,
|
|
"loss": 0.7127,
|
|
"step": 3904
|
|
},
|
|
{
|
|
"epoch": 1.038563829787234,
|
|
"grad_norm": 3.6213388442993164,
|
|
"learning_rate": 9.335248344422803e-06,
|
|
"loss": 0.7669,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 1.0388297872340426,
|
|
"grad_norm": 4.555843830108643,
|
|
"learning_rate": 9.33481008876103e-06,
|
|
"loss": 0.8885,
|
|
"step": 3906
|
|
},
|
|
{
|
|
"epoch": 1.039095744680851,
|
|
"grad_norm": 4.553684234619141,
|
|
"learning_rate": 9.33437169897513e-06,
|
|
"loss": 0.9339,
|
|
"step": 3907
|
|
},
|
|
{
|
|
"epoch": 1.0393617021276595,
|
|
"grad_norm": 4.390134811401367,
|
|
"learning_rate": 9.333933175078665e-06,
|
|
"loss": 0.887,
|
|
"step": 3908
|
|
},
|
|
{
|
|
"epoch": 1.039627659574468,
|
|
"grad_norm": 4.3838677406311035,
|
|
"learning_rate": 9.333494517085205e-06,
|
|
"loss": 0.8234,
|
|
"step": 3909
|
|
},
|
|
{
|
|
"epoch": 1.0398936170212767,
|
|
"grad_norm": 4.019488334655762,
|
|
"learning_rate": 9.333055725008323e-06,
|
|
"loss": 0.9096,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 1.0401595744680852,
|
|
"grad_norm": 3.4591004848480225,
|
|
"learning_rate": 9.332616798861596e-06,
|
|
"loss": 0.7404,
|
|
"step": 3911
|
|
},
|
|
{
|
|
"epoch": 1.0404255319148936,
|
|
"grad_norm": 4.587208271026611,
|
|
"learning_rate": 9.332177738658603e-06,
|
|
"loss": 0.8192,
|
|
"step": 3912
|
|
},
|
|
{
|
|
"epoch": 1.040691489361702,
|
|
"grad_norm": 3.734438180923462,
|
|
"learning_rate": 9.331738544412932e-06,
|
|
"loss": 0.8286,
|
|
"step": 3913
|
|
},
|
|
{
|
|
"epoch": 1.0409574468085105,
|
|
"grad_norm": 3.7644083499908447,
|
|
"learning_rate": 9.33129921613817e-06,
|
|
"loss": 0.8243,
|
|
"step": 3914
|
|
},
|
|
{
|
|
"epoch": 1.0412234042553192,
|
|
"grad_norm": 3.412766456604004,
|
|
"learning_rate": 9.33085975384791e-06,
|
|
"loss": 0.8141,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 1.0414893617021277,
|
|
"grad_norm": 3.1695566177368164,
|
|
"learning_rate": 9.33042015755575e-06,
|
|
"loss": 0.6531,
|
|
"step": 3916
|
|
},
|
|
{
|
|
"epoch": 1.0417553191489362,
|
|
"grad_norm": 4.0986151695251465,
|
|
"learning_rate": 9.329980427275293e-06,
|
|
"loss": 0.8253,
|
|
"step": 3917
|
|
},
|
|
{
|
|
"epoch": 1.0420212765957446,
|
|
"grad_norm": 3.9123079776763916,
|
|
"learning_rate": 9.329540563020143e-06,
|
|
"loss": 0.8211,
|
|
"step": 3918
|
|
},
|
|
{
|
|
"epoch": 1.0422872340425533,
|
|
"grad_norm": 3.860915184020996,
|
|
"learning_rate": 9.32910056480391e-06,
|
|
"loss": 0.7886,
|
|
"step": 3919
|
|
},
|
|
{
|
|
"epoch": 1.0425531914893618,
|
|
"grad_norm": 3.6465773582458496,
|
|
"learning_rate": 9.328660432640211e-06,
|
|
"loss": 0.7254,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 1.0428191489361702,
|
|
"grad_norm": 4.174450874328613,
|
|
"learning_rate": 9.328220166542659e-06,
|
|
"loss": 0.8686,
|
|
"step": 3921
|
|
},
|
|
{
|
|
"epoch": 1.0430851063829787,
|
|
"grad_norm": 3.563661575317383,
|
|
"learning_rate": 9.32777976652488e-06,
|
|
"loss": 0.8862,
|
|
"step": 3922
|
|
},
|
|
{
|
|
"epoch": 1.0433510638297872,
|
|
"grad_norm": 3.976609468460083,
|
|
"learning_rate": 9.3273392326005e-06,
|
|
"loss": 0.9412,
|
|
"step": 3923
|
|
},
|
|
{
|
|
"epoch": 1.0436170212765958,
|
|
"grad_norm": 3.979386568069458,
|
|
"learning_rate": 9.32689856478315e-06,
|
|
"loss": 0.767,
|
|
"step": 3924
|
|
},
|
|
{
|
|
"epoch": 1.0438829787234043,
|
|
"grad_norm": 3.6504030227661133,
|
|
"learning_rate": 9.326457763086463e-06,
|
|
"loss": 0.7288,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 1.0441489361702128,
|
|
"grad_norm": 3.5788464546203613,
|
|
"learning_rate": 9.32601682752408e-06,
|
|
"loss": 0.7756,
|
|
"step": 3926
|
|
},
|
|
{
|
|
"epoch": 1.0444148936170212,
|
|
"grad_norm": 4.129055976867676,
|
|
"learning_rate": 9.325575758109642e-06,
|
|
"loss": 0.8129,
|
|
"step": 3927
|
|
},
|
|
{
|
|
"epoch": 1.0446808510638297,
|
|
"grad_norm": 4.022395133972168,
|
|
"learning_rate": 9.325134554856799e-06,
|
|
"loss": 0.8346,
|
|
"step": 3928
|
|
},
|
|
{
|
|
"epoch": 1.0449468085106384,
|
|
"grad_norm": 3.9106342792510986,
|
|
"learning_rate": 9.3246932177792e-06,
|
|
"loss": 0.7345,
|
|
"step": 3929
|
|
},
|
|
{
|
|
"epoch": 1.0452127659574468,
|
|
"grad_norm": 5.765318870544434,
|
|
"learning_rate": 9.324251746890501e-06,
|
|
"loss": 1.0247,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 1.0454787234042553,
|
|
"grad_norm": 3.858736276626587,
|
|
"learning_rate": 9.323810142204361e-06,
|
|
"loss": 0.8736,
|
|
"step": 3931
|
|
},
|
|
{
|
|
"epoch": 1.0457446808510638,
|
|
"grad_norm": 3.313824415206909,
|
|
"learning_rate": 9.323368403734445e-06,
|
|
"loss": 0.8105,
|
|
"step": 3932
|
|
},
|
|
{
|
|
"epoch": 1.0460106382978724,
|
|
"grad_norm": 3.7220394611358643,
|
|
"learning_rate": 9.32292653149442e-06,
|
|
"loss": 0.7904,
|
|
"step": 3933
|
|
},
|
|
{
|
|
"epoch": 1.046276595744681,
|
|
"grad_norm": 3.852928638458252,
|
|
"learning_rate": 9.32248452549796e-06,
|
|
"loss": 0.7263,
|
|
"step": 3934
|
|
},
|
|
{
|
|
"epoch": 1.0465425531914894,
|
|
"grad_norm": 3.9275519847869873,
|
|
"learning_rate": 9.322042385758738e-06,
|
|
"loss": 0.8318,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 1.0468085106382978,
|
|
"grad_norm": 4.239774227142334,
|
|
"learning_rate": 9.321600112290439e-06,
|
|
"loss": 0.7238,
|
|
"step": 3936
|
|
},
|
|
{
|
|
"epoch": 1.0470744680851063,
|
|
"grad_norm": 3.672391891479492,
|
|
"learning_rate": 9.321157705106741e-06,
|
|
"loss": 0.87,
|
|
"step": 3937
|
|
},
|
|
{
|
|
"epoch": 1.047340425531915,
|
|
"grad_norm": 3.510413646697998,
|
|
"learning_rate": 9.320715164221338e-06,
|
|
"loss": 0.7332,
|
|
"step": 3938
|
|
},
|
|
{
|
|
"epoch": 1.0476063829787234,
|
|
"grad_norm": 3.9943974018096924,
|
|
"learning_rate": 9.32027248964792e-06,
|
|
"loss": 0.7492,
|
|
"step": 3939
|
|
},
|
|
{
|
|
"epoch": 1.047872340425532,
|
|
"grad_norm": 3.3832719326019287,
|
|
"learning_rate": 9.319829681400185e-06,
|
|
"loss": 0.7657,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 1.0481382978723404,
|
|
"grad_norm": 3.761160135269165,
|
|
"learning_rate": 9.319386739491834e-06,
|
|
"loss": 0.7968,
|
|
"step": 3941
|
|
},
|
|
{
|
|
"epoch": 1.048404255319149,
|
|
"grad_norm": 3.9942009449005127,
|
|
"learning_rate": 9.31894366393657e-06,
|
|
"loss": 0.8027,
|
|
"step": 3942
|
|
},
|
|
{
|
|
"epoch": 1.0486702127659575,
|
|
"grad_norm": 3.8257179260253906,
|
|
"learning_rate": 9.318500454748105e-06,
|
|
"loss": 0.8245,
|
|
"step": 3943
|
|
},
|
|
{
|
|
"epoch": 1.048936170212766,
|
|
"grad_norm": 4.181244850158691,
|
|
"learning_rate": 9.318057111940153e-06,
|
|
"loss": 0.7048,
|
|
"step": 3944
|
|
},
|
|
{
|
|
"epoch": 1.0492021276595744,
|
|
"grad_norm": 4.021924018859863,
|
|
"learning_rate": 9.317613635526431e-06,
|
|
"loss": 0.8669,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 1.049468085106383,
|
|
"grad_norm": 4.112471580505371,
|
|
"learning_rate": 9.317170025520656e-06,
|
|
"loss": 0.7719,
|
|
"step": 3946
|
|
},
|
|
{
|
|
"epoch": 1.0497340425531916,
|
|
"grad_norm": 4.079671859741211,
|
|
"learning_rate": 9.31672628193656e-06,
|
|
"loss": 0.9156,
|
|
"step": 3947
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"grad_norm": 3.6803247928619385,
|
|
"learning_rate": 9.31628240478787e-06,
|
|
"loss": 0.741,
|
|
"step": 3948
|
|
},
|
|
{
|
|
"epoch": 1.0502659574468085,
|
|
"grad_norm": 3.8785572052001953,
|
|
"learning_rate": 9.315838394088322e-06,
|
|
"loss": 0.7652,
|
|
"step": 3949
|
|
},
|
|
{
|
|
"epoch": 1.050531914893617,
|
|
"grad_norm": 3.9115874767303467,
|
|
"learning_rate": 9.31539424985165e-06,
|
|
"loss": 0.8373,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 1.0507978723404254,
|
|
"grad_norm": 4.03147029876709,
|
|
"learning_rate": 9.3149499720916e-06,
|
|
"loss": 0.7918,
|
|
"step": 3951
|
|
},
|
|
{
|
|
"epoch": 1.0510638297872341,
|
|
"grad_norm": 3.7957963943481445,
|
|
"learning_rate": 9.31450556082192e-06,
|
|
"loss": 0.8583,
|
|
"step": 3952
|
|
},
|
|
{
|
|
"epoch": 1.0513297872340426,
|
|
"grad_norm": 3.83341646194458,
|
|
"learning_rate": 9.314061016056354e-06,
|
|
"loss": 0.8166,
|
|
"step": 3953
|
|
},
|
|
{
|
|
"epoch": 1.051595744680851,
|
|
"grad_norm": 3.7149436473846436,
|
|
"learning_rate": 9.313616337808664e-06,
|
|
"loss": 0.7958,
|
|
"step": 3954
|
|
},
|
|
{
|
|
"epoch": 1.0518617021276595,
|
|
"grad_norm": 3.941300392150879,
|
|
"learning_rate": 9.313171526092606e-06,
|
|
"loss": 0.8765,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 1.0521276595744682,
|
|
"grad_norm": 3.688690423965454,
|
|
"learning_rate": 9.312726580921942e-06,
|
|
"loss": 0.7011,
|
|
"step": 3956
|
|
},
|
|
{
|
|
"epoch": 1.0523936170212767,
|
|
"grad_norm": 3.683009147644043,
|
|
"learning_rate": 9.31228150231044e-06,
|
|
"loss": 0.7307,
|
|
"step": 3957
|
|
},
|
|
{
|
|
"epoch": 1.0526595744680851,
|
|
"grad_norm": 3.816660165786743,
|
|
"learning_rate": 9.311836290271872e-06,
|
|
"loss": 0.8001,
|
|
"step": 3958
|
|
},
|
|
{
|
|
"epoch": 1.0529255319148936,
|
|
"grad_norm": 3.8870654106140137,
|
|
"learning_rate": 9.311390944820012e-06,
|
|
"loss": 0.7563,
|
|
"step": 3959
|
|
},
|
|
{
|
|
"epoch": 1.053191489361702,
|
|
"grad_norm": 4.011544704437256,
|
|
"learning_rate": 9.31094546596864e-06,
|
|
"loss": 0.946,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 1.0534574468085107,
|
|
"grad_norm": 4.572283744812012,
|
|
"learning_rate": 9.31049985373154e-06,
|
|
"loss": 0.8803,
|
|
"step": 3961
|
|
},
|
|
{
|
|
"epoch": 1.0537234042553192,
|
|
"grad_norm": 3.7621991634368896,
|
|
"learning_rate": 9.310054108122499e-06,
|
|
"loss": 0.8607,
|
|
"step": 3962
|
|
},
|
|
{
|
|
"epoch": 1.0539893617021276,
|
|
"grad_norm": 3.4957644939422607,
|
|
"learning_rate": 9.309608229155311e-06,
|
|
"loss": 0.7627,
|
|
"step": 3963
|
|
},
|
|
{
|
|
"epoch": 1.054255319148936,
|
|
"grad_norm": 4.007942199707031,
|
|
"learning_rate": 9.30916221684377e-06,
|
|
"loss": 0.7599,
|
|
"step": 3964
|
|
},
|
|
{
|
|
"epoch": 1.0545212765957448,
|
|
"grad_norm": 3.790900945663452,
|
|
"learning_rate": 9.308716071201676e-06,
|
|
"loss": 0.6845,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 1.0547872340425533,
|
|
"grad_norm": 4.06134557723999,
|
|
"learning_rate": 9.308269792242833e-06,
|
|
"loss": 0.8446,
|
|
"step": 3966
|
|
},
|
|
{
|
|
"epoch": 1.0550531914893617,
|
|
"grad_norm": 3.927212715148926,
|
|
"learning_rate": 9.30782337998105e-06,
|
|
"loss": 0.8009,
|
|
"step": 3967
|
|
},
|
|
{
|
|
"epoch": 1.0553191489361702,
|
|
"grad_norm": 3.9333722591400146,
|
|
"learning_rate": 9.307376834430142e-06,
|
|
"loss": 0.8184,
|
|
"step": 3968
|
|
},
|
|
{
|
|
"epoch": 1.0555851063829786,
|
|
"grad_norm": 4.4977288246154785,
|
|
"learning_rate": 9.306930155603923e-06,
|
|
"loss": 0.841,
|
|
"step": 3969
|
|
},
|
|
{
|
|
"epoch": 1.0558510638297873,
|
|
"grad_norm": 3.587890386581421,
|
|
"learning_rate": 9.306483343516212e-06,
|
|
"loss": 0.6937,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 1.0561170212765958,
|
|
"grad_norm": 4.001445293426514,
|
|
"learning_rate": 9.30603639818084e-06,
|
|
"loss": 0.8711,
|
|
"step": 3971
|
|
},
|
|
{
|
|
"epoch": 1.0563829787234043,
|
|
"grad_norm": 3.6268887519836426,
|
|
"learning_rate": 9.30558931961163e-06,
|
|
"loss": 0.7053,
|
|
"step": 3972
|
|
},
|
|
{
|
|
"epoch": 1.0566489361702127,
|
|
"grad_norm": 3.929903030395508,
|
|
"learning_rate": 9.305142107822415e-06,
|
|
"loss": 0.8549,
|
|
"step": 3973
|
|
},
|
|
{
|
|
"epoch": 1.0569148936170212,
|
|
"grad_norm": 3.7672524452209473,
|
|
"learning_rate": 9.304694762827038e-06,
|
|
"loss": 0.6872,
|
|
"step": 3974
|
|
},
|
|
{
|
|
"epoch": 1.0571808510638299,
|
|
"grad_norm": 4.7689738273620605,
|
|
"learning_rate": 9.304247284639335e-06,
|
|
"loss": 0.8544,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 1.0574468085106383,
|
|
"grad_norm": 3.8088295459747314,
|
|
"learning_rate": 9.303799673273153e-06,
|
|
"loss": 0.7047,
|
|
"step": 3976
|
|
},
|
|
{
|
|
"epoch": 1.0577127659574468,
|
|
"grad_norm": 4.246236324310303,
|
|
"learning_rate": 9.303351928742344e-06,
|
|
"loss": 0.7887,
|
|
"step": 3977
|
|
},
|
|
{
|
|
"epoch": 1.0579787234042553,
|
|
"grad_norm": 3.864558696746826,
|
|
"learning_rate": 9.302904051060758e-06,
|
|
"loss": 0.828,
|
|
"step": 3978
|
|
},
|
|
{
|
|
"epoch": 1.058244680851064,
|
|
"grad_norm": 4.24592399597168,
|
|
"learning_rate": 9.302456040242257e-06,
|
|
"loss": 0.7851,
|
|
"step": 3979
|
|
},
|
|
{
|
|
"epoch": 1.0585106382978724,
|
|
"grad_norm": 4.1537909507751465,
|
|
"learning_rate": 9.302007896300697e-06,
|
|
"loss": 0.8281,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 1.0587765957446809,
|
|
"grad_norm": 4.180373668670654,
|
|
"learning_rate": 9.30155961924995e-06,
|
|
"loss": 0.8334,
|
|
"step": 3981
|
|
},
|
|
{
|
|
"epoch": 1.0590425531914893,
|
|
"grad_norm": 3.3669097423553467,
|
|
"learning_rate": 9.301111209103883e-06,
|
|
"loss": 0.745,
|
|
"step": 3982
|
|
},
|
|
{
|
|
"epoch": 1.0593085106382978,
|
|
"grad_norm": 3.8249645233154297,
|
|
"learning_rate": 9.300662665876373e-06,
|
|
"loss": 0.8035,
|
|
"step": 3983
|
|
},
|
|
{
|
|
"epoch": 1.0595744680851065,
|
|
"grad_norm": 3.8265540599823,
|
|
"learning_rate": 9.300213989581294e-06,
|
|
"loss": 0.708,
|
|
"step": 3984
|
|
},
|
|
{
|
|
"epoch": 1.059840425531915,
|
|
"grad_norm": 4.226235866546631,
|
|
"learning_rate": 9.299765180232534e-06,
|
|
"loss": 0.8594,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 1.0601063829787234,
|
|
"grad_norm": 4.107953071594238,
|
|
"learning_rate": 9.299316237843976e-06,
|
|
"loss": 0.8162,
|
|
"step": 3986
|
|
},
|
|
{
|
|
"epoch": 1.0603723404255319,
|
|
"grad_norm": 3.8606715202331543,
|
|
"learning_rate": 9.298867162429511e-06,
|
|
"loss": 0.7562,
|
|
"step": 3987
|
|
},
|
|
{
|
|
"epoch": 1.0606382978723403,
|
|
"grad_norm": 3.6489405632019043,
|
|
"learning_rate": 9.298417954003036e-06,
|
|
"loss": 0.7331,
|
|
"step": 3988
|
|
},
|
|
{
|
|
"epoch": 1.060904255319149,
|
|
"grad_norm": 4.5174150466918945,
|
|
"learning_rate": 9.297968612578448e-06,
|
|
"loss": 0.8392,
|
|
"step": 3989
|
|
},
|
|
{
|
|
"epoch": 1.0611702127659575,
|
|
"grad_norm": 3.8880250453948975,
|
|
"learning_rate": 9.29751913816965e-06,
|
|
"loss": 0.8565,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 1.061436170212766,
|
|
"grad_norm": 3.8482306003570557,
|
|
"learning_rate": 9.297069530790552e-06,
|
|
"loss": 0.6222,
|
|
"step": 3991
|
|
},
|
|
{
|
|
"epoch": 1.0617021276595744,
|
|
"grad_norm": 3.9345664978027344,
|
|
"learning_rate": 9.296619790455062e-06,
|
|
"loss": 0.7166,
|
|
"step": 3992
|
|
},
|
|
{
|
|
"epoch": 1.061968085106383,
|
|
"grad_norm": 4.360013961791992,
|
|
"learning_rate": 9.296169917177099e-06,
|
|
"loss": 0.7584,
|
|
"step": 3993
|
|
},
|
|
{
|
|
"epoch": 1.0622340425531915,
|
|
"grad_norm": 3.7796449661254883,
|
|
"learning_rate": 9.295719910970577e-06,
|
|
"loss": 0.8688,
|
|
"step": 3994
|
|
},
|
|
{
|
|
"epoch": 1.0625,
|
|
"grad_norm": 3.968502998352051,
|
|
"learning_rate": 9.295269771849426e-06,
|
|
"loss": 0.7795,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 1.0627659574468085,
|
|
"grad_norm": 4.514654636383057,
|
|
"learning_rate": 9.294819499827572e-06,
|
|
"loss": 0.8955,
|
|
"step": 3996
|
|
},
|
|
{
|
|
"epoch": 1.063031914893617,
|
|
"grad_norm": 3.8706483840942383,
|
|
"learning_rate": 9.294369094918945e-06,
|
|
"loss": 0.7875,
|
|
"step": 3997
|
|
},
|
|
{
|
|
"epoch": 1.0632978723404256,
|
|
"grad_norm": 3.6928679943084717,
|
|
"learning_rate": 9.293918557137483e-06,
|
|
"loss": 0.7198,
|
|
"step": 3998
|
|
},
|
|
{
|
|
"epoch": 1.063563829787234,
|
|
"grad_norm": 3.9840540885925293,
|
|
"learning_rate": 9.293467886497123e-06,
|
|
"loss": 0.8831,
|
|
"step": 3999
|
|
},
|
|
{
|
|
"epoch": 1.0638297872340425,
|
|
"grad_norm": 4.153161525726318,
|
|
"learning_rate": 9.293017083011814e-06,
|
|
"loss": 0.8204,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 1.0638297872340425,
|
|
"eval_loss": 1.3173630237579346,
|
|
"eval_runtime": 13.912,
|
|
"eval_samples_per_second": 28.752,
|
|
"eval_steps_per_second": 3.594,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 1.064095744680851,
|
|
"grad_norm": 3.50370717048645,
|
|
"learning_rate": 9.2925661466955e-06,
|
|
"loss": 0.6799,
|
|
"step": 4001
|
|
},
|
|
{
|
|
"epoch": 1.0643617021276595,
|
|
"grad_norm": 3.481992244720459,
|
|
"learning_rate": 9.292115077562138e-06,
|
|
"loss": 0.6651,
|
|
"step": 4002
|
|
},
|
|
{
|
|
"epoch": 1.0646276595744681,
|
|
"grad_norm": 3.986703634262085,
|
|
"learning_rate": 9.291663875625681e-06,
|
|
"loss": 0.713,
|
|
"step": 4003
|
|
},
|
|
{
|
|
"epoch": 1.0648936170212766,
|
|
"grad_norm": 3.7703604698181152,
|
|
"learning_rate": 9.291212540900091e-06,
|
|
"loss": 0.8728,
|
|
"step": 4004
|
|
},
|
|
{
|
|
"epoch": 1.065159574468085,
|
|
"grad_norm": 3.9758448600769043,
|
|
"learning_rate": 9.290761073399333e-06,
|
|
"loss": 0.8273,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 1.0654255319148935,
|
|
"grad_norm": 3.999802350997925,
|
|
"learning_rate": 9.290309473137376e-06,
|
|
"loss": 0.8826,
|
|
"step": 4006
|
|
},
|
|
{
|
|
"epoch": 1.0656914893617022,
|
|
"grad_norm": 4.072256088256836,
|
|
"learning_rate": 9.289857740128192e-06,
|
|
"loss": 0.8037,
|
|
"step": 4007
|
|
},
|
|
{
|
|
"epoch": 1.0659574468085107,
|
|
"grad_norm": 3.619701623916626,
|
|
"learning_rate": 9.289405874385759e-06,
|
|
"loss": 0.6833,
|
|
"step": 4008
|
|
},
|
|
{
|
|
"epoch": 1.0662234042553191,
|
|
"grad_norm": 4.227363586425781,
|
|
"learning_rate": 9.288953875924057e-06,
|
|
"loss": 0.8688,
|
|
"step": 4009
|
|
},
|
|
{
|
|
"epoch": 1.0664893617021276,
|
|
"grad_norm": 3.589017629623413,
|
|
"learning_rate": 9.288501744757073e-06,
|
|
"loss": 0.6888,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 1.0667553191489363,
|
|
"grad_norm": 3.9024956226348877,
|
|
"learning_rate": 9.288049480898797e-06,
|
|
"loss": 0.8349,
|
|
"step": 4011
|
|
},
|
|
{
|
|
"epoch": 1.0670212765957447,
|
|
"grad_norm": 3.854668617248535,
|
|
"learning_rate": 9.287597084363222e-06,
|
|
"loss": 0.8158,
|
|
"step": 4012
|
|
},
|
|
{
|
|
"epoch": 1.0672872340425532,
|
|
"grad_norm": 3.511909008026123,
|
|
"learning_rate": 9.287144555164343e-06,
|
|
"loss": 0.8076,
|
|
"step": 4013
|
|
},
|
|
{
|
|
"epoch": 1.0675531914893617,
|
|
"grad_norm": 4.2021098136901855,
|
|
"learning_rate": 9.286691893316165e-06,
|
|
"loss": 0.8434,
|
|
"step": 4014
|
|
},
|
|
{
|
|
"epoch": 1.0678191489361701,
|
|
"grad_norm": 3.823734760284424,
|
|
"learning_rate": 9.286239098832693e-06,
|
|
"loss": 0.8124,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 1.0680851063829788,
|
|
"grad_norm": 3.6504952907562256,
|
|
"learning_rate": 9.285786171727938e-06,
|
|
"loss": 0.7402,
|
|
"step": 4016
|
|
},
|
|
{
|
|
"epoch": 1.0683510638297873,
|
|
"grad_norm": 3.7579758167266846,
|
|
"learning_rate": 9.28533311201591e-06,
|
|
"loss": 0.8335,
|
|
"step": 4017
|
|
},
|
|
{
|
|
"epoch": 1.0686170212765957,
|
|
"grad_norm": 3.902036428451538,
|
|
"learning_rate": 9.284879919710631e-06,
|
|
"loss": 0.8564,
|
|
"step": 4018
|
|
},
|
|
{
|
|
"epoch": 1.0688829787234042,
|
|
"grad_norm": 3.6956422328948975,
|
|
"learning_rate": 9.284426594826124e-06,
|
|
"loss": 0.7766,
|
|
"step": 4019
|
|
},
|
|
{
|
|
"epoch": 1.0691489361702127,
|
|
"grad_norm": 3.866909980773926,
|
|
"learning_rate": 9.283973137376414e-06,
|
|
"loss": 0.8988,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 1.0694148936170214,
|
|
"grad_norm": 4.163184642791748,
|
|
"learning_rate": 9.28351954737553e-06,
|
|
"loss": 0.9235,
|
|
"step": 4021
|
|
},
|
|
{
|
|
"epoch": 1.0696808510638298,
|
|
"grad_norm": 4.208329200744629,
|
|
"learning_rate": 9.28306582483751e-06,
|
|
"loss": 0.7734,
|
|
"step": 4022
|
|
},
|
|
{
|
|
"epoch": 1.0699468085106383,
|
|
"grad_norm": 4.030316352844238,
|
|
"learning_rate": 9.28261196977639e-06,
|
|
"loss": 0.8427,
|
|
"step": 4023
|
|
},
|
|
{
|
|
"epoch": 1.0702127659574467,
|
|
"grad_norm": 3.842853307723999,
|
|
"learning_rate": 9.282157982206212e-06,
|
|
"loss": 0.8647,
|
|
"step": 4024
|
|
},
|
|
{
|
|
"epoch": 1.0704787234042552,
|
|
"grad_norm": 4.306194305419922,
|
|
"learning_rate": 9.281703862141024e-06,
|
|
"loss": 0.7107,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 1.070744680851064,
|
|
"grad_norm": 4.034607887268066,
|
|
"learning_rate": 9.28124960959488e-06,
|
|
"loss": 0.76,
|
|
"step": 4026
|
|
},
|
|
{
|
|
"epoch": 1.0710106382978724,
|
|
"grad_norm": 4.018486022949219,
|
|
"learning_rate": 9.280795224581832e-06,
|
|
"loss": 0.8058,
|
|
"step": 4027
|
|
},
|
|
{
|
|
"epoch": 1.0712765957446808,
|
|
"grad_norm": 4.060681343078613,
|
|
"learning_rate": 9.280340707115938e-06,
|
|
"loss": 0.772,
|
|
"step": 4028
|
|
},
|
|
{
|
|
"epoch": 1.0715425531914893,
|
|
"grad_norm": 3.8870697021484375,
|
|
"learning_rate": 9.279886057211264e-06,
|
|
"loss": 0.8036,
|
|
"step": 4029
|
|
},
|
|
{
|
|
"epoch": 1.071808510638298,
|
|
"grad_norm": 3.455979585647583,
|
|
"learning_rate": 9.279431274881876e-06,
|
|
"loss": 0.6292,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 1.0720744680851064,
|
|
"grad_norm": 3.5263242721557617,
|
|
"learning_rate": 9.278976360141848e-06,
|
|
"loss": 0.7937,
|
|
"step": 4031
|
|
},
|
|
{
|
|
"epoch": 1.0723404255319149,
|
|
"grad_norm": 4.214826583862305,
|
|
"learning_rate": 9.27852131300525e-06,
|
|
"loss": 0.8888,
|
|
"step": 4032
|
|
},
|
|
{
|
|
"epoch": 1.0726063829787233,
|
|
"grad_norm": 3.6315364837646484,
|
|
"learning_rate": 9.278066133486167e-06,
|
|
"loss": 0.7101,
|
|
"step": 4033
|
|
},
|
|
{
|
|
"epoch": 1.0728723404255318,
|
|
"grad_norm": 4.311771869659424,
|
|
"learning_rate": 9.277610821598682e-06,
|
|
"loss": 0.8687,
|
|
"step": 4034
|
|
},
|
|
{
|
|
"epoch": 1.0731382978723405,
|
|
"grad_norm": 3.720752716064453,
|
|
"learning_rate": 9.277155377356881e-06,
|
|
"loss": 0.709,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 1.073404255319149,
|
|
"grad_norm": 3.8687169551849365,
|
|
"learning_rate": 9.276699800774858e-06,
|
|
"loss": 0.7483,
|
|
"step": 4036
|
|
},
|
|
{
|
|
"epoch": 1.0736702127659574,
|
|
"grad_norm": 4.010682582855225,
|
|
"learning_rate": 9.276244091866706e-06,
|
|
"loss": 0.7954,
|
|
"step": 4037
|
|
},
|
|
{
|
|
"epoch": 1.0739361702127659,
|
|
"grad_norm": 3.9716639518737793,
|
|
"learning_rate": 9.27578825064653e-06,
|
|
"loss": 0.8228,
|
|
"step": 4038
|
|
},
|
|
{
|
|
"epoch": 1.0742021276595746,
|
|
"grad_norm": 3.6064131259918213,
|
|
"learning_rate": 9.275332277128428e-06,
|
|
"loss": 0.8019,
|
|
"step": 4039
|
|
},
|
|
{
|
|
"epoch": 1.074468085106383,
|
|
"grad_norm": 3.986684560775757,
|
|
"learning_rate": 9.274876171326514e-06,
|
|
"loss": 0.7684,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 1.0747340425531915,
|
|
"grad_norm": 3.6139955520629883,
|
|
"learning_rate": 9.274419933254897e-06,
|
|
"loss": 0.7885,
|
|
"step": 4041
|
|
},
|
|
{
|
|
"epoch": 1.075,
|
|
"grad_norm": 4.203228950500488,
|
|
"learning_rate": 9.273963562927695e-06,
|
|
"loss": 0.8082,
|
|
"step": 4042
|
|
},
|
|
{
|
|
"epoch": 1.0752659574468084,
|
|
"grad_norm": 4.109843730926514,
|
|
"learning_rate": 9.27350706035903e-06,
|
|
"loss": 0.6948,
|
|
"step": 4043
|
|
},
|
|
{
|
|
"epoch": 1.075531914893617,
|
|
"grad_norm": 3.8464603424072266,
|
|
"learning_rate": 9.273050425563023e-06,
|
|
"loss": 0.8871,
|
|
"step": 4044
|
|
},
|
|
{
|
|
"epoch": 1.0757978723404256,
|
|
"grad_norm": 3.8080790042877197,
|
|
"learning_rate": 9.272593658553806e-06,
|
|
"loss": 0.7375,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 1.076063829787234,
|
|
"grad_norm": 3.829904556274414,
|
|
"learning_rate": 9.272136759345512e-06,
|
|
"loss": 0.7572,
|
|
"step": 4046
|
|
},
|
|
{
|
|
"epoch": 1.0763297872340425,
|
|
"grad_norm": 4.1604390144348145,
|
|
"learning_rate": 9.271679727952274e-06,
|
|
"loss": 0.7503,
|
|
"step": 4047
|
|
},
|
|
{
|
|
"epoch": 1.076595744680851,
|
|
"grad_norm": 3.538896322250366,
|
|
"learning_rate": 9.271222564388238e-06,
|
|
"loss": 0.7042,
|
|
"step": 4048
|
|
},
|
|
{
|
|
"epoch": 1.0768617021276596,
|
|
"grad_norm": 3.960331439971924,
|
|
"learning_rate": 9.270765268667547e-06,
|
|
"loss": 0.8119,
|
|
"step": 4049
|
|
},
|
|
{
|
|
"epoch": 1.077127659574468,
|
|
"grad_norm": 4.355499267578125,
|
|
"learning_rate": 9.270307840804349e-06,
|
|
"loss": 0.8219,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 1.0773936170212766,
|
|
"grad_norm": 4.223673343658447,
|
|
"learning_rate": 9.2698502808128e-06,
|
|
"loss": 0.782,
|
|
"step": 4051
|
|
},
|
|
{
|
|
"epoch": 1.077659574468085,
|
|
"grad_norm": 3.8911452293395996,
|
|
"learning_rate": 9.269392588707056e-06,
|
|
"loss": 0.8562,
|
|
"step": 4052
|
|
},
|
|
{
|
|
"epoch": 1.0779255319148937,
|
|
"grad_norm": 3.9379541873931885,
|
|
"learning_rate": 9.268934764501279e-06,
|
|
"loss": 0.8103,
|
|
"step": 4053
|
|
},
|
|
{
|
|
"epoch": 1.0781914893617022,
|
|
"grad_norm": 4.371243000030518,
|
|
"learning_rate": 9.268476808209635e-06,
|
|
"loss": 0.7773,
|
|
"step": 4054
|
|
},
|
|
{
|
|
"epoch": 1.0784574468085106,
|
|
"grad_norm": 3.5743019580841064,
|
|
"learning_rate": 9.26801871984629e-06,
|
|
"loss": 0.8976,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 1.078723404255319,
|
|
"grad_norm": 3.959336280822754,
|
|
"learning_rate": 9.267560499425425e-06,
|
|
"loss": 0.8294,
|
|
"step": 4056
|
|
},
|
|
{
|
|
"epoch": 1.0789893617021276,
|
|
"grad_norm": 3.2908687591552734,
|
|
"learning_rate": 9.267102146961211e-06,
|
|
"loss": 0.7021,
|
|
"step": 4057
|
|
},
|
|
{
|
|
"epoch": 1.0792553191489362,
|
|
"grad_norm": 3.952495574951172,
|
|
"learning_rate": 9.266643662467834e-06,
|
|
"loss": 0.8368,
|
|
"step": 4058
|
|
},
|
|
{
|
|
"epoch": 1.0795212765957447,
|
|
"grad_norm": 3.691890239715576,
|
|
"learning_rate": 9.266185045959478e-06,
|
|
"loss": 0.7606,
|
|
"step": 4059
|
|
},
|
|
{
|
|
"epoch": 1.0797872340425532,
|
|
"grad_norm": 4.092920780181885,
|
|
"learning_rate": 9.265726297450332e-06,
|
|
"loss": 0.7791,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 1.0800531914893616,
|
|
"grad_norm": 4.004536151885986,
|
|
"learning_rate": 9.265267416954595e-06,
|
|
"loss": 0.7055,
|
|
"step": 4061
|
|
},
|
|
{
|
|
"epoch": 1.0803191489361703,
|
|
"grad_norm": 3.7672064304351807,
|
|
"learning_rate": 9.26480840448646e-06,
|
|
"loss": 0.7552,
|
|
"step": 4062
|
|
},
|
|
{
|
|
"epoch": 1.0805851063829788,
|
|
"grad_norm": 3.8815436363220215,
|
|
"learning_rate": 9.264349260060134e-06,
|
|
"loss": 0.7602,
|
|
"step": 4063
|
|
},
|
|
{
|
|
"epoch": 1.0808510638297872,
|
|
"grad_norm": 4.021637916564941,
|
|
"learning_rate": 9.26388998368982e-06,
|
|
"loss": 0.7595,
|
|
"step": 4064
|
|
},
|
|
{
|
|
"epoch": 1.0811170212765957,
|
|
"grad_norm": 3.9159035682678223,
|
|
"learning_rate": 9.26343057538973e-06,
|
|
"loss": 0.7554,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 1.0813829787234042,
|
|
"grad_norm": 3.9444377422332764,
|
|
"learning_rate": 9.26297103517408e-06,
|
|
"loss": 0.6694,
|
|
"step": 4066
|
|
},
|
|
{
|
|
"epoch": 1.0816489361702128,
|
|
"grad_norm": 3.8889427185058594,
|
|
"learning_rate": 9.262511363057085e-06,
|
|
"loss": 0.7356,
|
|
"step": 4067
|
|
},
|
|
{
|
|
"epoch": 1.0819148936170213,
|
|
"grad_norm": 4.03524923324585,
|
|
"learning_rate": 9.262051559052972e-06,
|
|
"loss": 0.6715,
|
|
"step": 4068
|
|
},
|
|
{
|
|
"epoch": 1.0821808510638298,
|
|
"grad_norm": 4.430936336517334,
|
|
"learning_rate": 9.261591623175965e-06,
|
|
"loss": 0.9173,
|
|
"step": 4069
|
|
},
|
|
{
|
|
"epoch": 1.0824468085106382,
|
|
"grad_norm": 3.784855604171753,
|
|
"learning_rate": 9.261131555440295e-06,
|
|
"loss": 0.8472,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 1.0827127659574467,
|
|
"grad_norm": 3.9647388458251953,
|
|
"learning_rate": 9.260671355860196e-06,
|
|
"loss": 0.6908,
|
|
"step": 4071
|
|
},
|
|
{
|
|
"epoch": 1.0829787234042554,
|
|
"grad_norm": 4.330158710479736,
|
|
"learning_rate": 9.260211024449913e-06,
|
|
"loss": 0.7744,
|
|
"step": 4072
|
|
},
|
|
{
|
|
"epoch": 1.0832446808510638,
|
|
"grad_norm": 3.934960126876831,
|
|
"learning_rate": 9.259750561223682e-06,
|
|
"loss": 0.7585,
|
|
"step": 4073
|
|
},
|
|
{
|
|
"epoch": 1.0835106382978723,
|
|
"grad_norm": 4.234976291656494,
|
|
"learning_rate": 9.259289966195754e-06,
|
|
"loss": 0.7642,
|
|
"step": 4074
|
|
},
|
|
{
|
|
"epoch": 1.0837765957446808,
|
|
"grad_norm": 4.297840118408203,
|
|
"learning_rate": 9.25882923938038e-06,
|
|
"loss": 0.8493,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 1.0840425531914895,
|
|
"grad_norm": 3.9343340396881104,
|
|
"learning_rate": 9.258368380791818e-06,
|
|
"loss": 0.8649,
|
|
"step": 4076
|
|
},
|
|
{
|
|
"epoch": 1.084308510638298,
|
|
"grad_norm": 4.02085018157959,
|
|
"learning_rate": 9.257907390444322e-06,
|
|
"loss": 0.7595,
|
|
"step": 4077
|
|
},
|
|
{
|
|
"epoch": 1.0845744680851064,
|
|
"grad_norm": 4.010712146759033,
|
|
"learning_rate": 9.257446268352158e-06,
|
|
"loss": 0.9151,
|
|
"step": 4078
|
|
},
|
|
{
|
|
"epoch": 1.0848404255319148,
|
|
"grad_norm": 3.8062400817871094,
|
|
"learning_rate": 9.256985014529595e-06,
|
|
"loss": 0.8318,
|
|
"step": 4079
|
|
},
|
|
{
|
|
"epoch": 1.0851063829787233,
|
|
"grad_norm": 4.219789505004883,
|
|
"learning_rate": 9.256523628990903e-06,
|
|
"loss": 0.7924,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 1.085372340425532,
|
|
"grad_norm": 3.7686777114868164,
|
|
"learning_rate": 9.25606211175036e-06,
|
|
"loss": 0.8027,
|
|
"step": 4081
|
|
},
|
|
{
|
|
"epoch": 1.0856382978723405,
|
|
"grad_norm": 3.6773087978363037,
|
|
"learning_rate": 9.255600462822241e-06,
|
|
"loss": 0.7568,
|
|
"step": 4082
|
|
},
|
|
{
|
|
"epoch": 1.085904255319149,
|
|
"grad_norm": 3.480522394180298,
|
|
"learning_rate": 9.255138682220837e-06,
|
|
"loss": 0.7156,
|
|
"step": 4083
|
|
},
|
|
{
|
|
"epoch": 1.0861702127659574,
|
|
"grad_norm": 3.8398611545562744,
|
|
"learning_rate": 9.254676769960429e-06,
|
|
"loss": 0.7162,
|
|
"step": 4084
|
|
},
|
|
{
|
|
"epoch": 1.086436170212766,
|
|
"grad_norm": 3.8505029678344727,
|
|
"learning_rate": 9.254214726055314e-06,
|
|
"loss": 0.8488,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 1.0867021276595745,
|
|
"grad_norm": 4.238323211669922,
|
|
"learning_rate": 9.253752550519787e-06,
|
|
"loss": 0.8742,
|
|
"step": 4086
|
|
},
|
|
{
|
|
"epoch": 1.086968085106383,
|
|
"grad_norm": 3.7396814823150635,
|
|
"learning_rate": 9.253290243368149e-06,
|
|
"loss": 0.8127,
|
|
"step": 4087
|
|
},
|
|
{
|
|
"epoch": 1.0872340425531914,
|
|
"grad_norm": 4.44807767868042,
|
|
"learning_rate": 9.2528278046147e-06,
|
|
"loss": 0.8144,
|
|
"step": 4088
|
|
},
|
|
{
|
|
"epoch": 1.0875,
|
|
"grad_norm": 3.88287091255188,
|
|
"learning_rate": 9.252365234273754e-06,
|
|
"loss": 0.691,
|
|
"step": 4089
|
|
},
|
|
{
|
|
"epoch": 1.0877659574468086,
|
|
"grad_norm": 3.7738873958587646,
|
|
"learning_rate": 9.251902532359622e-06,
|
|
"loss": 0.7662,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 1.088031914893617,
|
|
"grad_norm": 3.789278745651245,
|
|
"learning_rate": 9.251439698886618e-06,
|
|
"loss": 0.7773,
|
|
"step": 4091
|
|
},
|
|
{
|
|
"epoch": 1.0882978723404255,
|
|
"grad_norm": 3.8501172065734863,
|
|
"learning_rate": 9.250976733869065e-06,
|
|
"loss": 0.795,
|
|
"step": 4092
|
|
},
|
|
{
|
|
"epoch": 1.088563829787234,
|
|
"grad_norm": 4.324002265930176,
|
|
"learning_rate": 9.250513637321287e-06,
|
|
"loss": 0.7957,
|
|
"step": 4093
|
|
},
|
|
{
|
|
"epoch": 1.0888297872340424,
|
|
"grad_norm": 3.598450183868408,
|
|
"learning_rate": 9.250050409257612e-06,
|
|
"loss": 0.8029,
|
|
"step": 4094
|
|
},
|
|
{
|
|
"epoch": 1.0890957446808511,
|
|
"grad_norm": 3.749985694885254,
|
|
"learning_rate": 9.249587049692375e-06,
|
|
"loss": 0.7377,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 1.0893617021276596,
|
|
"grad_norm": 3.7555527687072754,
|
|
"learning_rate": 9.24912355863991e-06,
|
|
"loss": 0.7276,
|
|
"step": 4096
|
|
},
|
|
{
|
|
"epoch": 1.089627659574468,
|
|
"grad_norm": 3.826099395751953,
|
|
"learning_rate": 9.248659936114558e-06,
|
|
"loss": 0.9592,
|
|
"step": 4097
|
|
},
|
|
{
|
|
"epoch": 1.0898936170212765,
|
|
"grad_norm": 4.4053263664245605,
|
|
"learning_rate": 9.248196182130669e-06,
|
|
"loss": 0.846,
|
|
"step": 4098
|
|
},
|
|
{
|
|
"epoch": 1.0901595744680852,
|
|
"grad_norm": 3.7693631649017334,
|
|
"learning_rate": 9.247732296702586e-06,
|
|
"loss": 0.8702,
|
|
"step": 4099
|
|
},
|
|
{
|
|
"epoch": 1.0904255319148937,
|
|
"grad_norm": 3.8193347454071045,
|
|
"learning_rate": 9.247268279844666e-06,
|
|
"loss": 0.8124,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 1.0906914893617021,
|
|
"grad_norm": 3.5872762203216553,
|
|
"learning_rate": 9.246804131571263e-06,
|
|
"loss": 0.8409,
|
|
"step": 4101
|
|
},
|
|
{
|
|
"epoch": 1.0909574468085106,
|
|
"grad_norm": 3.6679608821868896,
|
|
"learning_rate": 9.246339851896742e-06,
|
|
"loss": 0.8331,
|
|
"step": 4102
|
|
},
|
|
{
|
|
"epoch": 1.091223404255319,
|
|
"grad_norm": 3.838644027709961,
|
|
"learning_rate": 9.245875440835466e-06,
|
|
"loss": 0.8683,
|
|
"step": 4103
|
|
},
|
|
{
|
|
"epoch": 1.0914893617021277,
|
|
"grad_norm": 4.146610736846924,
|
|
"learning_rate": 9.245410898401806e-06,
|
|
"loss": 0.7721,
|
|
"step": 4104
|
|
},
|
|
{
|
|
"epoch": 1.0917553191489362,
|
|
"grad_norm": 3.685303211212158,
|
|
"learning_rate": 9.244946224610132e-06,
|
|
"loss": 0.6993,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 1.0920212765957447,
|
|
"grad_norm": 3.9541261196136475,
|
|
"learning_rate": 9.244481419474824e-06,
|
|
"loss": 0.7942,
|
|
"step": 4106
|
|
},
|
|
{
|
|
"epoch": 1.0922872340425531,
|
|
"grad_norm": 4.122397422790527,
|
|
"learning_rate": 9.244016483010266e-06,
|
|
"loss": 0.7709,
|
|
"step": 4107
|
|
},
|
|
{
|
|
"epoch": 1.0925531914893618,
|
|
"grad_norm": 4.400294303894043,
|
|
"learning_rate": 9.24355141523084e-06,
|
|
"loss": 0.8702,
|
|
"step": 4108
|
|
},
|
|
{
|
|
"epoch": 1.0928191489361703,
|
|
"grad_norm": 4.555760383605957,
|
|
"learning_rate": 9.243086216150938e-06,
|
|
"loss": 0.8594,
|
|
"step": 4109
|
|
},
|
|
{
|
|
"epoch": 1.0930851063829787,
|
|
"grad_norm": 4.033708095550537,
|
|
"learning_rate": 9.242620885784952e-06,
|
|
"loss": 0.9066,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 1.0933510638297872,
|
|
"grad_norm": 3.908421754837036,
|
|
"learning_rate": 9.24215542414728e-06,
|
|
"loss": 0.7454,
|
|
"step": 4111
|
|
},
|
|
{
|
|
"epoch": 1.0936170212765957,
|
|
"grad_norm": 3.8368232250213623,
|
|
"learning_rate": 9.241689831252327e-06,
|
|
"loss": 0.6895,
|
|
"step": 4112
|
|
},
|
|
{
|
|
"epoch": 1.0938829787234043,
|
|
"grad_norm": 3.6774628162384033,
|
|
"learning_rate": 9.241224107114495e-06,
|
|
"loss": 0.8634,
|
|
"step": 4113
|
|
},
|
|
{
|
|
"epoch": 1.0941489361702128,
|
|
"grad_norm": 4.185787677764893,
|
|
"learning_rate": 9.240758251748195e-06,
|
|
"loss": 0.8685,
|
|
"step": 4114
|
|
},
|
|
{
|
|
"epoch": 1.0944148936170213,
|
|
"grad_norm": 3.8751626014709473,
|
|
"learning_rate": 9.240292265167843e-06,
|
|
"loss": 0.86,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 1.0946808510638297,
|
|
"grad_norm": 4.215353965759277,
|
|
"learning_rate": 9.239826147387857e-06,
|
|
"loss": 0.8188,
|
|
"step": 4116
|
|
},
|
|
{
|
|
"epoch": 1.0949468085106382,
|
|
"grad_norm": 3.7287204265594482,
|
|
"learning_rate": 9.239359898422656e-06,
|
|
"loss": 0.71,
|
|
"step": 4117
|
|
},
|
|
{
|
|
"epoch": 1.0952127659574469,
|
|
"grad_norm": 3.8123693466186523,
|
|
"learning_rate": 9.238893518286668e-06,
|
|
"loss": 0.7727,
|
|
"step": 4118
|
|
},
|
|
{
|
|
"epoch": 1.0954787234042553,
|
|
"grad_norm": 3.990419626235962,
|
|
"learning_rate": 9.238427006994325e-06,
|
|
"loss": 0.7953,
|
|
"step": 4119
|
|
},
|
|
{
|
|
"epoch": 1.0957446808510638,
|
|
"grad_norm": 3.976417303085327,
|
|
"learning_rate": 9.237960364560063e-06,
|
|
"loss": 0.8596,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 1.0960106382978723,
|
|
"grad_norm": 4.219186305999756,
|
|
"learning_rate": 9.237493590998315e-06,
|
|
"loss": 0.809,
|
|
"step": 4121
|
|
},
|
|
{
|
|
"epoch": 1.096276595744681,
|
|
"grad_norm": 3.693594455718994,
|
|
"learning_rate": 9.237026686323527e-06,
|
|
"loss": 0.8066,
|
|
"step": 4122
|
|
},
|
|
{
|
|
"epoch": 1.0965425531914894,
|
|
"grad_norm": 3.7492263317108154,
|
|
"learning_rate": 9.236559650550143e-06,
|
|
"loss": 0.7525,
|
|
"step": 4123
|
|
},
|
|
{
|
|
"epoch": 1.0968085106382979,
|
|
"grad_norm": 4.333737850189209,
|
|
"learning_rate": 9.236092483692617e-06,
|
|
"loss": 0.8718,
|
|
"step": 4124
|
|
},
|
|
{
|
|
"epoch": 1.0970744680851063,
|
|
"grad_norm": 3.505357503890991,
|
|
"learning_rate": 9.235625185765403e-06,
|
|
"loss": 0.8482,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 1.0973404255319148,
|
|
"grad_norm": 4.302443027496338,
|
|
"learning_rate": 9.235157756782957e-06,
|
|
"loss": 1.0046,
|
|
"step": 4126
|
|
},
|
|
{
|
|
"epoch": 1.0976063829787235,
|
|
"grad_norm": 3.8847270011901855,
|
|
"learning_rate": 9.234690196759746e-06,
|
|
"loss": 0.8921,
|
|
"step": 4127
|
|
},
|
|
{
|
|
"epoch": 1.097872340425532,
|
|
"grad_norm": 3.976154327392578,
|
|
"learning_rate": 9.234222505710232e-06,
|
|
"loss": 0.7338,
|
|
"step": 4128
|
|
},
|
|
{
|
|
"epoch": 1.0981382978723404,
|
|
"grad_norm": 3.829082489013672,
|
|
"learning_rate": 9.233754683648891e-06,
|
|
"loss": 0.7554,
|
|
"step": 4129
|
|
},
|
|
{
|
|
"epoch": 1.0984042553191489,
|
|
"grad_norm": 3.693549633026123,
|
|
"learning_rate": 9.233286730590195e-06,
|
|
"loss": 0.7555,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 1.0986702127659576,
|
|
"grad_norm": 3.9820609092712402,
|
|
"learning_rate": 9.232818646548622e-06,
|
|
"loss": 0.8567,
|
|
"step": 4131
|
|
},
|
|
{
|
|
"epoch": 1.098936170212766,
|
|
"grad_norm": 3.9395439624786377,
|
|
"learning_rate": 9.232350431538656e-06,
|
|
"loss": 0.7728,
|
|
"step": 4132
|
|
},
|
|
{
|
|
"epoch": 1.0992021276595745,
|
|
"grad_norm": 4.385442733764648,
|
|
"learning_rate": 9.231882085574788e-06,
|
|
"loss": 0.7803,
|
|
"step": 4133
|
|
},
|
|
{
|
|
"epoch": 1.099468085106383,
|
|
"grad_norm": 4.260448932647705,
|
|
"learning_rate": 9.231413608671504e-06,
|
|
"loss": 0.8111,
|
|
"step": 4134
|
|
},
|
|
{
|
|
"epoch": 1.0997340425531914,
|
|
"grad_norm": 3.9470431804656982,
|
|
"learning_rate": 9.2309450008433e-06,
|
|
"loss": 0.718,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"grad_norm": 3.897451877593994,
|
|
"learning_rate": 9.230476262104678e-06,
|
|
"loss": 0.7257,
|
|
"step": 4136
|
|
},
|
|
{
|
|
"epoch": 1.1002659574468086,
|
|
"grad_norm": 4.178949356079102,
|
|
"learning_rate": 9.23000739247014e-06,
|
|
"loss": 0.8704,
|
|
"step": 4137
|
|
},
|
|
{
|
|
"epoch": 1.100531914893617,
|
|
"grad_norm": 3.9306554794311523,
|
|
"learning_rate": 9.22953839195419e-06,
|
|
"loss": 0.8856,
|
|
"step": 4138
|
|
},
|
|
{
|
|
"epoch": 1.1007978723404255,
|
|
"grad_norm": 3.2699522972106934,
|
|
"learning_rate": 9.229069260571346e-06,
|
|
"loss": 0.7263,
|
|
"step": 4139
|
|
},
|
|
{
|
|
"epoch": 1.101063829787234,
|
|
"grad_norm": 3.980687141418457,
|
|
"learning_rate": 9.228599998336119e-06,
|
|
"loss": 0.8805,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 1.1013297872340426,
|
|
"grad_norm": 4.091682434082031,
|
|
"learning_rate": 9.228130605263028e-06,
|
|
"loss": 0.8572,
|
|
"step": 4141
|
|
},
|
|
{
|
|
"epoch": 1.101595744680851,
|
|
"grad_norm": 3.8642654418945312,
|
|
"learning_rate": 9.2276610813666e-06,
|
|
"loss": 0.7285,
|
|
"step": 4142
|
|
},
|
|
{
|
|
"epoch": 1.1018617021276595,
|
|
"grad_norm": 3.6476948261260986,
|
|
"learning_rate": 9.227191426661359e-06,
|
|
"loss": 0.7736,
|
|
"step": 4143
|
|
},
|
|
{
|
|
"epoch": 1.102127659574468,
|
|
"grad_norm": 3.8674888610839844,
|
|
"learning_rate": 9.22672164116184e-06,
|
|
"loss": 0.6885,
|
|
"step": 4144
|
|
},
|
|
{
|
|
"epoch": 1.1023936170212767,
|
|
"grad_norm": 3.6890833377838135,
|
|
"learning_rate": 9.226251724882576e-06,
|
|
"loss": 0.9683,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 1.1026595744680852,
|
|
"grad_norm": 3.688188314437866,
|
|
"learning_rate": 9.225781677838108e-06,
|
|
"loss": 0.8236,
|
|
"step": 4146
|
|
},
|
|
{
|
|
"epoch": 1.1029255319148936,
|
|
"grad_norm": 4.241778373718262,
|
|
"learning_rate": 9.22531150004298e-06,
|
|
"loss": 0.7666,
|
|
"step": 4147
|
|
},
|
|
{
|
|
"epoch": 1.103191489361702,
|
|
"grad_norm": 3.8804636001586914,
|
|
"learning_rate": 9.22484119151174e-06,
|
|
"loss": 0.7547,
|
|
"step": 4148
|
|
},
|
|
{
|
|
"epoch": 1.1034574468085105,
|
|
"grad_norm": 3.8728346824645996,
|
|
"learning_rate": 9.224370752258938e-06,
|
|
"loss": 0.7856,
|
|
"step": 4149
|
|
},
|
|
{
|
|
"epoch": 1.1037234042553192,
|
|
"grad_norm": 3.4745118618011475,
|
|
"learning_rate": 9.223900182299132e-06,
|
|
"loss": 0.8213,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 1.1039893617021277,
|
|
"grad_norm": 3.9133832454681396,
|
|
"learning_rate": 9.223429481646881e-06,
|
|
"loss": 0.8894,
|
|
"step": 4151
|
|
},
|
|
{
|
|
"epoch": 1.1042553191489362,
|
|
"grad_norm": 3.5466485023498535,
|
|
"learning_rate": 9.22295865031675e-06,
|
|
"loss": 0.7024,
|
|
"step": 4152
|
|
},
|
|
{
|
|
"epoch": 1.1045212765957446,
|
|
"grad_norm": 4.195438385009766,
|
|
"learning_rate": 9.222487688323306e-06,
|
|
"loss": 0.9108,
|
|
"step": 4153
|
|
},
|
|
{
|
|
"epoch": 1.1047872340425533,
|
|
"grad_norm": 4.125967025756836,
|
|
"learning_rate": 9.222016595681122e-06,
|
|
"loss": 0.7909,
|
|
"step": 4154
|
|
},
|
|
{
|
|
"epoch": 1.1050531914893618,
|
|
"grad_norm": 3.8983302116394043,
|
|
"learning_rate": 9.221545372404774e-06,
|
|
"loss": 0.8179,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 1.1053191489361702,
|
|
"grad_norm": 4.264431953430176,
|
|
"learning_rate": 9.22107401850884e-06,
|
|
"loss": 0.8438,
|
|
"step": 4156
|
|
},
|
|
{
|
|
"epoch": 1.1055851063829787,
|
|
"grad_norm": 3.9519243240356445,
|
|
"learning_rate": 9.220602534007908e-06,
|
|
"loss": 0.7254,
|
|
"step": 4157
|
|
},
|
|
{
|
|
"epoch": 1.1058510638297872,
|
|
"grad_norm": 4.435789585113525,
|
|
"learning_rate": 9.220130918916563e-06,
|
|
"loss": 0.8453,
|
|
"step": 4158
|
|
},
|
|
{
|
|
"epoch": 1.1061170212765958,
|
|
"grad_norm": 4.175622463226318,
|
|
"learning_rate": 9.2196591732494e-06,
|
|
"loss": 0.8253,
|
|
"step": 4159
|
|
},
|
|
{
|
|
"epoch": 1.1063829787234043,
|
|
"grad_norm": 3.691840410232544,
|
|
"learning_rate": 9.219187297021015e-06,
|
|
"loss": 0.7372,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 1.1066489361702128,
|
|
"grad_norm": 3.997159957885742,
|
|
"learning_rate": 9.218715290246007e-06,
|
|
"loss": 0.9002,
|
|
"step": 4161
|
|
},
|
|
{
|
|
"epoch": 1.1069148936170212,
|
|
"grad_norm": 3.8894736766815186,
|
|
"learning_rate": 9.21824315293898e-06,
|
|
"loss": 0.8466,
|
|
"step": 4162
|
|
},
|
|
{
|
|
"epoch": 1.1071808510638297,
|
|
"grad_norm": 4.081361293792725,
|
|
"learning_rate": 9.217770885114544e-06,
|
|
"loss": 0.8159,
|
|
"step": 4163
|
|
},
|
|
{
|
|
"epoch": 1.1074468085106384,
|
|
"grad_norm": 3.6552507877349854,
|
|
"learning_rate": 9.21729848678731e-06,
|
|
"loss": 0.7608,
|
|
"step": 4164
|
|
},
|
|
{
|
|
"epoch": 1.1077127659574468,
|
|
"grad_norm": 3.844689130783081,
|
|
"learning_rate": 9.216825957971898e-06,
|
|
"loss": 0.8599,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 1.1079787234042553,
|
|
"grad_norm": 3.742281198501587,
|
|
"learning_rate": 9.216353298682925e-06,
|
|
"loss": 0.8188,
|
|
"step": 4166
|
|
},
|
|
{
|
|
"epoch": 1.1082446808510638,
|
|
"grad_norm": 4.145520210266113,
|
|
"learning_rate": 9.215880508935016e-06,
|
|
"loss": 0.8485,
|
|
"step": 4167
|
|
},
|
|
{
|
|
"epoch": 1.1085106382978724,
|
|
"grad_norm": 4.048991680145264,
|
|
"learning_rate": 9.2154075887428e-06,
|
|
"loss": 0.8058,
|
|
"step": 4168
|
|
},
|
|
{
|
|
"epoch": 1.108776595744681,
|
|
"grad_norm": 3.9312491416931152,
|
|
"learning_rate": 9.214934538120912e-06,
|
|
"loss": 0.8728,
|
|
"step": 4169
|
|
},
|
|
{
|
|
"epoch": 1.1090425531914894,
|
|
"grad_norm": 4.000396251678467,
|
|
"learning_rate": 9.214461357083986e-06,
|
|
"loss": 0.8695,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 1.1093085106382978,
|
|
"grad_norm": 4.0020904541015625,
|
|
"learning_rate": 9.213988045646664e-06,
|
|
"loss": 0.7386,
|
|
"step": 4171
|
|
},
|
|
{
|
|
"epoch": 1.1095744680851063,
|
|
"grad_norm": 3.527221441268921,
|
|
"learning_rate": 9.21351460382359e-06,
|
|
"loss": 0.8856,
|
|
"step": 4172
|
|
},
|
|
{
|
|
"epoch": 1.109840425531915,
|
|
"grad_norm": 3.984145164489746,
|
|
"learning_rate": 9.213041031629413e-06,
|
|
"loss": 0.7518,
|
|
"step": 4173
|
|
},
|
|
{
|
|
"epoch": 1.1101063829787234,
|
|
"grad_norm": 3.6558425426483154,
|
|
"learning_rate": 9.212567329078787e-06,
|
|
"loss": 0.7465,
|
|
"step": 4174
|
|
},
|
|
{
|
|
"epoch": 1.110372340425532,
|
|
"grad_norm": 4.261702060699463,
|
|
"learning_rate": 9.21209349618637e-06,
|
|
"loss": 0.8813,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 1.1106382978723404,
|
|
"grad_norm": 3.556643486022949,
|
|
"learning_rate": 9.211619532966817e-06,
|
|
"loss": 0.8007,
|
|
"step": 4176
|
|
},
|
|
{
|
|
"epoch": 1.110904255319149,
|
|
"grad_norm": 3.8246734142303467,
|
|
"learning_rate": 9.211145439434801e-06,
|
|
"loss": 0.7599,
|
|
"step": 4177
|
|
},
|
|
{
|
|
"epoch": 1.1111702127659575,
|
|
"grad_norm": 3.6221678256988525,
|
|
"learning_rate": 9.210671215604985e-06,
|
|
"loss": 0.8526,
|
|
"step": 4178
|
|
},
|
|
{
|
|
"epoch": 1.111436170212766,
|
|
"grad_norm": 3.6839540004730225,
|
|
"learning_rate": 9.210196861492045e-06,
|
|
"loss": 0.88,
|
|
"step": 4179
|
|
},
|
|
{
|
|
"epoch": 1.1117021276595744,
|
|
"grad_norm": 3.7845680713653564,
|
|
"learning_rate": 9.209722377110657e-06,
|
|
"loss": 0.7316,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 1.111968085106383,
|
|
"grad_norm": 3.9798831939697266,
|
|
"learning_rate": 9.209247762475502e-06,
|
|
"loss": 0.7928,
|
|
"step": 4181
|
|
},
|
|
{
|
|
"epoch": 1.1122340425531916,
|
|
"grad_norm": 3.394745349884033,
|
|
"learning_rate": 9.208773017601265e-06,
|
|
"loss": 0.7692,
|
|
"step": 4182
|
|
},
|
|
{
|
|
"epoch": 1.1125,
|
|
"grad_norm": 3.9630630016326904,
|
|
"learning_rate": 9.208298142502637e-06,
|
|
"loss": 0.8699,
|
|
"step": 4183
|
|
},
|
|
{
|
|
"epoch": 1.1127659574468085,
|
|
"grad_norm": 4.089821815490723,
|
|
"learning_rate": 9.207823137194307e-06,
|
|
"loss": 0.8295,
|
|
"step": 4184
|
|
},
|
|
{
|
|
"epoch": 1.113031914893617,
|
|
"grad_norm": 3.949355125427246,
|
|
"learning_rate": 9.20734800169098e-06,
|
|
"loss": 0.8049,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 1.1132978723404254,
|
|
"grad_norm": 3.588606119155884,
|
|
"learning_rate": 9.206872736007348e-06,
|
|
"loss": 0.7184,
|
|
"step": 4186
|
|
},
|
|
{
|
|
"epoch": 1.1135638297872341,
|
|
"grad_norm": 4.689065933227539,
|
|
"learning_rate": 9.206397340158122e-06,
|
|
"loss": 0.8687,
|
|
"step": 4187
|
|
},
|
|
{
|
|
"epoch": 1.1138297872340426,
|
|
"grad_norm": 3.685701847076416,
|
|
"learning_rate": 9.20592181415801e-06,
|
|
"loss": 0.7918,
|
|
"step": 4188
|
|
},
|
|
{
|
|
"epoch": 1.114095744680851,
|
|
"grad_norm": 4.084209442138672,
|
|
"learning_rate": 9.205446158021725e-06,
|
|
"loss": 0.888,
|
|
"step": 4189
|
|
},
|
|
{
|
|
"epoch": 1.1143617021276595,
|
|
"grad_norm": 3.9949495792388916,
|
|
"learning_rate": 9.204970371763984e-06,
|
|
"loss": 0.7975,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 1.1146276595744682,
|
|
"grad_norm": 4.016841888427734,
|
|
"learning_rate": 9.204494455399509e-06,
|
|
"loss": 0.8413,
|
|
"step": 4191
|
|
},
|
|
{
|
|
"epoch": 1.1148936170212767,
|
|
"grad_norm": 4.1810712814331055,
|
|
"learning_rate": 9.204018408943026e-06,
|
|
"loss": 0.7981,
|
|
"step": 4192
|
|
},
|
|
{
|
|
"epoch": 1.1151595744680851,
|
|
"grad_norm": 3.305906295776367,
|
|
"learning_rate": 9.203542232409263e-06,
|
|
"loss": 0.6931,
|
|
"step": 4193
|
|
},
|
|
{
|
|
"epoch": 1.1154255319148936,
|
|
"grad_norm": 4.138253688812256,
|
|
"learning_rate": 9.203065925812955e-06,
|
|
"loss": 0.7971,
|
|
"step": 4194
|
|
},
|
|
{
|
|
"epoch": 1.115691489361702,
|
|
"grad_norm": 4.11892557144165,
|
|
"learning_rate": 9.20258948916884e-06,
|
|
"loss": 0.7175,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 1.1159574468085107,
|
|
"grad_norm": 3.4274680614471436,
|
|
"learning_rate": 9.202112922491657e-06,
|
|
"loss": 0.7685,
|
|
"step": 4196
|
|
},
|
|
{
|
|
"epoch": 1.1162234042553192,
|
|
"grad_norm": 3.894113540649414,
|
|
"learning_rate": 9.201636225796151e-06,
|
|
"loss": 0.6782,
|
|
"step": 4197
|
|
},
|
|
{
|
|
"epoch": 1.1164893617021276,
|
|
"grad_norm": 4.417131423950195,
|
|
"learning_rate": 9.201159399097077e-06,
|
|
"loss": 0.7756,
|
|
"step": 4198
|
|
},
|
|
{
|
|
"epoch": 1.116755319148936,
|
|
"grad_norm": 4.476882457733154,
|
|
"learning_rate": 9.200682442409183e-06,
|
|
"loss": 0.8896,
|
|
"step": 4199
|
|
},
|
|
{
|
|
"epoch": 1.1170212765957448,
|
|
"grad_norm": 3.9255595207214355,
|
|
"learning_rate": 9.200205355747228e-06,
|
|
"loss": 0.669,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 1.1172872340425533,
|
|
"grad_norm": 3.3451404571533203,
|
|
"learning_rate": 9.199728139125976e-06,
|
|
"loss": 0.6271,
|
|
"step": 4201
|
|
},
|
|
{
|
|
"epoch": 1.1175531914893617,
|
|
"grad_norm": 4.113248825073242,
|
|
"learning_rate": 9.199250792560187e-06,
|
|
"loss": 0.8501,
|
|
"step": 4202
|
|
},
|
|
{
|
|
"epoch": 1.1178191489361702,
|
|
"grad_norm": 3.8352253437042236,
|
|
"learning_rate": 9.198773316064639e-06,
|
|
"loss": 0.6881,
|
|
"step": 4203
|
|
},
|
|
{
|
|
"epoch": 1.1180851063829786,
|
|
"grad_norm": 3.8396568298339844,
|
|
"learning_rate": 9.1982957096541e-06,
|
|
"loss": 0.695,
|
|
"step": 4204
|
|
},
|
|
{
|
|
"epoch": 1.1183510638297873,
|
|
"grad_norm": 4.240661144256592,
|
|
"learning_rate": 9.197817973343347e-06,
|
|
"loss": 0.8287,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 1.1186170212765958,
|
|
"grad_norm": 3.553846836090088,
|
|
"learning_rate": 9.197340107147166e-06,
|
|
"loss": 0.7441,
|
|
"step": 4206
|
|
},
|
|
{
|
|
"epoch": 1.1188829787234043,
|
|
"grad_norm": 4.087765693664551,
|
|
"learning_rate": 9.196862111080339e-06,
|
|
"loss": 0.6896,
|
|
"step": 4207
|
|
},
|
|
{
|
|
"epoch": 1.1191489361702127,
|
|
"grad_norm": 4.254801273345947,
|
|
"learning_rate": 9.196383985157657e-06,
|
|
"loss": 0.794,
|
|
"step": 4208
|
|
},
|
|
{
|
|
"epoch": 1.1194148936170212,
|
|
"grad_norm": 3.8654487133026123,
|
|
"learning_rate": 9.195905729393913e-06,
|
|
"loss": 0.7891,
|
|
"step": 4209
|
|
},
|
|
{
|
|
"epoch": 1.1196808510638299,
|
|
"grad_norm": 4.078755855560303,
|
|
"learning_rate": 9.195427343803906e-06,
|
|
"loss": 0.9686,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 1.1199468085106383,
|
|
"grad_norm": 3.3730618953704834,
|
|
"learning_rate": 9.19494882840244e-06,
|
|
"loss": 0.7186,
|
|
"step": 4211
|
|
},
|
|
{
|
|
"epoch": 1.1202127659574468,
|
|
"grad_norm": 3.944267511367798,
|
|
"learning_rate": 9.194470183204315e-06,
|
|
"loss": 0.7949,
|
|
"step": 4212
|
|
},
|
|
{
|
|
"epoch": 1.1204787234042553,
|
|
"grad_norm": 3.8274521827697754,
|
|
"learning_rate": 9.193991408224347e-06,
|
|
"loss": 0.8237,
|
|
"step": 4213
|
|
},
|
|
{
|
|
"epoch": 1.1207446808510637,
|
|
"grad_norm": 3.8445777893066406,
|
|
"learning_rate": 9.193512503477345e-06,
|
|
"loss": 0.7119,
|
|
"step": 4214
|
|
},
|
|
{
|
|
"epoch": 1.1210106382978724,
|
|
"grad_norm": 4.098488807678223,
|
|
"learning_rate": 9.19303346897813e-06,
|
|
"loss": 0.9102,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 1.1212765957446809,
|
|
"grad_norm": 4.096566200256348,
|
|
"learning_rate": 9.192554304741522e-06,
|
|
"loss": 0.8465,
|
|
"step": 4216
|
|
},
|
|
{
|
|
"epoch": 1.1215425531914893,
|
|
"grad_norm": 3.770343065261841,
|
|
"learning_rate": 9.192075010782348e-06,
|
|
"loss": 0.8278,
|
|
"step": 4217
|
|
},
|
|
{
|
|
"epoch": 1.1218085106382978,
|
|
"grad_norm": 3.843766689300537,
|
|
"learning_rate": 9.191595587115439e-06,
|
|
"loss": 0.8402,
|
|
"step": 4218
|
|
},
|
|
{
|
|
"epoch": 1.1220744680851065,
|
|
"grad_norm": 4.594594478607178,
|
|
"learning_rate": 9.191116033755625e-06,
|
|
"loss": 0.8473,
|
|
"step": 4219
|
|
},
|
|
{
|
|
"epoch": 1.122340425531915,
|
|
"grad_norm": 4.192259311676025,
|
|
"learning_rate": 9.190636350717747e-06,
|
|
"loss": 0.8356,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 1.1226063829787234,
|
|
"grad_norm": 3.919210195541382,
|
|
"learning_rate": 9.190156538016648e-06,
|
|
"loss": 0.8494,
|
|
"step": 4221
|
|
},
|
|
{
|
|
"epoch": 1.1228723404255319,
|
|
"grad_norm": 4.091637134552002,
|
|
"learning_rate": 9.189676595667172e-06,
|
|
"loss": 0.7264,
|
|
"step": 4222
|
|
},
|
|
{
|
|
"epoch": 1.1231382978723405,
|
|
"grad_norm": 4.496889114379883,
|
|
"learning_rate": 9.189196523684168e-06,
|
|
"loss": 0.876,
|
|
"step": 4223
|
|
},
|
|
{
|
|
"epoch": 1.123404255319149,
|
|
"grad_norm": 3.492234230041504,
|
|
"learning_rate": 9.188716322082494e-06,
|
|
"loss": 0.7568,
|
|
"step": 4224
|
|
},
|
|
{
|
|
"epoch": 1.1236702127659575,
|
|
"grad_norm": 3.6598973274230957,
|
|
"learning_rate": 9.188235990877004e-06,
|
|
"loss": 0.683,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 1.123936170212766,
|
|
"grad_norm": 4.073709964752197,
|
|
"learning_rate": 9.18775553008256e-06,
|
|
"loss": 0.7798,
|
|
"step": 4226
|
|
},
|
|
{
|
|
"epoch": 1.1242021276595744,
|
|
"grad_norm": 4.100635528564453,
|
|
"learning_rate": 9.18727493971403e-06,
|
|
"loss": 0.8356,
|
|
"step": 4227
|
|
},
|
|
{
|
|
"epoch": 1.124468085106383,
|
|
"grad_norm": 4.231848239898682,
|
|
"learning_rate": 9.186794219786285e-06,
|
|
"loss": 0.8528,
|
|
"step": 4228
|
|
},
|
|
{
|
|
"epoch": 1.1247340425531915,
|
|
"grad_norm": 3.7461369037628174,
|
|
"learning_rate": 9.186313370314196e-06,
|
|
"loss": 0.7103,
|
|
"step": 4229
|
|
},
|
|
{
|
|
"epoch": 1.125,
|
|
"grad_norm": 3.610039234161377,
|
|
"learning_rate": 9.185832391312644e-06,
|
|
"loss": 0.7271,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 1.1252659574468085,
|
|
"grad_norm": 3.5538463592529297,
|
|
"learning_rate": 9.18535128279651e-06,
|
|
"loss": 0.82,
|
|
"step": 4231
|
|
},
|
|
{
|
|
"epoch": 1.125531914893617,
|
|
"grad_norm": 3.878833293914795,
|
|
"learning_rate": 9.184870044780677e-06,
|
|
"loss": 0.8418,
|
|
"step": 4232
|
|
},
|
|
{
|
|
"epoch": 1.1257978723404256,
|
|
"grad_norm": 4.012277126312256,
|
|
"learning_rate": 9.184388677280038e-06,
|
|
"loss": 0.8024,
|
|
"step": 4233
|
|
},
|
|
{
|
|
"epoch": 1.126063829787234,
|
|
"grad_norm": 3.702630043029785,
|
|
"learning_rate": 9.183907180309489e-06,
|
|
"loss": 0.7978,
|
|
"step": 4234
|
|
},
|
|
{
|
|
"epoch": 1.1263297872340425,
|
|
"grad_norm": 4.186684608459473,
|
|
"learning_rate": 9.183425553883925e-06,
|
|
"loss": 0.8459,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 1.126595744680851,
|
|
"grad_norm": 4.011842727661133,
|
|
"learning_rate": 9.18294379801825e-06,
|
|
"loss": 0.7931,
|
|
"step": 4236
|
|
},
|
|
{
|
|
"epoch": 1.1268617021276595,
|
|
"grad_norm": 4.870151042938232,
|
|
"learning_rate": 9.182461912727368e-06,
|
|
"loss": 0.9028,
|
|
"step": 4237
|
|
},
|
|
{
|
|
"epoch": 1.1271276595744681,
|
|
"grad_norm": 3.5846457481384277,
|
|
"learning_rate": 9.18197989802619e-06,
|
|
"loss": 0.783,
|
|
"step": 4238
|
|
},
|
|
{
|
|
"epoch": 1.1273936170212766,
|
|
"grad_norm": 3.910689115524292,
|
|
"learning_rate": 9.181497753929629e-06,
|
|
"loss": 0.8441,
|
|
"step": 4239
|
|
},
|
|
{
|
|
"epoch": 1.127659574468085,
|
|
"grad_norm": 3.768601894378662,
|
|
"learning_rate": 9.181015480452607e-06,
|
|
"loss": 0.8207,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 1.1279255319148935,
|
|
"grad_norm": 4.229056358337402,
|
|
"learning_rate": 9.18053307761004e-06,
|
|
"loss": 0.8025,
|
|
"step": 4241
|
|
},
|
|
{
|
|
"epoch": 1.1281914893617022,
|
|
"grad_norm": 4.3545050621032715,
|
|
"learning_rate": 9.180050545416861e-06,
|
|
"loss": 0.8154,
|
|
"step": 4242
|
|
},
|
|
{
|
|
"epoch": 1.1284574468085107,
|
|
"grad_norm": 4.138397693634033,
|
|
"learning_rate": 9.179567883887997e-06,
|
|
"loss": 0.8033,
|
|
"step": 4243
|
|
},
|
|
{
|
|
"epoch": 1.1287234042553191,
|
|
"grad_norm": 3.9504189491271973,
|
|
"learning_rate": 9.17908509303838e-06,
|
|
"loss": 0.85,
|
|
"step": 4244
|
|
},
|
|
{
|
|
"epoch": 1.1289893617021276,
|
|
"grad_norm": 3.9662301540374756,
|
|
"learning_rate": 9.178602172882951e-06,
|
|
"loss": 0.8327,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 1.1292553191489363,
|
|
"grad_norm": 4.157631874084473,
|
|
"learning_rate": 9.178119123436651e-06,
|
|
"loss": 0.8558,
|
|
"step": 4246
|
|
},
|
|
{
|
|
"epoch": 1.1295212765957447,
|
|
"grad_norm": 3.9172611236572266,
|
|
"learning_rate": 9.177635944714424e-06,
|
|
"loss": 0.9087,
|
|
"step": 4247
|
|
},
|
|
{
|
|
"epoch": 1.1297872340425532,
|
|
"grad_norm": 3.9250762462615967,
|
|
"learning_rate": 9.177152636731225e-06,
|
|
"loss": 0.7709,
|
|
"step": 4248
|
|
},
|
|
{
|
|
"epoch": 1.1300531914893617,
|
|
"grad_norm": 3.6299500465393066,
|
|
"learning_rate": 9.176669199502004e-06,
|
|
"loss": 0.717,
|
|
"step": 4249
|
|
},
|
|
{
|
|
"epoch": 1.1303191489361701,
|
|
"grad_norm": 4.225446701049805,
|
|
"learning_rate": 9.17618563304172e-06,
|
|
"loss": 0.8766,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 1.1305851063829788,
|
|
"grad_norm": 3.9178264141082764,
|
|
"learning_rate": 9.175701937365337e-06,
|
|
"loss": 0.7634,
|
|
"step": 4251
|
|
},
|
|
{
|
|
"epoch": 1.1308510638297873,
|
|
"grad_norm": 3.905505657196045,
|
|
"learning_rate": 9.175218112487821e-06,
|
|
"loss": 0.7784,
|
|
"step": 4252
|
|
},
|
|
{
|
|
"epoch": 1.1311170212765957,
|
|
"grad_norm": 4.228585243225098,
|
|
"learning_rate": 9.174734158424138e-06,
|
|
"loss": 0.8445,
|
|
"step": 4253
|
|
},
|
|
{
|
|
"epoch": 1.1313829787234042,
|
|
"grad_norm": 3.9836041927337646,
|
|
"learning_rate": 9.174250075189268e-06,
|
|
"loss": 0.8252,
|
|
"step": 4254
|
|
},
|
|
{
|
|
"epoch": 1.1316489361702127,
|
|
"grad_norm": 4.349749565124512,
|
|
"learning_rate": 9.173765862798185e-06,
|
|
"loss": 0.8154,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 1.1319148936170214,
|
|
"grad_norm": 3.7815349102020264,
|
|
"learning_rate": 9.17328152126587e-06,
|
|
"loss": 0.7356,
|
|
"step": 4256
|
|
},
|
|
{
|
|
"epoch": 1.1321808510638298,
|
|
"grad_norm": 3.9180119037628174,
|
|
"learning_rate": 9.172797050607313e-06,
|
|
"loss": 0.8098,
|
|
"step": 4257
|
|
},
|
|
{
|
|
"epoch": 1.1324468085106383,
|
|
"grad_norm": 3.720789670944214,
|
|
"learning_rate": 9.172312450837504e-06,
|
|
"loss": 0.815,
|
|
"step": 4258
|
|
},
|
|
{
|
|
"epoch": 1.1327127659574467,
|
|
"grad_norm": 4.155251502990723,
|
|
"learning_rate": 9.171827721971434e-06,
|
|
"loss": 0.8976,
|
|
"step": 4259
|
|
},
|
|
{
|
|
"epoch": 1.1329787234042552,
|
|
"grad_norm": 4.600409030914307,
|
|
"learning_rate": 9.171342864024103e-06,
|
|
"loss": 0.8868,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 1.133244680851064,
|
|
"grad_norm": 3.8379268646240234,
|
|
"learning_rate": 9.170857877010512e-06,
|
|
"loss": 0.7867,
|
|
"step": 4261
|
|
},
|
|
{
|
|
"epoch": 1.1335106382978724,
|
|
"grad_norm": 4.109460830688477,
|
|
"learning_rate": 9.170372760945668e-06,
|
|
"loss": 0.7826,
|
|
"step": 4262
|
|
},
|
|
{
|
|
"epoch": 1.1337765957446808,
|
|
"grad_norm": 3.895494222640991,
|
|
"learning_rate": 9.16988751584458e-06,
|
|
"loss": 0.854,
|
|
"step": 4263
|
|
},
|
|
{
|
|
"epoch": 1.1340425531914893,
|
|
"grad_norm": 3.7237160205841064,
|
|
"learning_rate": 9.169402141722264e-06,
|
|
"loss": 0.7098,
|
|
"step": 4264
|
|
},
|
|
{
|
|
"epoch": 1.134308510638298,
|
|
"grad_norm": 4.19631814956665,
|
|
"learning_rate": 9.168916638593736e-06,
|
|
"loss": 0.9218,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 1.1345744680851064,
|
|
"grad_norm": 4.052074909210205,
|
|
"learning_rate": 9.168431006474018e-06,
|
|
"loss": 0.8367,
|
|
"step": 4266
|
|
},
|
|
{
|
|
"epoch": 1.1348404255319149,
|
|
"grad_norm": 4.097432613372803,
|
|
"learning_rate": 9.167945245378139e-06,
|
|
"loss": 0.8705,
|
|
"step": 4267
|
|
},
|
|
{
|
|
"epoch": 1.1351063829787233,
|
|
"grad_norm": 3.81488037109375,
|
|
"learning_rate": 9.167459355321127e-06,
|
|
"loss": 0.6803,
|
|
"step": 4268
|
|
},
|
|
{
|
|
"epoch": 1.135372340425532,
|
|
"grad_norm": 4.266942501068115,
|
|
"learning_rate": 9.166973336318015e-06,
|
|
"loss": 0.8108,
|
|
"step": 4269
|
|
},
|
|
{
|
|
"epoch": 1.1356382978723405,
|
|
"grad_norm": 3.9824750423431396,
|
|
"learning_rate": 9.166487188383841e-06,
|
|
"loss": 0.811,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 1.135904255319149,
|
|
"grad_norm": 3.8896446228027344,
|
|
"learning_rate": 9.16600091153365e-06,
|
|
"loss": 0.8925,
|
|
"step": 4271
|
|
},
|
|
{
|
|
"epoch": 1.1361702127659574,
|
|
"grad_norm": 4.690064907073975,
|
|
"learning_rate": 9.165514505782484e-06,
|
|
"loss": 1.1356,
|
|
"step": 4272
|
|
},
|
|
{
|
|
"epoch": 1.1364361702127659,
|
|
"grad_norm": 4.304286479949951,
|
|
"learning_rate": 9.165027971145397e-06,
|
|
"loss": 0.8041,
|
|
"step": 4273
|
|
},
|
|
{
|
|
"epoch": 1.1367021276595746,
|
|
"grad_norm": 4.315762519836426,
|
|
"learning_rate": 9.16454130763744e-06,
|
|
"loss": 0.7519,
|
|
"step": 4274
|
|
},
|
|
{
|
|
"epoch": 1.136968085106383,
|
|
"grad_norm": 4.10341739654541,
|
|
"learning_rate": 9.16405451527367e-06,
|
|
"loss": 0.919,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 1.1372340425531915,
|
|
"grad_norm": 3.7802481651306152,
|
|
"learning_rate": 9.163567594069154e-06,
|
|
"loss": 0.8271,
|
|
"step": 4276
|
|
},
|
|
{
|
|
"epoch": 1.1375,
|
|
"grad_norm": 4.523904323577881,
|
|
"learning_rate": 9.163080544038953e-06,
|
|
"loss": 0.7865,
|
|
"step": 4277
|
|
},
|
|
{
|
|
"epoch": 1.1377659574468084,
|
|
"grad_norm": 3.958662509918213,
|
|
"learning_rate": 9.162593365198138e-06,
|
|
"loss": 0.8165,
|
|
"step": 4278
|
|
},
|
|
{
|
|
"epoch": 1.138031914893617,
|
|
"grad_norm": 3.8943662643432617,
|
|
"learning_rate": 9.162106057561784e-06,
|
|
"loss": 0.7951,
|
|
"step": 4279
|
|
},
|
|
{
|
|
"epoch": 1.1382978723404256,
|
|
"grad_norm": 3.9076874256134033,
|
|
"learning_rate": 9.161618621144967e-06,
|
|
"loss": 0.8135,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 1.138563829787234,
|
|
"grad_norm": 3.5434067249298096,
|
|
"learning_rate": 9.161131055962773e-06,
|
|
"loss": 0.7228,
|
|
"step": 4281
|
|
},
|
|
{
|
|
"epoch": 1.1388297872340425,
|
|
"grad_norm": 4.137996673583984,
|
|
"learning_rate": 9.160643362030284e-06,
|
|
"loss": 0.7711,
|
|
"step": 4282
|
|
},
|
|
{
|
|
"epoch": 1.139095744680851,
|
|
"grad_norm": 3.783001661300659,
|
|
"learning_rate": 9.160155539362589e-06,
|
|
"loss": 0.8494,
|
|
"step": 4283
|
|
},
|
|
{
|
|
"epoch": 1.1393617021276596,
|
|
"grad_norm": 3.8411149978637695,
|
|
"learning_rate": 9.159667587974786e-06,
|
|
"loss": 0.7447,
|
|
"step": 4284
|
|
},
|
|
{
|
|
"epoch": 1.139627659574468,
|
|
"grad_norm": 3.6387648582458496,
|
|
"learning_rate": 9.15917950788197e-06,
|
|
"loss": 0.8385,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 1.1398936170212766,
|
|
"grad_norm": 4.564189910888672,
|
|
"learning_rate": 9.158691299099241e-06,
|
|
"loss": 0.7572,
|
|
"step": 4286
|
|
},
|
|
{
|
|
"epoch": 1.140159574468085,
|
|
"grad_norm": 4.022932529449463,
|
|
"learning_rate": 9.15820296164171e-06,
|
|
"loss": 0.7129,
|
|
"step": 4287
|
|
},
|
|
{
|
|
"epoch": 1.1404255319148937,
|
|
"grad_norm": 4.345612525939941,
|
|
"learning_rate": 9.157714495524481e-06,
|
|
"loss": 0.8371,
|
|
"step": 4288
|
|
},
|
|
{
|
|
"epoch": 1.1406914893617022,
|
|
"grad_norm": 4.161421298980713,
|
|
"learning_rate": 9.157225900762672e-06,
|
|
"loss": 0.7528,
|
|
"step": 4289
|
|
},
|
|
{
|
|
"epoch": 1.1409574468085106,
|
|
"grad_norm": 4.042864799499512,
|
|
"learning_rate": 9.156737177371399e-06,
|
|
"loss": 0.8491,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 1.141223404255319,
|
|
"grad_norm": 3.8026928901672363,
|
|
"learning_rate": 9.156248325365782e-06,
|
|
"loss": 0.8444,
|
|
"step": 4291
|
|
},
|
|
{
|
|
"epoch": 1.1414893617021278,
|
|
"grad_norm": 4.251069068908691,
|
|
"learning_rate": 9.15575934476095e-06,
|
|
"loss": 0.7857,
|
|
"step": 4292
|
|
},
|
|
{
|
|
"epoch": 1.1417553191489362,
|
|
"grad_norm": 3.8531103134155273,
|
|
"learning_rate": 9.155270235572031e-06,
|
|
"loss": 0.867,
|
|
"step": 4293
|
|
},
|
|
{
|
|
"epoch": 1.1420212765957447,
|
|
"grad_norm": 3.975175142288208,
|
|
"learning_rate": 9.15478099781416e-06,
|
|
"loss": 0.808,
|
|
"step": 4294
|
|
},
|
|
{
|
|
"epoch": 1.1422872340425532,
|
|
"grad_norm": 3.695078134536743,
|
|
"learning_rate": 9.154291631502471e-06,
|
|
"loss": 0.7942,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 1.1425531914893616,
|
|
"grad_norm": 3.8435237407684326,
|
|
"learning_rate": 9.15380213665211e-06,
|
|
"loss": 0.8701,
|
|
"step": 4296
|
|
},
|
|
{
|
|
"epoch": 1.1428191489361703,
|
|
"grad_norm": 3.642451047897339,
|
|
"learning_rate": 9.153312513278219e-06,
|
|
"loss": 0.7479,
|
|
"step": 4297
|
|
},
|
|
{
|
|
"epoch": 1.1430851063829788,
|
|
"grad_norm": 3.8612117767333984,
|
|
"learning_rate": 9.15282276139595e-06,
|
|
"loss": 0.8394,
|
|
"step": 4298
|
|
},
|
|
{
|
|
"epoch": 1.1433510638297872,
|
|
"grad_norm": 3.818319082260132,
|
|
"learning_rate": 9.152332881020454e-06,
|
|
"loss": 0.789,
|
|
"step": 4299
|
|
},
|
|
{
|
|
"epoch": 1.1436170212765957,
|
|
"grad_norm": 3.6774802207946777,
|
|
"learning_rate": 9.15184287216689e-06,
|
|
"loss": 0.7991,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 1.1438829787234042,
|
|
"grad_norm": 4.338614463806152,
|
|
"learning_rate": 9.15135273485042e-06,
|
|
"loss": 0.8602,
|
|
"step": 4301
|
|
},
|
|
{
|
|
"epoch": 1.1441489361702128,
|
|
"grad_norm": 3.9688498973846436,
|
|
"learning_rate": 9.15086246908621e-06,
|
|
"loss": 0.7759,
|
|
"step": 4302
|
|
},
|
|
{
|
|
"epoch": 1.1444148936170213,
|
|
"grad_norm": 3.848708152770996,
|
|
"learning_rate": 9.150372074889427e-06,
|
|
"loss": 0.7635,
|
|
"step": 4303
|
|
},
|
|
{
|
|
"epoch": 1.1446808510638298,
|
|
"grad_norm": 4.042501926422119,
|
|
"learning_rate": 9.149881552275244e-06,
|
|
"loss": 0.8029,
|
|
"step": 4304
|
|
},
|
|
{
|
|
"epoch": 1.1449468085106382,
|
|
"grad_norm": 4.199094772338867,
|
|
"learning_rate": 9.149390901258841e-06,
|
|
"loss": 0.8343,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 1.1452127659574467,
|
|
"grad_norm": 4.045470714569092,
|
|
"learning_rate": 9.1489001218554e-06,
|
|
"loss": 0.831,
|
|
"step": 4306
|
|
},
|
|
{
|
|
"epoch": 1.1454787234042554,
|
|
"grad_norm": 3.7915914058685303,
|
|
"learning_rate": 9.148409214080103e-06,
|
|
"loss": 0.8476,
|
|
"step": 4307
|
|
},
|
|
{
|
|
"epoch": 1.1457446808510638,
|
|
"grad_norm": 3.7452378273010254,
|
|
"learning_rate": 9.14791817794814e-06,
|
|
"loss": 0.776,
|
|
"step": 4308
|
|
},
|
|
{
|
|
"epoch": 1.1460106382978723,
|
|
"grad_norm": 3.521505355834961,
|
|
"learning_rate": 9.147427013474706e-06,
|
|
"loss": 0.6753,
|
|
"step": 4309
|
|
},
|
|
{
|
|
"epoch": 1.1462765957446808,
|
|
"grad_norm": 3.906930923461914,
|
|
"learning_rate": 9.146935720674996e-06,
|
|
"loss": 0.6909,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 1.1465425531914895,
|
|
"grad_norm": 4.262080192565918,
|
|
"learning_rate": 9.146444299564215e-06,
|
|
"loss": 0.8444,
|
|
"step": 4311
|
|
},
|
|
{
|
|
"epoch": 1.146808510638298,
|
|
"grad_norm": 4.085954666137695,
|
|
"learning_rate": 9.145952750157563e-06,
|
|
"loss": 0.7587,
|
|
"step": 4312
|
|
},
|
|
{
|
|
"epoch": 1.1470744680851064,
|
|
"grad_norm": 3.9519617557525635,
|
|
"learning_rate": 9.145461072470253e-06,
|
|
"loss": 0.8757,
|
|
"step": 4313
|
|
},
|
|
{
|
|
"epoch": 1.1473404255319148,
|
|
"grad_norm": 4.349664211273193,
|
|
"learning_rate": 9.144969266517495e-06,
|
|
"loss": 0.7766,
|
|
"step": 4314
|
|
},
|
|
{
|
|
"epoch": 1.1476063829787235,
|
|
"grad_norm": 5.140100955963135,
|
|
"learning_rate": 9.144477332314509e-06,
|
|
"loss": 0.9414,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 1.147872340425532,
|
|
"grad_norm": 3.641763210296631,
|
|
"learning_rate": 9.143985269876516e-06,
|
|
"loss": 0.7562,
|
|
"step": 4316
|
|
},
|
|
{
|
|
"epoch": 1.1481382978723405,
|
|
"grad_norm": 3.641606092453003,
|
|
"learning_rate": 9.143493079218738e-06,
|
|
"loss": 0.7992,
|
|
"step": 4317
|
|
},
|
|
{
|
|
"epoch": 1.148404255319149,
|
|
"grad_norm": 4.611671447753906,
|
|
"learning_rate": 9.143000760356407e-06,
|
|
"loss": 0.8306,
|
|
"step": 4318
|
|
},
|
|
{
|
|
"epoch": 1.1486702127659574,
|
|
"grad_norm": 3.4973011016845703,
|
|
"learning_rate": 9.142508313304754e-06,
|
|
"loss": 0.7915,
|
|
"step": 4319
|
|
},
|
|
{
|
|
"epoch": 1.148936170212766,
|
|
"grad_norm": 3.9405927658081055,
|
|
"learning_rate": 9.142015738079017e-06,
|
|
"loss": 0.8279,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 1.1492021276595745,
|
|
"grad_norm": 4.37050199508667,
|
|
"learning_rate": 9.141523034694436e-06,
|
|
"loss": 0.8506,
|
|
"step": 4321
|
|
},
|
|
{
|
|
"epoch": 1.149468085106383,
|
|
"grad_norm": 4.181821346282959,
|
|
"learning_rate": 9.141030203166256e-06,
|
|
"loss": 0.8439,
|
|
"step": 4322
|
|
},
|
|
{
|
|
"epoch": 1.1497340425531914,
|
|
"grad_norm": 3.8523123264312744,
|
|
"learning_rate": 9.140537243509729e-06,
|
|
"loss": 0.7565,
|
|
"step": 4323
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"grad_norm": 3.5637168884277344,
|
|
"learning_rate": 9.140044155740102e-06,
|
|
"loss": 0.7406,
|
|
"step": 4324
|
|
},
|
|
{
|
|
"epoch": 1.1502659574468086,
|
|
"grad_norm": 3.8401317596435547,
|
|
"learning_rate": 9.139550939872635e-06,
|
|
"loss": 0.8231,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 1.150531914893617,
|
|
"grad_norm": 4.033459186553955,
|
|
"learning_rate": 9.139057595922587e-06,
|
|
"loss": 0.7585,
|
|
"step": 4326
|
|
},
|
|
{
|
|
"epoch": 1.1507978723404255,
|
|
"grad_norm": 4.144162654876709,
|
|
"learning_rate": 9.138564123905225e-06,
|
|
"loss": 0.8237,
|
|
"step": 4327
|
|
},
|
|
{
|
|
"epoch": 1.151063829787234,
|
|
"grad_norm": 4.219383716583252,
|
|
"learning_rate": 9.138070523835816e-06,
|
|
"loss": 0.793,
|
|
"step": 4328
|
|
},
|
|
{
|
|
"epoch": 1.1513297872340424,
|
|
"grad_norm": 4.144248962402344,
|
|
"learning_rate": 9.137576795729635e-06,
|
|
"loss": 0.743,
|
|
"step": 4329
|
|
},
|
|
{
|
|
"epoch": 1.1515957446808511,
|
|
"grad_norm": 3.836845636367798,
|
|
"learning_rate": 9.137082939601953e-06,
|
|
"loss": 0.7829,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 1.1518617021276596,
|
|
"grad_norm": 3.8342814445495605,
|
|
"learning_rate": 9.136588955468057e-06,
|
|
"loss": 0.7298,
|
|
"step": 4331
|
|
},
|
|
{
|
|
"epoch": 1.152127659574468,
|
|
"grad_norm": 3.852695941925049,
|
|
"learning_rate": 9.136094843343228e-06,
|
|
"loss": 0.8051,
|
|
"step": 4332
|
|
},
|
|
{
|
|
"epoch": 1.1523936170212765,
|
|
"grad_norm": 3.9740166664123535,
|
|
"learning_rate": 9.135600603242753e-06,
|
|
"loss": 0.8096,
|
|
"step": 4333
|
|
},
|
|
{
|
|
"epoch": 1.1526595744680852,
|
|
"grad_norm": 4.557644367218018,
|
|
"learning_rate": 9.13510623518193e-06,
|
|
"loss": 0.8826,
|
|
"step": 4334
|
|
},
|
|
{
|
|
"epoch": 1.1529255319148937,
|
|
"grad_norm": 4.095839500427246,
|
|
"learning_rate": 9.13461173917605e-06,
|
|
"loss": 0.7624,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 1.1531914893617021,
|
|
"grad_norm": 3.6598823070526123,
|
|
"learning_rate": 9.134117115240412e-06,
|
|
"loss": 0.6786,
|
|
"step": 4336
|
|
},
|
|
{
|
|
"epoch": 1.1534574468085106,
|
|
"grad_norm": 4.052873611450195,
|
|
"learning_rate": 9.133622363390326e-06,
|
|
"loss": 0.7476,
|
|
"step": 4337
|
|
},
|
|
{
|
|
"epoch": 1.1537234042553193,
|
|
"grad_norm": 3.892709255218506,
|
|
"learning_rate": 9.133127483641096e-06,
|
|
"loss": 0.7902,
|
|
"step": 4338
|
|
},
|
|
{
|
|
"epoch": 1.1539893617021277,
|
|
"grad_norm": 4.127117156982422,
|
|
"learning_rate": 9.132632476008036e-06,
|
|
"loss": 0.8427,
|
|
"step": 4339
|
|
},
|
|
{
|
|
"epoch": 1.1542553191489362,
|
|
"grad_norm": 3.911402463912964,
|
|
"learning_rate": 9.132137340506464e-06,
|
|
"loss": 0.744,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 1.1545212765957447,
|
|
"grad_norm": 4.6202826499938965,
|
|
"learning_rate": 9.131642077151695e-06,
|
|
"loss": 0.816,
|
|
"step": 4341
|
|
},
|
|
{
|
|
"epoch": 1.1547872340425531,
|
|
"grad_norm": 3.967888593673706,
|
|
"learning_rate": 9.131146685959055e-06,
|
|
"loss": 0.8608,
|
|
"step": 4342
|
|
},
|
|
{
|
|
"epoch": 1.1550531914893618,
|
|
"grad_norm": 3.7461965084075928,
|
|
"learning_rate": 9.130651166943875e-06,
|
|
"loss": 0.8002,
|
|
"step": 4343
|
|
},
|
|
{
|
|
"epoch": 1.1553191489361703,
|
|
"grad_norm": 3.893925666809082,
|
|
"learning_rate": 9.130155520121484e-06,
|
|
"loss": 0.7651,
|
|
"step": 4344
|
|
},
|
|
{
|
|
"epoch": 1.1555851063829787,
|
|
"grad_norm": 4.108353614807129,
|
|
"learning_rate": 9.129659745507219e-06,
|
|
"loss": 0.847,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 1.1558510638297872,
|
|
"grad_norm": 3.766580104827881,
|
|
"learning_rate": 9.129163843116417e-06,
|
|
"loss": 0.7361,
|
|
"step": 4346
|
|
},
|
|
{
|
|
"epoch": 1.1561170212765957,
|
|
"grad_norm": 4.005224227905273,
|
|
"learning_rate": 9.128667812964428e-06,
|
|
"loss": 0.846,
|
|
"step": 4347
|
|
},
|
|
{
|
|
"epoch": 1.1563829787234043,
|
|
"grad_norm": 4.085299491882324,
|
|
"learning_rate": 9.128171655066592e-06,
|
|
"loss": 0.7435,
|
|
"step": 4348
|
|
},
|
|
{
|
|
"epoch": 1.1566489361702128,
|
|
"grad_norm": 3.649341583251953,
|
|
"learning_rate": 9.127675369438267e-06,
|
|
"loss": 0.7848,
|
|
"step": 4349
|
|
},
|
|
{
|
|
"epoch": 1.1569148936170213,
|
|
"grad_norm": 4.286210536956787,
|
|
"learning_rate": 9.127178956094805e-06,
|
|
"loss": 0.8657,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 1.1571808510638297,
|
|
"grad_norm": 3.8484995365142822,
|
|
"learning_rate": 9.12668241505157e-06,
|
|
"loss": 0.7356,
|
|
"step": 4351
|
|
},
|
|
{
|
|
"epoch": 1.1574468085106382,
|
|
"grad_norm": 3.80110239982605,
|
|
"learning_rate": 9.12618574632392e-06,
|
|
"loss": 0.8581,
|
|
"step": 4352
|
|
},
|
|
{
|
|
"epoch": 1.1577127659574469,
|
|
"grad_norm": 4.16612434387207,
|
|
"learning_rate": 9.125688949927223e-06,
|
|
"loss": 0.9135,
|
|
"step": 4353
|
|
},
|
|
{
|
|
"epoch": 1.1579787234042553,
|
|
"grad_norm": 4.107837677001953,
|
|
"learning_rate": 9.125192025876855e-06,
|
|
"loss": 0.8993,
|
|
"step": 4354
|
|
},
|
|
{
|
|
"epoch": 1.1582446808510638,
|
|
"grad_norm": 3.7631843090057373,
|
|
"learning_rate": 9.124694974188188e-06,
|
|
"loss": 0.7997,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 1.1585106382978723,
|
|
"grad_norm": 4.244007587432861,
|
|
"learning_rate": 9.124197794876604e-06,
|
|
"loss": 0.806,
|
|
"step": 4356
|
|
},
|
|
{
|
|
"epoch": 1.1587765957446807,
|
|
"grad_norm": 3.4537291526794434,
|
|
"learning_rate": 9.123700487957484e-06,
|
|
"loss": 0.7259,
|
|
"step": 4357
|
|
},
|
|
{
|
|
"epoch": 1.1590425531914894,
|
|
"grad_norm": 4.083813667297363,
|
|
"learning_rate": 9.123203053446215e-06,
|
|
"loss": 0.7935,
|
|
"step": 4358
|
|
},
|
|
{
|
|
"epoch": 1.1593085106382979,
|
|
"grad_norm": 3.842515707015991,
|
|
"learning_rate": 9.12270549135819e-06,
|
|
"loss": 0.8403,
|
|
"step": 4359
|
|
},
|
|
{
|
|
"epoch": 1.1595744680851063,
|
|
"grad_norm": 3.8198819160461426,
|
|
"learning_rate": 9.122207801708802e-06,
|
|
"loss": 0.8035,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 1.1598404255319148,
|
|
"grad_norm": 4.05394172668457,
|
|
"learning_rate": 9.121709984513453e-06,
|
|
"loss": 0.6678,
|
|
"step": 4361
|
|
},
|
|
{
|
|
"epoch": 1.1601063829787235,
|
|
"grad_norm": 3.8895061016082764,
|
|
"learning_rate": 9.121212039787543e-06,
|
|
"loss": 0.7822,
|
|
"step": 4362
|
|
},
|
|
{
|
|
"epoch": 1.160372340425532,
|
|
"grad_norm": 4.040393829345703,
|
|
"learning_rate": 9.12071396754648e-06,
|
|
"loss": 0.8669,
|
|
"step": 4363
|
|
},
|
|
{
|
|
"epoch": 1.1606382978723404,
|
|
"grad_norm": 3.8143858909606934,
|
|
"learning_rate": 9.120215767805677e-06,
|
|
"loss": 0.9251,
|
|
"step": 4364
|
|
},
|
|
{
|
|
"epoch": 1.1609042553191489,
|
|
"grad_norm": 3.8011443614959717,
|
|
"learning_rate": 9.119717440580547e-06,
|
|
"loss": 0.7142,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 1.1611702127659576,
|
|
"grad_norm": 4.147587776184082,
|
|
"learning_rate": 9.119218985886506e-06,
|
|
"loss": 0.8196,
|
|
"step": 4366
|
|
},
|
|
{
|
|
"epoch": 1.161436170212766,
|
|
"grad_norm": 4.035295009613037,
|
|
"learning_rate": 9.118720403738984e-06,
|
|
"loss": 0.9006,
|
|
"step": 4367
|
|
},
|
|
{
|
|
"epoch": 1.1617021276595745,
|
|
"grad_norm": 4.253767967224121,
|
|
"learning_rate": 9.118221694153401e-06,
|
|
"loss": 0.9149,
|
|
"step": 4368
|
|
},
|
|
{
|
|
"epoch": 1.161968085106383,
|
|
"grad_norm": 3.7400970458984375,
|
|
"learning_rate": 9.11772285714519e-06,
|
|
"loss": 0.847,
|
|
"step": 4369
|
|
},
|
|
{
|
|
"epoch": 1.1622340425531914,
|
|
"grad_norm": 4.12266731262207,
|
|
"learning_rate": 9.117223892729788e-06,
|
|
"loss": 0.8159,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 1.1625,
|
|
"grad_norm": 3.939617395401001,
|
|
"learning_rate": 9.11672480092263e-06,
|
|
"loss": 0.8515,
|
|
"step": 4371
|
|
},
|
|
{
|
|
"epoch": 1.1627659574468086,
|
|
"grad_norm": 3.597660541534424,
|
|
"learning_rate": 9.11622558173916e-06,
|
|
"loss": 0.7139,
|
|
"step": 4372
|
|
},
|
|
{
|
|
"epoch": 1.163031914893617,
|
|
"grad_norm": 3.8929126262664795,
|
|
"learning_rate": 9.115726235194825e-06,
|
|
"loss": 0.755,
|
|
"step": 4373
|
|
},
|
|
{
|
|
"epoch": 1.1632978723404255,
|
|
"grad_norm": 3.9748990535736084,
|
|
"learning_rate": 9.115226761305071e-06,
|
|
"loss": 0.9779,
|
|
"step": 4374
|
|
},
|
|
{
|
|
"epoch": 1.163563829787234,
|
|
"grad_norm": 3.6702117919921875,
|
|
"learning_rate": 9.11472716008536e-06,
|
|
"loss": 0.7913,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 1.1638297872340426,
|
|
"grad_norm": 3.5676674842834473,
|
|
"learning_rate": 9.114227431551144e-06,
|
|
"loss": 0.8714,
|
|
"step": 4376
|
|
},
|
|
{
|
|
"epoch": 1.164095744680851,
|
|
"grad_norm": 3.871457576751709,
|
|
"learning_rate": 9.113727575717887e-06,
|
|
"loss": 0.7551,
|
|
"step": 4377
|
|
},
|
|
{
|
|
"epoch": 1.1643617021276595,
|
|
"grad_norm": 3.709536552429199,
|
|
"learning_rate": 9.113227592601057e-06,
|
|
"loss": 0.7476,
|
|
"step": 4378
|
|
},
|
|
{
|
|
"epoch": 1.164627659574468,
|
|
"grad_norm": 4.048936367034912,
|
|
"learning_rate": 9.112727482216123e-06,
|
|
"loss": 0.822,
|
|
"step": 4379
|
|
},
|
|
{
|
|
"epoch": 1.1648936170212765,
|
|
"grad_norm": 4.941551685333252,
|
|
"learning_rate": 9.112227244578557e-06,
|
|
"loss": 0.942,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 1.1651595744680852,
|
|
"grad_norm": 3.971956491470337,
|
|
"learning_rate": 9.111726879703839e-06,
|
|
"loss": 0.898,
|
|
"step": 4381
|
|
},
|
|
{
|
|
"epoch": 1.1654255319148936,
|
|
"grad_norm": 4.139491558074951,
|
|
"learning_rate": 9.111226387607452e-06,
|
|
"loss": 0.9185,
|
|
"step": 4382
|
|
},
|
|
{
|
|
"epoch": 1.165691489361702,
|
|
"grad_norm": 3.8217787742614746,
|
|
"learning_rate": 9.110725768304878e-06,
|
|
"loss": 0.8598,
|
|
"step": 4383
|
|
},
|
|
{
|
|
"epoch": 1.1659574468085105,
|
|
"grad_norm": 3.656966209411621,
|
|
"learning_rate": 9.11022502181161e-06,
|
|
"loss": 0.7433,
|
|
"step": 4384
|
|
},
|
|
{
|
|
"epoch": 1.1662234042553192,
|
|
"grad_norm": 4.29415225982666,
|
|
"learning_rate": 9.10972414814314e-06,
|
|
"loss": 0.7777,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 1.1664893617021277,
|
|
"grad_norm": 3.9143810272216797,
|
|
"learning_rate": 9.109223147314968e-06,
|
|
"loss": 0.678,
|
|
"step": 4386
|
|
},
|
|
{
|
|
"epoch": 1.1667553191489362,
|
|
"grad_norm": 4.056838512420654,
|
|
"learning_rate": 9.108722019342592e-06,
|
|
"loss": 0.6778,
|
|
"step": 4387
|
|
},
|
|
{
|
|
"epoch": 1.1670212765957446,
|
|
"grad_norm": 3.9018867015838623,
|
|
"learning_rate": 9.10822076424152e-06,
|
|
"loss": 0.8195,
|
|
"step": 4388
|
|
},
|
|
{
|
|
"epoch": 1.1672872340425533,
|
|
"grad_norm": 4.0093994140625,
|
|
"learning_rate": 9.10771938202726e-06,
|
|
"loss": 0.9474,
|
|
"step": 4389
|
|
},
|
|
{
|
|
"epoch": 1.1675531914893618,
|
|
"grad_norm": 4.224606037139893,
|
|
"learning_rate": 9.107217872715326e-06,
|
|
"loss": 0.7376,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 1.1678191489361702,
|
|
"grad_norm": 3.831489086151123,
|
|
"learning_rate": 9.106716236321236e-06,
|
|
"loss": 0.731,
|
|
"step": 4391
|
|
},
|
|
{
|
|
"epoch": 1.1680851063829787,
|
|
"grad_norm": 3.8180394172668457,
|
|
"learning_rate": 9.106214472860511e-06,
|
|
"loss": 0.7458,
|
|
"step": 4392
|
|
},
|
|
{
|
|
"epoch": 1.1683510638297872,
|
|
"grad_norm": 3.393148899078369,
|
|
"learning_rate": 9.105712582348676e-06,
|
|
"loss": 0.7216,
|
|
"step": 4393
|
|
},
|
|
{
|
|
"epoch": 1.1686170212765958,
|
|
"grad_norm": 4.6142964363098145,
|
|
"learning_rate": 9.105210564801259e-06,
|
|
"loss": 0.7643,
|
|
"step": 4394
|
|
},
|
|
{
|
|
"epoch": 1.1688829787234043,
|
|
"grad_norm": 4.428558826446533,
|
|
"learning_rate": 9.104708420233794e-06,
|
|
"loss": 0.8364,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 1.1691489361702128,
|
|
"grad_norm": 4.209799766540527,
|
|
"learning_rate": 9.104206148661819e-06,
|
|
"loss": 0.7965,
|
|
"step": 4396
|
|
},
|
|
{
|
|
"epoch": 1.1694148936170212,
|
|
"grad_norm": 4.0707831382751465,
|
|
"learning_rate": 9.10370375010087e-06,
|
|
"loss": 0.7676,
|
|
"step": 4397
|
|
},
|
|
{
|
|
"epoch": 1.1696808510638297,
|
|
"grad_norm": 3.684016227722168,
|
|
"learning_rate": 9.103201224566499e-06,
|
|
"loss": 0.8018,
|
|
"step": 4398
|
|
},
|
|
{
|
|
"epoch": 1.1699468085106384,
|
|
"grad_norm": 4.157726287841797,
|
|
"learning_rate": 9.10269857207425e-06,
|
|
"loss": 0.8431,
|
|
"step": 4399
|
|
},
|
|
{
|
|
"epoch": 1.1702127659574468,
|
|
"grad_norm": 3.866776704788208,
|
|
"learning_rate": 9.102195792639677e-06,
|
|
"loss": 0.9013,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 1.1704787234042553,
|
|
"grad_norm": 3.8174455165863037,
|
|
"learning_rate": 9.101692886278336e-06,
|
|
"loss": 0.8174,
|
|
"step": 4401
|
|
},
|
|
{
|
|
"epoch": 1.1707446808510638,
|
|
"grad_norm": 4.051540851593018,
|
|
"learning_rate": 9.101189853005788e-06,
|
|
"loss": 0.8006,
|
|
"step": 4402
|
|
},
|
|
{
|
|
"epoch": 1.1710106382978722,
|
|
"grad_norm": 4.115768909454346,
|
|
"learning_rate": 9.100686692837598e-06,
|
|
"loss": 0.8905,
|
|
"step": 4403
|
|
},
|
|
{
|
|
"epoch": 1.171276595744681,
|
|
"grad_norm": 3.989694595336914,
|
|
"learning_rate": 9.100183405789334e-06,
|
|
"loss": 0.8763,
|
|
"step": 4404
|
|
},
|
|
{
|
|
"epoch": 1.1715425531914894,
|
|
"grad_norm": 3.5945072174072266,
|
|
"learning_rate": 9.099679991876567e-06,
|
|
"loss": 0.7173,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 1.1718085106382978,
|
|
"grad_norm": 3.627795934677124,
|
|
"learning_rate": 9.099176451114876e-06,
|
|
"loss": 0.7708,
|
|
"step": 4406
|
|
},
|
|
{
|
|
"epoch": 1.1720744680851063,
|
|
"grad_norm": 4.366139888763428,
|
|
"learning_rate": 9.098672783519837e-06,
|
|
"loss": 0.7882,
|
|
"step": 4407
|
|
},
|
|
{
|
|
"epoch": 1.172340425531915,
|
|
"grad_norm": 4.13855504989624,
|
|
"learning_rate": 9.098168989107038e-06,
|
|
"loss": 0.7776,
|
|
"step": 4408
|
|
},
|
|
{
|
|
"epoch": 1.1726063829787234,
|
|
"grad_norm": 3.8078205585479736,
|
|
"learning_rate": 9.097665067892066e-06,
|
|
"loss": 0.7194,
|
|
"step": 4409
|
|
},
|
|
{
|
|
"epoch": 1.172872340425532,
|
|
"grad_norm": 3.676452398300171,
|
|
"learning_rate": 9.09716101989051e-06,
|
|
"loss": 0.7386,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 1.1731382978723404,
|
|
"grad_norm": 4.525330066680908,
|
|
"learning_rate": 9.09665684511797e-06,
|
|
"loss": 0.8734,
|
|
"step": 4411
|
|
},
|
|
{
|
|
"epoch": 1.173404255319149,
|
|
"grad_norm": 4.38550329208374,
|
|
"learning_rate": 9.096152543590045e-06,
|
|
"loss": 0.8248,
|
|
"step": 4412
|
|
},
|
|
{
|
|
"epoch": 1.1736702127659575,
|
|
"grad_norm": 4.337765693664551,
|
|
"learning_rate": 9.095648115322336e-06,
|
|
"loss": 0.8992,
|
|
"step": 4413
|
|
},
|
|
{
|
|
"epoch": 1.173936170212766,
|
|
"grad_norm": 4.145912170410156,
|
|
"learning_rate": 9.095143560330453e-06,
|
|
"loss": 0.8119,
|
|
"step": 4414
|
|
},
|
|
{
|
|
"epoch": 1.1742021276595744,
|
|
"grad_norm": 3.5085721015930176,
|
|
"learning_rate": 9.094638878630007e-06,
|
|
"loss": 0.744,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 1.174468085106383,
|
|
"grad_norm": 4.225882053375244,
|
|
"learning_rate": 9.094134070236614e-06,
|
|
"loss": 0.8368,
|
|
"step": 4416
|
|
},
|
|
{
|
|
"epoch": 1.1747340425531916,
|
|
"grad_norm": 4.2498273849487305,
|
|
"learning_rate": 9.09362913516589e-06,
|
|
"loss": 0.7281,
|
|
"step": 4417
|
|
},
|
|
{
|
|
"epoch": 1.175,
|
|
"grad_norm": 3.8343684673309326,
|
|
"learning_rate": 9.093124073433464e-06,
|
|
"loss": 0.8521,
|
|
"step": 4418
|
|
},
|
|
{
|
|
"epoch": 1.1752659574468085,
|
|
"grad_norm": 4.265048503875732,
|
|
"learning_rate": 9.092618885054958e-06,
|
|
"loss": 0.8624,
|
|
"step": 4419
|
|
},
|
|
{
|
|
"epoch": 1.175531914893617,
|
|
"grad_norm": 4.251501560211182,
|
|
"learning_rate": 9.092113570046005e-06,
|
|
"loss": 0.7163,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 1.1757978723404254,
|
|
"grad_norm": 3.9519202709198,
|
|
"learning_rate": 9.091608128422243e-06,
|
|
"loss": 0.8139,
|
|
"step": 4421
|
|
},
|
|
{
|
|
"epoch": 1.1760638297872341,
|
|
"grad_norm": 3.785550832748413,
|
|
"learning_rate": 9.091102560199306e-06,
|
|
"loss": 0.7897,
|
|
"step": 4422
|
|
},
|
|
{
|
|
"epoch": 1.1763297872340426,
|
|
"grad_norm": 4.2011260986328125,
|
|
"learning_rate": 9.090596865392838e-06,
|
|
"loss": 0.8119,
|
|
"step": 4423
|
|
},
|
|
{
|
|
"epoch": 1.176595744680851,
|
|
"grad_norm": 3.7419655323028564,
|
|
"learning_rate": 9.090091044018488e-06,
|
|
"loss": 0.64,
|
|
"step": 4424
|
|
},
|
|
{
|
|
"epoch": 1.1768617021276595,
|
|
"grad_norm": 3.561340093612671,
|
|
"learning_rate": 9.089585096091906e-06,
|
|
"loss": 0.7546,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 1.177127659574468,
|
|
"grad_norm": 3.971997022628784,
|
|
"learning_rate": 9.089079021628746e-06,
|
|
"loss": 0.8783,
|
|
"step": 4426
|
|
},
|
|
{
|
|
"epoch": 1.1773936170212767,
|
|
"grad_norm": 4.214608669281006,
|
|
"learning_rate": 9.088572820644667e-06,
|
|
"loss": 0.9312,
|
|
"step": 4427
|
|
},
|
|
{
|
|
"epoch": 1.1776595744680851,
|
|
"grad_norm": 3.867511749267578,
|
|
"learning_rate": 9.088066493155332e-06,
|
|
"loss": 0.9171,
|
|
"step": 4428
|
|
},
|
|
{
|
|
"epoch": 1.1779255319148936,
|
|
"grad_norm": 3.8267605304718018,
|
|
"learning_rate": 9.087560039176407e-06,
|
|
"loss": 0.7369,
|
|
"step": 4429
|
|
},
|
|
{
|
|
"epoch": 1.178191489361702,
|
|
"grad_norm": 3.9210994243621826,
|
|
"learning_rate": 9.08705345872356e-06,
|
|
"loss": 0.7975,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 1.1784574468085107,
|
|
"grad_norm": 3.820697069168091,
|
|
"learning_rate": 9.086546751812467e-06,
|
|
"loss": 0.7579,
|
|
"step": 4431
|
|
},
|
|
{
|
|
"epoch": 1.1787234042553192,
|
|
"grad_norm": 4.319027423858643,
|
|
"learning_rate": 9.086039918458806e-06,
|
|
"loss": 0.7671,
|
|
"step": 4432
|
|
},
|
|
{
|
|
"epoch": 1.1789893617021276,
|
|
"grad_norm": 3.768254280090332,
|
|
"learning_rate": 9.085532958678262e-06,
|
|
"loss": 0.7075,
|
|
"step": 4433
|
|
},
|
|
{
|
|
"epoch": 1.179255319148936,
|
|
"grad_norm": 3.8115556240081787,
|
|
"learning_rate": 9.085025872486516e-06,
|
|
"loss": 0.6844,
|
|
"step": 4434
|
|
},
|
|
{
|
|
"epoch": 1.1795212765957448,
|
|
"grad_norm": 3.6113126277923584,
|
|
"learning_rate": 9.08451865989926e-06,
|
|
"loss": 0.7161,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 1.1797872340425533,
|
|
"grad_norm": 4.16688871383667,
|
|
"learning_rate": 9.08401132093219e-06,
|
|
"loss": 0.8756,
|
|
"step": 4436
|
|
},
|
|
{
|
|
"epoch": 1.1800531914893617,
|
|
"grad_norm": 4.136419773101807,
|
|
"learning_rate": 9.083503855600997e-06,
|
|
"loss": 0.8072,
|
|
"step": 4437
|
|
},
|
|
{
|
|
"epoch": 1.1803191489361702,
|
|
"grad_norm": 4.0323357582092285,
|
|
"learning_rate": 9.08299626392139e-06,
|
|
"loss": 0.7889,
|
|
"step": 4438
|
|
},
|
|
{
|
|
"epoch": 1.1805851063829786,
|
|
"grad_norm": 3.848400354385376,
|
|
"learning_rate": 9.082488545909072e-06,
|
|
"loss": 0.8467,
|
|
"step": 4439
|
|
},
|
|
{
|
|
"epoch": 1.1808510638297873,
|
|
"grad_norm": 3.8820831775665283,
|
|
"learning_rate": 9.08198070157975e-06,
|
|
"loss": 0.7926,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 1.1811170212765958,
|
|
"grad_norm": 3.9585654735565186,
|
|
"learning_rate": 9.08147273094914e-06,
|
|
"loss": 0.8671,
|
|
"step": 4441
|
|
},
|
|
{
|
|
"epoch": 1.1813829787234043,
|
|
"grad_norm": 4.736848831176758,
|
|
"learning_rate": 9.080964634032958e-06,
|
|
"loss": 0.8953,
|
|
"step": 4442
|
|
},
|
|
{
|
|
"epoch": 1.1816489361702127,
|
|
"grad_norm": 4.1310343742370605,
|
|
"learning_rate": 9.080456410846926e-06,
|
|
"loss": 0.7878,
|
|
"step": 4443
|
|
},
|
|
{
|
|
"epoch": 1.1819148936170212,
|
|
"grad_norm": 3.701655149459839,
|
|
"learning_rate": 9.079948061406769e-06,
|
|
"loss": 0.7205,
|
|
"step": 4444
|
|
},
|
|
{
|
|
"epoch": 1.1821808510638299,
|
|
"grad_norm": 4.258152008056641,
|
|
"learning_rate": 9.079439585728214e-06,
|
|
"loss": 0.8573,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 1.1824468085106383,
|
|
"grad_norm": 4.08727502822876,
|
|
"learning_rate": 9.078930983826997e-06,
|
|
"loss": 0.8661,
|
|
"step": 4446
|
|
},
|
|
{
|
|
"epoch": 1.1827127659574468,
|
|
"grad_norm": 4.263191223144531,
|
|
"learning_rate": 9.078422255718852e-06,
|
|
"loss": 0.9975,
|
|
"step": 4447
|
|
},
|
|
{
|
|
"epoch": 1.1829787234042553,
|
|
"grad_norm": 3.8881144523620605,
|
|
"learning_rate": 9.07791340141952e-06,
|
|
"loss": 0.8825,
|
|
"step": 4448
|
|
},
|
|
{
|
|
"epoch": 1.1832446808510637,
|
|
"grad_norm": 4.034143924713135,
|
|
"learning_rate": 9.077404420944746e-06,
|
|
"loss": 0.7645,
|
|
"step": 4449
|
|
},
|
|
{
|
|
"epoch": 1.1835106382978724,
|
|
"grad_norm": 3.6815900802612305,
|
|
"learning_rate": 9.076895314310282e-06,
|
|
"loss": 0.845,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 1.1837765957446809,
|
|
"grad_norm": 4.061761379241943,
|
|
"learning_rate": 9.076386081531873e-06,
|
|
"loss": 0.715,
|
|
"step": 4451
|
|
},
|
|
{
|
|
"epoch": 1.1840425531914893,
|
|
"grad_norm": 3.675588846206665,
|
|
"learning_rate": 9.075876722625281e-06,
|
|
"loss": 0.6865,
|
|
"step": 4452
|
|
},
|
|
{
|
|
"epoch": 1.1843085106382978,
|
|
"grad_norm": 3.922511577606201,
|
|
"learning_rate": 9.075367237606265e-06,
|
|
"loss": 0.8139,
|
|
"step": 4453
|
|
},
|
|
{
|
|
"epoch": 1.1845744680851065,
|
|
"grad_norm": 4.45919132232666,
|
|
"learning_rate": 9.074857626490587e-06,
|
|
"loss": 0.8832,
|
|
"step": 4454
|
|
},
|
|
{
|
|
"epoch": 1.184840425531915,
|
|
"grad_norm": 3.8306045532226562,
|
|
"learning_rate": 9.074347889294017e-06,
|
|
"loss": 0.775,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 1.1851063829787234,
|
|
"grad_norm": 4.380180358886719,
|
|
"learning_rate": 9.073838026032328e-06,
|
|
"loss": 0.8028,
|
|
"step": 4456
|
|
},
|
|
{
|
|
"epoch": 1.1853723404255319,
|
|
"grad_norm": 3.6403377056121826,
|
|
"learning_rate": 9.073328036721292e-06,
|
|
"loss": 0.7365,
|
|
"step": 4457
|
|
},
|
|
{
|
|
"epoch": 1.1856382978723405,
|
|
"grad_norm": 4.642416477203369,
|
|
"learning_rate": 9.072817921376692e-06,
|
|
"loss": 1.0456,
|
|
"step": 4458
|
|
},
|
|
{
|
|
"epoch": 1.185904255319149,
|
|
"grad_norm": 4.2514753341674805,
|
|
"learning_rate": 9.07230768001431e-06,
|
|
"loss": 0.8752,
|
|
"step": 4459
|
|
},
|
|
{
|
|
"epoch": 1.1861702127659575,
|
|
"grad_norm": 4.097993850708008,
|
|
"learning_rate": 9.071797312649934e-06,
|
|
"loss": 0.8805,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 1.186436170212766,
|
|
"grad_norm": 3.6704015731811523,
|
|
"learning_rate": 9.071286819299355e-06,
|
|
"loss": 0.7362,
|
|
"step": 4461
|
|
},
|
|
{
|
|
"epoch": 1.1867021276595744,
|
|
"grad_norm": 3.5198822021484375,
|
|
"learning_rate": 9.070776199978369e-06,
|
|
"loss": 0.6528,
|
|
"step": 4462
|
|
},
|
|
{
|
|
"epoch": 1.186968085106383,
|
|
"grad_norm": 4.044826507568359,
|
|
"learning_rate": 9.070265454702774e-06,
|
|
"loss": 0.785,
|
|
"step": 4463
|
|
},
|
|
{
|
|
"epoch": 1.1872340425531915,
|
|
"grad_norm": 3.775392770767212,
|
|
"learning_rate": 9.069754583488375e-06,
|
|
"loss": 0.7664,
|
|
"step": 4464
|
|
},
|
|
{
|
|
"epoch": 1.1875,
|
|
"grad_norm": 3.9251670837402344,
|
|
"learning_rate": 9.069243586350976e-06,
|
|
"loss": 0.7694,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 1.1877659574468085,
|
|
"grad_norm": 4.138858318328857,
|
|
"learning_rate": 9.06873246330639e-06,
|
|
"loss": 0.8734,
|
|
"step": 4466
|
|
},
|
|
{
|
|
"epoch": 1.188031914893617,
|
|
"grad_norm": 3.8749899864196777,
|
|
"learning_rate": 9.06822121437043e-06,
|
|
"loss": 0.7114,
|
|
"step": 4467
|
|
},
|
|
{
|
|
"epoch": 1.1882978723404256,
|
|
"grad_norm": 4.107519626617432,
|
|
"learning_rate": 9.067709839558917e-06,
|
|
"loss": 0.7998,
|
|
"step": 4468
|
|
},
|
|
{
|
|
"epoch": 1.188563829787234,
|
|
"grad_norm": 3.6962497234344482,
|
|
"learning_rate": 9.067198338887673e-06,
|
|
"loss": 0.8317,
|
|
"step": 4469
|
|
},
|
|
{
|
|
"epoch": 1.1888297872340425,
|
|
"grad_norm": 4.575094223022461,
|
|
"learning_rate": 9.066686712372524e-06,
|
|
"loss": 0.8399,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 1.189095744680851,
|
|
"grad_norm": 4.391597747802734,
|
|
"learning_rate": 9.0661749600293e-06,
|
|
"loss": 0.8801,
|
|
"step": 4471
|
|
},
|
|
{
|
|
"epoch": 1.1893617021276595,
|
|
"grad_norm": 3.650452136993408,
|
|
"learning_rate": 9.065663081873834e-06,
|
|
"loss": 0.7738,
|
|
"step": 4472
|
|
},
|
|
{
|
|
"epoch": 1.1896276595744681,
|
|
"grad_norm": 4.12108039855957,
|
|
"learning_rate": 9.065151077921968e-06,
|
|
"loss": 0.8333,
|
|
"step": 4473
|
|
},
|
|
{
|
|
"epoch": 1.1898936170212766,
|
|
"grad_norm": 4.204649925231934,
|
|
"learning_rate": 9.064638948189539e-06,
|
|
"loss": 0.8531,
|
|
"step": 4474
|
|
},
|
|
{
|
|
"epoch": 1.190159574468085,
|
|
"grad_norm": 4.241077423095703,
|
|
"learning_rate": 9.064126692692397e-06,
|
|
"loss": 0.8215,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 1.1904255319148935,
|
|
"grad_norm": 4.215181350708008,
|
|
"learning_rate": 9.06361431144639e-06,
|
|
"loss": 0.7595,
|
|
"step": 4476
|
|
},
|
|
{
|
|
"epoch": 1.1906914893617022,
|
|
"grad_norm": 3.597543239593506,
|
|
"learning_rate": 9.06310180446737e-06,
|
|
"loss": 0.7967,
|
|
"step": 4477
|
|
},
|
|
{
|
|
"epoch": 1.1909574468085107,
|
|
"grad_norm": 4.075351238250732,
|
|
"learning_rate": 9.0625891717712e-06,
|
|
"loss": 0.8158,
|
|
"step": 4478
|
|
},
|
|
{
|
|
"epoch": 1.1912234042553191,
|
|
"grad_norm": 3.5748724937438965,
|
|
"learning_rate": 9.062076413373735e-06,
|
|
"loss": 0.733,
|
|
"step": 4479
|
|
},
|
|
{
|
|
"epoch": 1.1914893617021276,
|
|
"grad_norm": 3.9107751846313477,
|
|
"learning_rate": 9.061563529290845e-06,
|
|
"loss": 0.8057,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 1.1917553191489363,
|
|
"grad_norm": 4.108970642089844,
|
|
"learning_rate": 9.061050519538397e-06,
|
|
"loss": 0.9214,
|
|
"step": 4481
|
|
},
|
|
{
|
|
"epoch": 1.1920212765957447,
|
|
"grad_norm": 3.9196219444274902,
|
|
"learning_rate": 9.060537384132264e-06,
|
|
"loss": 0.8046,
|
|
"step": 4482
|
|
},
|
|
{
|
|
"epoch": 1.1922872340425532,
|
|
"grad_norm": 3.312999963760376,
|
|
"learning_rate": 9.060024123088324e-06,
|
|
"loss": 0.6791,
|
|
"step": 4483
|
|
},
|
|
{
|
|
"epoch": 1.1925531914893617,
|
|
"grad_norm": 4.010212421417236,
|
|
"learning_rate": 9.05951073642246e-06,
|
|
"loss": 0.8244,
|
|
"step": 4484
|
|
},
|
|
{
|
|
"epoch": 1.1928191489361701,
|
|
"grad_norm": 3.9299821853637695,
|
|
"learning_rate": 9.05899722415055e-06,
|
|
"loss": 0.7054,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 1.1930851063829788,
|
|
"grad_norm": 4.205704212188721,
|
|
"learning_rate": 9.05848358628849e-06,
|
|
"loss": 0.9058,
|
|
"step": 4486
|
|
},
|
|
{
|
|
"epoch": 1.1933510638297873,
|
|
"grad_norm": 4.133444309234619,
|
|
"learning_rate": 9.057969822852168e-06,
|
|
"loss": 0.8414,
|
|
"step": 4487
|
|
},
|
|
{
|
|
"epoch": 1.1936170212765957,
|
|
"grad_norm": 3.7199227809906006,
|
|
"learning_rate": 9.057455933857483e-06,
|
|
"loss": 0.7884,
|
|
"step": 4488
|
|
},
|
|
{
|
|
"epoch": 1.1938829787234042,
|
|
"grad_norm": 4.377199172973633,
|
|
"learning_rate": 9.056941919320335e-06,
|
|
"loss": 0.7732,
|
|
"step": 4489
|
|
},
|
|
{
|
|
"epoch": 1.1941489361702127,
|
|
"grad_norm": 4.171092987060547,
|
|
"learning_rate": 9.056427779256624e-06,
|
|
"loss": 0.8652,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 1.1944148936170214,
|
|
"grad_norm": 3.7670929431915283,
|
|
"learning_rate": 9.055913513682267e-06,
|
|
"loss": 0.7825,
|
|
"step": 4491
|
|
},
|
|
{
|
|
"epoch": 1.1946808510638298,
|
|
"grad_norm": 3.9210784435272217,
|
|
"learning_rate": 9.055399122613166e-06,
|
|
"loss": 0.8515,
|
|
"step": 4492
|
|
},
|
|
{
|
|
"epoch": 1.1949468085106383,
|
|
"grad_norm": 3.543363094329834,
|
|
"learning_rate": 9.054884606065243e-06,
|
|
"loss": 0.6883,
|
|
"step": 4493
|
|
},
|
|
{
|
|
"epoch": 1.1952127659574467,
|
|
"grad_norm": 3.9357686042785645,
|
|
"learning_rate": 9.054369964054418e-06,
|
|
"loss": 0.7847,
|
|
"step": 4494
|
|
},
|
|
{
|
|
"epoch": 1.1954787234042552,
|
|
"grad_norm": 3.5497348308563232,
|
|
"learning_rate": 9.05385519659661e-06,
|
|
"loss": 0.8664,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 1.195744680851064,
|
|
"grad_norm": 4.09616756439209,
|
|
"learning_rate": 9.053340303707752e-06,
|
|
"loss": 0.7928,
|
|
"step": 4496
|
|
},
|
|
{
|
|
"epoch": 1.1960106382978724,
|
|
"grad_norm": 4.135888576507568,
|
|
"learning_rate": 9.052825285403771e-06,
|
|
"loss": 0.8372,
|
|
"step": 4497
|
|
},
|
|
{
|
|
"epoch": 1.1962765957446808,
|
|
"grad_norm": 4.014375686645508,
|
|
"learning_rate": 9.052310141700605e-06,
|
|
"loss": 0.7838,
|
|
"step": 4498
|
|
},
|
|
{
|
|
"epoch": 1.1965425531914893,
|
|
"grad_norm": 4.164703369140625,
|
|
"learning_rate": 9.051794872614193e-06,
|
|
"loss": 0.7346,
|
|
"step": 4499
|
|
},
|
|
{
|
|
"epoch": 1.196808510638298,
|
|
"grad_norm": 3.9445199966430664,
|
|
"learning_rate": 9.051279478160475e-06,
|
|
"loss": 0.7969,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 1.196808510638298,
|
|
"eval_loss": 1.3114004135131836,
|
|
"eval_runtime": 13.8708,
|
|
"eval_samples_per_second": 28.838,
|
|
"eval_steps_per_second": 3.605,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 1.1970744680851064,
|
|
"grad_norm": 4.145724773406982,
|
|
"learning_rate": 9.050763958355401e-06,
|
|
"loss": 0.864,
|
|
"step": 4501
|
|
},
|
|
{
|
|
"epoch": 1.1973404255319149,
|
|
"grad_norm": 3.9395062923431396,
|
|
"learning_rate": 9.050248313214921e-06,
|
|
"loss": 0.8854,
|
|
"step": 4502
|
|
},
|
|
{
|
|
"epoch": 1.1976063829787233,
|
|
"grad_norm": 3.7419703006744385,
|
|
"learning_rate": 9.04973254275499e-06,
|
|
"loss": 0.778,
|
|
"step": 4503
|
|
},
|
|
{
|
|
"epoch": 1.197872340425532,
|
|
"grad_norm": 3.620009422302246,
|
|
"learning_rate": 9.049216646991568e-06,
|
|
"loss": 0.6522,
|
|
"step": 4504
|
|
},
|
|
{
|
|
"epoch": 1.1981382978723405,
|
|
"grad_norm": 4.093226909637451,
|
|
"learning_rate": 9.048700625940613e-06,
|
|
"loss": 0.7909,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 1.198404255319149,
|
|
"grad_norm": 4.31190824508667,
|
|
"learning_rate": 9.048184479618094e-06,
|
|
"loss": 0.87,
|
|
"step": 4506
|
|
},
|
|
{
|
|
"epoch": 1.1986702127659574,
|
|
"grad_norm": 3.5274550914764404,
|
|
"learning_rate": 9.047668208039981e-06,
|
|
"loss": 0.7015,
|
|
"step": 4507
|
|
},
|
|
{
|
|
"epoch": 1.1989361702127659,
|
|
"grad_norm": 4.295877933502197,
|
|
"learning_rate": 9.04715181122225e-06,
|
|
"loss": 0.8673,
|
|
"step": 4508
|
|
},
|
|
{
|
|
"epoch": 1.1992021276595746,
|
|
"grad_norm": 4.239846706390381,
|
|
"learning_rate": 9.046635289180875e-06,
|
|
"loss": 0.7815,
|
|
"step": 4509
|
|
},
|
|
{
|
|
"epoch": 1.199468085106383,
|
|
"grad_norm": 4.294873237609863,
|
|
"learning_rate": 9.046118641931841e-06,
|
|
"loss": 0.8275,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 1.1997340425531915,
|
|
"grad_norm": 4.2128586769104,
|
|
"learning_rate": 9.045601869491131e-06,
|
|
"loss": 0.885,
|
|
"step": 4511
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"grad_norm": 4.04133415222168,
|
|
"learning_rate": 9.045084971874738e-06,
|
|
"loss": 0.6479,
|
|
"step": 4512
|
|
},
|
|
{
|
|
"epoch": 1.2002659574468084,
|
|
"grad_norm": 4.300421714782715,
|
|
"learning_rate": 9.044567949098653e-06,
|
|
"loss": 0.7596,
|
|
"step": 4513
|
|
},
|
|
{
|
|
"epoch": 1.200531914893617,
|
|
"grad_norm": 4.0186896324157715,
|
|
"learning_rate": 9.044050801178873e-06,
|
|
"loss": 0.9244,
|
|
"step": 4514
|
|
},
|
|
{
|
|
"epoch": 1.2007978723404256,
|
|
"grad_norm": 3.989703416824341,
|
|
"learning_rate": 9.043533528131401e-06,
|
|
"loss": 0.8296,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 1.201063829787234,
|
|
"grad_norm": 3.6627588272094727,
|
|
"learning_rate": 9.043016129972239e-06,
|
|
"loss": 0.6557,
|
|
"step": 4516
|
|
},
|
|
{
|
|
"epoch": 1.2013297872340425,
|
|
"grad_norm": 4.000990867614746,
|
|
"learning_rate": 9.042498606717401e-06,
|
|
"loss": 0.8114,
|
|
"step": 4517
|
|
},
|
|
{
|
|
"epoch": 1.201595744680851,
|
|
"grad_norm": 4.12056827545166,
|
|
"learning_rate": 9.041980958382895e-06,
|
|
"loss": 0.7866,
|
|
"step": 4518
|
|
},
|
|
{
|
|
"epoch": 1.2018617021276596,
|
|
"grad_norm": 4.345433712005615,
|
|
"learning_rate": 9.041463184984739e-06,
|
|
"loss": 0.9222,
|
|
"step": 4519
|
|
},
|
|
{
|
|
"epoch": 1.202127659574468,
|
|
"grad_norm": 3.629518747329712,
|
|
"learning_rate": 9.040945286538954e-06,
|
|
"loss": 0.6739,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 1.2023936170212766,
|
|
"grad_norm": 4.012117862701416,
|
|
"learning_rate": 9.040427263061563e-06,
|
|
"loss": 0.8168,
|
|
"step": 4521
|
|
},
|
|
{
|
|
"epoch": 1.202659574468085,
|
|
"grad_norm": 3.6947031021118164,
|
|
"learning_rate": 9.039909114568597e-06,
|
|
"loss": 0.7811,
|
|
"step": 4522
|
|
},
|
|
{
|
|
"epoch": 1.2029255319148937,
|
|
"grad_norm": 4.276979446411133,
|
|
"learning_rate": 9.039390841076086e-06,
|
|
"loss": 0.9514,
|
|
"step": 4523
|
|
},
|
|
{
|
|
"epoch": 1.2031914893617022,
|
|
"grad_norm": 3.970949411392212,
|
|
"learning_rate": 9.038872442600066e-06,
|
|
"loss": 0.832,
|
|
"step": 4524
|
|
},
|
|
{
|
|
"epoch": 1.2034574468085106,
|
|
"grad_norm": 4.2050323486328125,
|
|
"learning_rate": 9.038353919156579e-06,
|
|
"loss": 0.838,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 1.203723404255319,
|
|
"grad_norm": 3.872286319732666,
|
|
"learning_rate": 9.037835270761667e-06,
|
|
"loss": 0.8424,
|
|
"step": 4526
|
|
},
|
|
{
|
|
"epoch": 1.2039893617021278,
|
|
"grad_norm": 4.053325653076172,
|
|
"learning_rate": 9.037316497431377e-06,
|
|
"loss": 0.8673,
|
|
"step": 4527
|
|
},
|
|
{
|
|
"epoch": 1.2042553191489362,
|
|
"grad_norm": 3.982133388519287,
|
|
"learning_rate": 9.036797599181762e-06,
|
|
"loss": 0.7101,
|
|
"step": 4528
|
|
},
|
|
{
|
|
"epoch": 1.2045212765957447,
|
|
"grad_norm": 4.298680782318115,
|
|
"learning_rate": 9.036278576028876e-06,
|
|
"loss": 0.8027,
|
|
"step": 4529
|
|
},
|
|
{
|
|
"epoch": 1.2047872340425532,
|
|
"grad_norm": 3.7166576385498047,
|
|
"learning_rate": 9.035759427988779e-06,
|
|
"loss": 0.8048,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 1.2050531914893616,
|
|
"grad_norm": 4.02637243270874,
|
|
"learning_rate": 9.035240155077532e-06,
|
|
"loss": 0.8519,
|
|
"step": 4531
|
|
},
|
|
{
|
|
"epoch": 1.2053191489361703,
|
|
"grad_norm": 4.048903942108154,
|
|
"learning_rate": 9.034720757311206e-06,
|
|
"loss": 0.8076,
|
|
"step": 4532
|
|
},
|
|
{
|
|
"epoch": 1.2055851063829788,
|
|
"grad_norm": 3.8102221488952637,
|
|
"learning_rate": 9.034201234705869e-06,
|
|
"loss": 0.8361,
|
|
"step": 4533
|
|
},
|
|
{
|
|
"epoch": 1.2058510638297872,
|
|
"grad_norm": 4.269223213195801,
|
|
"learning_rate": 9.033681587277596e-06,
|
|
"loss": 0.9528,
|
|
"step": 4534
|
|
},
|
|
{
|
|
"epoch": 1.2061170212765957,
|
|
"grad_norm": 4.001543998718262,
|
|
"learning_rate": 9.033161815042465e-06,
|
|
"loss": 0.8678,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 1.2063829787234042,
|
|
"grad_norm": 4.034337997436523,
|
|
"learning_rate": 9.032641918016559e-06,
|
|
"loss": 0.7533,
|
|
"step": 4536
|
|
},
|
|
{
|
|
"epoch": 1.2066489361702128,
|
|
"grad_norm": 3.7186598777770996,
|
|
"learning_rate": 9.032121896215965e-06,
|
|
"loss": 0.8469,
|
|
"step": 4537
|
|
},
|
|
{
|
|
"epoch": 1.2069148936170213,
|
|
"grad_norm": 3.8396542072296143,
|
|
"learning_rate": 9.03160174965677e-06,
|
|
"loss": 0.7419,
|
|
"step": 4538
|
|
},
|
|
{
|
|
"epoch": 1.2071808510638298,
|
|
"grad_norm": 3.971125602722168,
|
|
"learning_rate": 9.031081478355074e-06,
|
|
"loss": 0.7997,
|
|
"step": 4539
|
|
},
|
|
{
|
|
"epoch": 1.2074468085106382,
|
|
"grad_norm": 3.9450175762176514,
|
|
"learning_rate": 9.03056108232697e-06,
|
|
"loss": 0.9049,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 1.2077127659574467,
|
|
"grad_norm": 3.878206729888916,
|
|
"learning_rate": 9.03004056158856e-06,
|
|
"loss": 0.7389,
|
|
"step": 4541
|
|
},
|
|
{
|
|
"epoch": 1.2079787234042554,
|
|
"grad_norm": 4.157868385314941,
|
|
"learning_rate": 9.02951991615595e-06,
|
|
"loss": 0.8474,
|
|
"step": 4542
|
|
},
|
|
{
|
|
"epoch": 1.2082446808510638,
|
|
"grad_norm": 4.203000068664551,
|
|
"learning_rate": 9.02899914604525e-06,
|
|
"loss": 0.7146,
|
|
"step": 4543
|
|
},
|
|
{
|
|
"epoch": 1.2085106382978723,
|
|
"grad_norm": 4.336871147155762,
|
|
"learning_rate": 9.028478251272573e-06,
|
|
"loss": 0.7901,
|
|
"step": 4544
|
|
},
|
|
{
|
|
"epoch": 1.2087765957446808,
|
|
"grad_norm": 4.467360973358154,
|
|
"learning_rate": 9.027957231854034e-06,
|
|
"loss": 0.6987,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 1.2090425531914895,
|
|
"grad_norm": 4.293298721313477,
|
|
"learning_rate": 9.027436087805759e-06,
|
|
"loss": 0.8706,
|
|
"step": 4546
|
|
},
|
|
{
|
|
"epoch": 1.209308510638298,
|
|
"grad_norm": 4.344003200531006,
|
|
"learning_rate": 9.026914819143867e-06,
|
|
"loss": 0.8803,
|
|
"step": 4547
|
|
},
|
|
{
|
|
"epoch": 1.2095744680851064,
|
|
"grad_norm": 3.9396615028381348,
|
|
"learning_rate": 9.026393425884491e-06,
|
|
"loss": 0.8195,
|
|
"step": 4548
|
|
},
|
|
{
|
|
"epoch": 1.2098404255319148,
|
|
"grad_norm": 4.163116931915283,
|
|
"learning_rate": 9.025871908043762e-06,
|
|
"loss": 0.8396,
|
|
"step": 4549
|
|
},
|
|
{
|
|
"epoch": 1.2101063829787235,
|
|
"grad_norm": 3.790417194366455,
|
|
"learning_rate": 9.025350265637816e-06,
|
|
"loss": 0.9279,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 1.210372340425532,
|
|
"grad_norm": 3.6482441425323486,
|
|
"learning_rate": 9.024828498682793e-06,
|
|
"loss": 0.8154,
|
|
"step": 4551
|
|
},
|
|
{
|
|
"epoch": 1.2106382978723405,
|
|
"grad_norm": 4.012534141540527,
|
|
"learning_rate": 9.024306607194839e-06,
|
|
"loss": 0.777,
|
|
"step": 4552
|
|
},
|
|
{
|
|
"epoch": 1.210904255319149,
|
|
"grad_norm": 3.850843906402588,
|
|
"learning_rate": 9.0237845911901e-06,
|
|
"loss": 0.6989,
|
|
"step": 4553
|
|
},
|
|
{
|
|
"epoch": 1.2111702127659574,
|
|
"grad_norm": 3.810297966003418,
|
|
"learning_rate": 9.023262450684727e-06,
|
|
"loss": 0.8284,
|
|
"step": 4554
|
|
},
|
|
{
|
|
"epoch": 1.211436170212766,
|
|
"grad_norm": 3.643862247467041,
|
|
"learning_rate": 9.022740185694877e-06,
|
|
"loss": 0.9392,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 1.2117021276595745,
|
|
"grad_norm": 3.707839012145996,
|
|
"learning_rate": 9.022217796236711e-06,
|
|
"loss": 0.794,
|
|
"step": 4556
|
|
},
|
|
{
|
|
"epoch": 1.211968085106383,
|
|
"grad_norm": 4.23673152923584,
|
|
"learning_rate": 9.02169528232639e-06,
|
|
"loss": 0.7546,
|
|
"step": 4557
|
|
},
|
|
{
|
|
"epoch": 1.2122340425531914,
|
|
"grad_norm": 4.236415386199951,
|
|
"learning_rate": 9.021172643980082e-06,
|
|
"loss": 0.9645,
|
|
"step": 4558
|
|
},
|
|
{
|
|
"epoch": 1.2125,
|
|
"grad_norm": 3.956615686416626,
|
|
"learning_rate": 9.02064988121396e-06,
|
|
"loss": 0.9095,
|
|
"step": 4559
|
|
},
|
|
{
|
|
"epoch": 1.2127659574468086,
|
|
"grad_norm": 4.126330852508545,
|
|
"learning_rate": 9.020126994044194e-06,
|
|
"loss": 0.7762,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 1.213031914893617,
|
|
"grad_norm": 4.501354694366455,
|
|
"learning_rate": 9.019603982486967e-06,
|
|
"loss": 0.873,
|
|
"step": 4561
|
|
},
|
|
{
|
|
"epoch": 1.2132978723404255,
|
|
"grad_norm": 4.185324192047119,
|
|
"learning_rate": 9.01908084655846e-06,
|
|
"loss": 0.8071,
|
|
"step": 4562
|
|
},
|
|
{
|
|
"epoch": 1.213563829787234,
|
|
"grad_norm": 4.112594127655029,
|
|
"learning_rate": 9.018557586274858e-06,
|
|
"loss": 0.7762,
|
|
"step": 4563
|
|
},
|
|
{
|
|
"epoch": 1.2138297872340424,
|
|
"grad_norm": 3.841365098953247,
|
|
"learning_rate": 9.018034201652357e-06,
|
|
"loss": 0.8042,
|
|
"step": 4564
|
|
},
|
|
{
|
|
"epoch": 1.2140957446808511,
|
|
"grad_norm": 3.9603569507598877,
|
|
"learning_rate": 9.017510692707144e-06,
|
|
"loss": 0.6254,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 1.2143617021276596,
|
|
"grad_norm": 3.6832830905914307,
|
|
"learning_rate": 9.016987059455422e-06,
|
|
"loss": 0.7013,
|
|
"step": 4566
|
|
},
|
|
{
|
|
"epoch": 1.214627659574468,
|
|
"grad_norm": 4.155395030975342,
|
|
"learning_rate": 9.01646330191339e-06,
|
|
"loss": 0.8052,
|
|
"step": 4567
|
|
},
|
|
{
|
|
"epoch": 1.2148936170212765,
|
|
"grad_norm": 3.9648375511169434,
|
|
"learning_rate": 9.015939420097255e-06,
|
|
"loss": 0.778,
|
|
"step": 4568
|
|
},
|
|
{
|
|
"epoch": 1.2151595744680852,
|
|
"grad_norm": 3.8621366024017334,
|
|
"learning_rate": 9.015415414023226e-06,
|
|
"loss": 0.7851,
|
|
"step": 4569
|
|
},
|
|
{
|
|
"epoch": 1.2154255319148937,
|
|
"grad_norm": 4.207528114318848,
|
|
"learning_rate": 9.014891283707517e-06,
|
|
"loss": 0.9192,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 1.2156914893617021,
|
|
"grad_norm": 4.204238414764404,
|
|
"learning_rate": 9.014367029166344e-06,
|
|
"loss": 0.8175,
|
|
"step": 4571
|
|
},
|
|
{
|
|
"epoch": 1.2159574468085106,
|
|
"grad_norm": 4.0870537757873535,
|
|
"learning_rate": 9.013842650415927e-06,
|
|
"loss": 0.8294,
|
|
"step": 4572
|
|
},
|
|
{
|
|
"epoch": 1.2162234042553193,
|
|
"grad_norm": 4.164912700653076,
|
|
"learning_rate": 9.013318147472497e-06,
|
|
"loss": 0.8457,
|
|
"step": 4573
|
|
},
|
|
{
|
|
"epoch": 1.2164893617021277,
|
|
"grad_norm": 4.122684478759766,
|
|
"learning_rate": 9.012793520352276e-06,
|
|
"loss": 0.7565,
|
|
"step": 4574
|
|
},
|
|
{
|
|
"epoch": 1.2167553191489362,
|
|
"grad_norm": 4.155274391174316,
|
|
"learning_rate": 9.012268769071499e-06,
|
|
"loss": 0.7522,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 1.2170212765957447,
|
|
"grad_norm": 4.182219505310059,
|
|
"learning_rate": 9.011743893646402e-06,
|
|
"loss": 0.842,
|
|
"step": 4576
|
|
},
|
|
{
|
|
"epoch": 1.2172872340425531,
|
|
"grad_norm": 3.9600305557250977,
|
|
"learning_rate": 9.011218894093226e-06,
|
|
"loss": 0.7938,
|
|
"step": 4577
|
|
},
|
|
{
|
|
"epoch": 1.2175531914893618,
|
|
"grad_norm": 3.977374792098999,
|
|
"learning_rate": 9.010693770428217e-06,
|
|
"loss": 0.7021,
|
|
"step": 4578
|
|
},
|
|
{
|
|
"epoch": 1.2178191489361703,
|
|
"grad_norm": 4.227469444274902,
|
|
"learning_rate": 9.010168522667617e-06,
|
|
"loss": 0.8016,
|
|
"step": 4579
|
|
},
|
|
{
|
|
"epoch": 1.2180851063829787,
|
|
"grad_norm": 3.7802317142486572,
|
|
"learning_rate": 9.009643150827683e-06,
|
|
"loss": 0.7565,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 1.2183510638297872,
|
|
"grad_norm": 3.9615867137908936,
|
|
"learning_rate": 9.00911765492467e-06,
|
|
"loss": 0.8134,
|
|
"step": 4581
|
|
},
|
|
{
|
|
"epoch": 1.2186170212765957,
|
|
"grad_norm": 3.852104902267456,
|
|
"learning_rate": 9.008592034974836e-06,
|
|
"loss": 0.7654,
|
|
"step": 4582
|
|
},
|
|
{
|
|
"epoch": 1.2188829787234043,
|
|
"grad_norm": 3.5889623165130615,
|
|
"learning_rate": 9.008066290994443e-06,
|
|
"loss": 0.816,
|
|
"step": 4583
|
|
},
|
|
{
|
|
"epoch": 1.2191489361702128,
|
|
"grad_norm": 3.7613863945007324,
|
|
"learning_rate": 9.007540422999762e-06,
|
|
"loss": 0.7356,
|
|
"step": 4584
|
|
},
|
|
{
|
|
"epoch": 1.2194148936170213,
|
|
"grad_norm": 4.141067981719971,
|
|
"learning_rate": 9.007014431007064e-06,
|
|
"loss": 0.8445,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 1.2196808510638297,
|
|
"grad_norm": 3.842954635620117,
|
|
"learning_rate": 9.00648831503262e-06,
|
|
"loss": 0.7844,
|
|
"step": 4586
|
|
},
|
|
{
|
|
"epoch": 1.2199468085106382,
|
|
"grad_norm": 3.799661159515381,
|
|
"learning_rate": 9.00596207509271e-06,
|
|
"loss": 0.8777,
|
|
"step": 4587
|
|
},
|
|
{
|
|
"epoch": 1.2202127659574469,
|
|
"grad_norm": 4.335452079772949,
|
|
"learning_rate": 9.005435711203619e-06,
|
|
"loss": 0.936,
|
|
"step": 4588
|
|
},
|
|
{
|
|
"epoch": 1.2204787234042553,
|
|
"grad_norm": 3.905426025390625,
|
|
"learning_rate": 9.004909223381628e-06,
|
|
"loss": 0.7583,
|
|
"step": 4589
|
|
},
|
|
{
|
|
"epoch": 1.2207446808510638,
|
|
"grad_norm": 3.950054168701172,
|
|
"learning_rate": 9.004382611643032e-06,
|
|
"loss": 0.8512,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 1.2210106382978723,
|
|
"grad_norm": 4.1044135093688965,
|
|
"learning_rate": 9.003855876004124e-06,
|
|
"loss": 0.7941,
|
|
"step": 4591
|
|
},
|
|
{
|
|
"epoch": 1.2212765957446807,
|
|
"grad_norm": 3.908524751663208,
|
|
"learning_rate": 9.003329016481201e-06,
|
|
"loss": 0.7502,
|
|
"step": 4592
|
|
},
|
|
{
|
|
"epoch": 1.2215425531914894,
|
|
"grad_norm": 3.6956968307495117,
|
|
"learning_rate": 9.002802033090564e-06,
|
|
"loss": 0.7847,
|
|
"step": 4593
|
|
},
|
|
{
|
|
"epoch": 1.2218085106382979,
|
|
"grad_norm": 4.292162895202637,
|
|
"learning_rate": 9.00227492584852e-06,
|
|
"loss": 0.7966,
|
|
"step": 4594
|
|
},
|
|
{
|
|
"epoch": 1.2220744680851063,
|
|
"grad_norm": 4.15654993057251,
|
|
"learning_rate": 9.001747694771378e-06,
|
|
"loss": 0.7523,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 1.2223404255319148,
|
|
"grad_norm": 3.5688204765319824,
|
|
"learning_rate": 9.00122033987545e-06,
|
|
"loss": 0.6891,
|
|
"step": 4596
|
|
},
|
|
{
|
|
"epoch": 1.2226063829787235,
|
|
"grad_norm": 3.962028980255127,
|
|
"learning_rate": 9.000692861177056e-06,
|
|
"loss": 0.7285,
|
|
"step": 4597
|
|
},
|
|
{
|
|
"epoch": 1.222872340425532,
|
|
"grad_norm": 4.2762651443481445,
|
|
"learning_rate": 9.000165258692512e-06,
|
|
"loss": 0.8359,
|
|
"step": 4598
|
|
},
|
|
{
|
|
"epoch": 1.2231382978723404,
|
|
"grad_norm": 4.260420799255371,
|
|
"learning_rate": 8.999637532438145e-06,
|
|
"loss": 0.9171,
|
|
"step": 4599
|
|
},
|
|
{
|
|
"epoch": 1.2234042553191489,
|
|
"grad_norm": 4.032958507537842,
|
|
"learning_rate": 8.999109682430288e-06,
|
|
"loss": 0.8082,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 1.2236702127659576,
|
|
"grad_norm": 3.772594690322876,
|
|
"learning_rate": 8.998581708685264e-06,
|
|
"loss": 0.8029,
|
|
"step": 4601
|
|
},
|
|
{
|
|
"epoch": 1.223936170212766,
|
|
"grad_norm": 4.074283123016357,
|
|
"learning_rate": 8.998053611219418e-06,
|
|
"loss": 0.729,
|
|
"step": 4602
|
|
},
|
|
{
|
|
"epoch": 1.2242021276595745,
|
|
"grad_norm": 3.5871801376342773,
|
|
"learning_rate": 8.997525390049084e-06,
|
|
"loss": 0.8645,
|
|
"step": 4603
|
|
},
|
|
{
|
|
"epoch": 1.224468085106383,
|
|
"grad_norm": 3.789030075073242,
|
|
"learning_rate": 8.996997045190608e-06,
|
|
"loss": 0.7226,
|
|
"step": 4604
|
|
},
|
|
{
|
|
"epoch": 1.2247340425531914,
|
|
"grad_norm": 3.840949296951294,
|
|
"learning_rate": 8.996468576660337e-06,
|
|
"loss": 0.8817,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 1.225,
|
|
"grad_norm": 4.251964569091797,
|
|
"learning_rate": 8.995939984474624e-06,
|
|
"loss": 0.7567,
|
|
"step": 4606
|
|
},
|
|
{
|
|
"epoch": 1.2252659574468086,
|
|
"grad_norm": 3.7050812244415283,
|
|
"learning_rate": 8.995411268649823e-06,
|
|
"loss": 0.8609,
|
|
"step": 4607
|
|
},
|
|
{
|
|
"epoch": 1.225531914893617,
|
|
"grad_norm": 4.209064483642578,
|
|
"learning_rate": 8.994882429202294e-06,
|
|
"loss": 0.8653,
|
|
"step": 4608
|
|
},
|
|
{
|
|
"epoch": 1.2257978723404255,
|
|
"grad_norm": 4.214296340942383,
|
|
"learning_rate": 8.994353466148399e-06,
|
|
"loss": 0.8262,
|
|
"step": 4609
|
|
},
|
|
{
|
|
"epoch": 1.226063829787234,
|
|
"grad_norm": 3.9574646949768066,
|
|
"learning_rate": 8.993824379504505e-06,
|
|
"loss": 0.7383,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 1.2263297872340426,
|
|
"grad_norm": 4.194293975830078,
|
|
"learning_rate": 8.993295169286982e-06,
|
|
"loss": 0.7483,
|
|
"step": 4611
|
|
},
|
|
{
|
|
"epoch": 1.226595744680851,
|
|
"grad_norm": 3.9258837699890137,
|
|
"learning_rate": 8.992765835512205e-06,
|
|
"loss": 0.7151,
|
|
"step": 4612
|
|
},
|
|
{
|
|
"epoch": 1.2268617021276595,
|
|
"grad_norm": 3.662429094314575,
|
|
"learning_rate": 8.992236378196552e-06,
|
|
"loss": 0.8595,
|
|
"step": 4613
|
|
},
|
|
{
|
|
"epoch": 1.227127659574468,
|
|
"grad_norm": 3.745591640472412,
|
|
"learning_rate": 8.991706797356407e-06,
|
|
"loss": 0.8065,
|
|
"step": 4614
|
|
},
|
|
{
|
|
"epoch": 1.2273936170212765,
|
|
"grad_norm": 3.8420639038085938,
|
|
"learning_rate": 8.991177093008153e-06,
|
|
"loss": 0.7613,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 1.2276595744680852,
|
|
"grad_norm": 3.994805097579956,
|
|
"learning_rate": 8.990647265168179e-06,
|
|
"loss": 0.7919,
|
|
"step": 4616
|
|
},
|
|
{
|
|
"epoch": 1.2279255319148936,
|
|
"grad_norm": 4.0484514236450195,
|
|
"learning_rate": 8.990117313852882e-06,
|
|
"loss": 0.9,
|
|
"step": 4617
|
|
},
|
|
{
|
|
"epoch": 1.228191489361702,
|
|
"grad_norm": 3.999068260192871,
|
|
"learning_rate": 8.989587239078658e-06,
|
|
"loss": 0.7472,
|
|
"step": 4618
|
|
},
|
|
{
|
|
"epoch": 1.2284574468085105,
|
|
"grad_norm": 3.9625680446624756,
|
|
"learning_rate": 8.989057040861905e-06,
|
|
"loss": 1.0265,
|
|
"step": 4619
|
|
},
|
|
{
|
|
"epoch": 1.2287234042553192,
|
|
"grad_norm": 4.0248284339904785,
|
|
"learning_rate": 8.988526719219035e-06,
|
|
"loss": 0.7525,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 1.2289893617021277,
|
|
"grad_norm": 3.985003709793091,
|
|
"learning_rate": 8.987996274166449e-06,
|
|
"loss": 0.8491,
|
|
"step": 4621
|
|
},
|
|
{
|
|
"epoch": 1.2292553191489362,
|
|
"grad_norm": 3.5832836627960205,
|
|
"learning_rate": 8.987465705720565e-06,
|
|
"loss": 0.6647,
|
|
"step": 4622
|
|
},
|
|
{
|
|
"epoch": 1.2295212765957446,
|
|
"grad_norm": 3.5431840419769287,
|
|
"learning_rate": 8.986935013897796e-06,
|
|
"loss": 0.7142,
|
|
"step": 4623
|
|
},
|
|
{
|
|
"epoch": 1.2297872340425533,
|
|
"grad_norm": 3.745082139968872,
|
|
"learning_rate": 8.986404198714561e-06,
|
|
"loss": 0.6538,
|
|
"step": 4624
|
|
},
|
|
{
|
|
"epoch": 1.2300531914893618,
|
|
"grad_norm": 3.653146982192993,
|
|
"learning_rate": 8.98587326018729e-06,
|
|
"loss": 0.7833,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 1.2303191489361702,
|
|
"grad_norm": 3.9238173961639404,
|
|
"learning_rate": 8.985342198332407e-06,
|
|
"loss": 0.8265,
|
|
"step": 4626
|
|
},
|
|
{
|
|
"epoch": 1.2305851063829787,
|
|
"grad_norm": 4.6217265129089355,
|
|
"learning_rate": 8.984811013166345e-06,
|
|
"loss": 0.9442,
|
|
"step": 4627
|
|
},
|
|
{
|
|
"epoch": 1.2308510638297872,
|
|
"grad_norm": 3.7040395736694336,
|
|
"learning_rate": 8.98427970470554e-06,
|
|
"loss": 0.8234,
|
|
"step": 4628
|
|
},
|
|
{
|
|
"epoch": 1.2311170212765958,
|
|
"grad_norm": 3.8721320629119873,
|
|
"learning_rate": 8.983748272966426e-06,
|
|
"loss": 0.8997,
|
|
"step": 4629
|
|
},
|
|
{
|
|
"epoch": 1.2313829787234043,
|
|
"grad_norm": 3.5621466636657715,
|
|
"learning_rate": 8.983216717965453e-06,
|
|
"loss": 0.8186,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 1.2316489361702128,
|
|
"grad_norm": 3.854879379272461,
|
|
"learning_rate": 8.982685039719064e-06,
|
|
"loss": 0.773,
|
|
"step": 4631
|
|
},
|
|
{
|
|
"epoch": 1.2319148936170212,
|
|
"grad_norm": 3.9702491760253906,
|
|
"learning_rate": 8.982153238243712e-06,
|
|
"loss": 0.8645,
|
|
"step": 4632
|
|
},
|
|
{
|
|
"epoch": 1.2321808510638297,
|
|
"grad_norm": 4.122603416442871,
|
|
"learning_rate": 8.981621313555849e-06,
|
|
"loss": 0.7651,
|
|
"step": 4633
|
|
},
|
|
{
|
|
"epoch": 1.2324468085106384,
|
|
"grad_norm": 4.362513065338135,
|
|
"learning_rate": 8.981089265671936e-06,
|
|
"loss": 0.8279,
|
|
"step": 4634
|
|
},
|
|
{
|
|
"epoch": 1.2327127659574468,
|
|
"grad_norm": 4.333089351654053,
|
|
"learning_rate": 8.980557094608433e-06,
|
|
"loss": 0.8613,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 1.2329787234042553,
|
|
"grad_norm": 3.9214844703674316,
|
|
"learning_rate": 8.980024800381807e-06,
|
|
"loss": 0.8316,
|
|
"step": 4636
|
|
},
|
|
{
|
|
"epoch": 1.2332446808510638,
|
|
"grad_norm": 3.9786224365234375,
|
|
"learning_rate": 8.979492383008528e-06,
|
|
"loss": 0.8405,
|
|
"step": 4637
|
|
},
|
|
{
|
|
"epoch": 1.2335106382978722,
|
|
"grad_norm": 4.105279445648193,
|
|
"learning_rate": 8.978959842505071e-06,
|
|
"loss": 0.8187,
|
|
"step": 4638
|
|
},
|
|
{
|
|
"epoch": 1.233776595744681,
|
|
"grad_norm": 4.662153244018555,
|
|
"learning_rate": 8.97842717888791e-06,
|
|
"loss": 0.8309,
|
|
"step": 4639
|
|
},
|
|
{
|
|
"epoch": 1.2340425531914894,
|
|
"grad_norm": 4.0390400886535645,
|
|
"learning_rate": 8.977894392173527e-06,
|
|
"loss": 0.823,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 1.2343085106382978,
|
|
"grad_norm": 3.574883222579956,
|
|
"learning_rate": 8.97736148237841e-06,
|
|
"loss": 0.899,
|
|
"step": 4641
|
|
},
|
|
{
|
|
"epoch": 1.2345744680851063,
|
|
"grad_norm": 3.9242796897888184,
|
|
"learning_rate": 8.976828449519047e-06,
|
|
"loss": 0.9994,
|
|
"step": 4642
|
|
},
|
|
{
|
|
"epoch": 1.234840425531915,
|
|
"grad_norm": 3.9096062183380127,
|
|
"learning_rate": 8.976295293611927e-06,
|
|
"loss": 0.907,
|
|
"step": 4643
|
|
},
|
|
{
|
|
"epoch": 1.2351063829787234,
|
|
"grad_norm": 4.211862087249756,
|
|
"learning_rate": 8.97576201467355e-06,
|
|
"loss": 0.807,
|
|
"step": 4644
|
|
},
|
|
{
|
|
"epoch": 1.235372340425532,
|
|
"grad_norm": 3.7779862880706787,
|
|
"learning_rate": 8.975228612720415e-06,
|
|
"loss": 0.7325,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 1.2356382978723404,
|
|
"grad_norm": 4.162439823150635,
|
|
"learning_rate": 8.974695087769027e-06,
|
|
"loss": 0.9018,
|
|
"step": 4646
|
|
},
|
|
{
|
|
"epoch": 1.235904255319149,
|
|
"grad_norm": 3.9376440048217773,
|
|
"learning_rate": 8.974161439835894e-06,
|
|
"loss": 0.7467,
|
|
"step": 4647
|
|
},
|
|
{
|
|
"epoch": 1.2361702127659575,
|
|
"grad_norm": 3.728128433227539,
|
|
"learning_rate": 8.973627668937528e-06,
|
|
"loss": 0.6471,
|
|
"step": 4648
|
|
},
|
|
{
|
|
"epoch": 1.236436170212766,
|
|
"grad_norm": 4.1924967765808105,
|
|
"learning_rate": 8.97309377509044e-06,
|
|
"loss": 0.8827,
|
|
"step": 4649
|
|
},
|
|
{
|
|
"epoch": 1.2367021276595744,
|
|
"grad_norm": 3.9644808769226074,
|
|
"learning_rate": 8.972559758311156e-06,
|
|
"loss": 0.737,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 1.236968085106383,
|
|
"grad_norm": 4.276489734649658,
|
|
"learning_rate": 8.972025618616195e-06,
|
|
"loss": 0.7805,
|
|
"step": 4651
|
|
},
|
|
{
|
|
"epoch": 1.2372340425531916,
|
|
"grad_norm": 4.115257263183594,
|
|
"learning_rate": 8.971491356022086e-06,
|
|
"loss": 0.8479,
|
|
"step": 4652
|
|
},
|
|
{
|
|
"epoch": 1.2375,
|
|
"grad_norm": 4.143589019775391,
|
|
"learning_rate": 8.970956970545356e-06,
|
|
"loss": 0.7716,
|
|
"step": 4653
|
|
},
|
|
{
|
|
"epoch": 1.2377659574468085,
|
|
"grad_norm": 3.872377634048462,
|
|
"learning_rate": 8.970422462202543e-06,
|
|
"loss": 0.7949,
|
|
"step": 4654
|
|
},
|
|
{
|
|
"epoch": 1.238031914893617,
|
|
"grad_norm": 3.9074594974517822,
|
|
"learning_rate": 8.969887831010185e-06,
|
|
"loss": 0.818,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 1.2382978723404254,
|
|
"grad_norm": 3.7083117961883545,
|
|
"learning_rate": 8.969353076984823e-06,
|
|
"loss": 0.823,
|
|
"step": 4656
|
|
},
|
|
{
|
|
"epoch": 1.2385638297872341,
|
|
"grad_norm": 3.952829122543335,
|
|
"learning_rate": 8.968818200143005e-06,
|
|
"loss": 0.7928,
|
|
"step": 4657
|
|
},
|
|
{
|
|
"epoch": 1.2388297872340426,
|
|
"grad_norm": 4.015969276428223,
|
|
"learning_rate": 8.96828320050128e-06,
|
|
"loss": 0.8713,
|
|
"step": 4658
|
|
},
|
|
{
|
|
"epoch": 1.239095744680851,
|
|
"grad_norm": 4.456661701202393,
|
|
"learning_rate": 8.967748078076197e-06,
|
|
"loss": 0.8482,
|
|
"step": 4659
|
|
},
|
|
{
|
|
"epoch": 1.2393617021276595,
|
|
"grad_norm": 3.8664846420288086,
|
|
"learning_rate": 8.96721283288432e-06,
|
|
"loss": 0.7526,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 1.239627659574468,
|
|
"grad_norm": 4.358894348144531,
|
|
"learning_rate": 8.966677464942206e-06,
|
|
"loss": 0.7756,
|
|
"step": 4661
|
|
},
|
|
{
|
|
"epoch": 1.2398936170212767,
|
|
"grad_norm": 3.8991811275482178,
|
|
"learning_rate": 8.96614197426642e-06,
|
|
"loss": 0.7629,
|
|
"step": 4662
|
|
},
|
|
{
|
|
"epoch": 1.2401595744680851,
|
|
"grad_norm": 3.752913236618042,
|
|
"learning_rate": 8.965606360873533e-06,
|
|
"loss": 0.7598,
|
|
"step": 4663
|
|
},
|
|
{
|
|
"epoch": 1.2404255319148936,
|
|
"grad_norm": 4.097616672515869,
|
|
"learning_rate": 8.965070624780117e-06,
|
|
"loss": 0.7635,
|
|
"step": 4664
|
|
},
|
|
{
|
|
"epoch": 1.240691489361702,
|
|
"grad_norm": 3.855180025100708,
|
|
"learning_rate": 8.964534766002747e-06,
|
|
"loss": 0.8571,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 1.2409574468085107,
|
|
"grad_norm": 4.117387771606445,
|
|
"learning_rate": 8.963998784558001e-06,
|
|
"loss": 0.8517,
|
|
"step": 4666
|
|
},
|
|
{
|
|
"epoch": 1.2412234042553192,
|
|
"grad_norm": 4.247325897216797,
|
|
"learning_rate": 8.963462680462469e-06,
|
|
"loss": 0.7862,
|
|
"step": 4667
|
|
},
|
|
{
|
|
"epoch": 1.2414893617021276,
|
|
"grad_norm": 4.604616165161133,
|
|
"learning_rate": 8.962926453732734e-06,
|
|
"loss": 0.8325,
|
|
"step": 4668
|
|
},
|
|
{
|
|
"epoch": 1.241755319148936,
|
|
"grad_norm": 4.283206462860107,
|
|
"learning_rate": 8.96239010438539e-06,
|
|
"loss": 0.7897,
|
|
"step": 4669
|
|
},
|
|
{
|
|
"epoch": 1.2420212765957448,
|
|
"grad_norm": 4.039552688598633,
|
|
"learning_rate": 8.96185363243703e-06,
|
|
"loss": 0.8889,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 1.2422872340425533,
|
|
"grad_norm": 3.6952388286590576,
|
|
"learning_rate": 8.961317037904253e-06,
|
|
"loss": 0.7318,
|
|
"step": 4671
|
|
},
|
|
{
|
|
"epoch": 1.2425531914893617,
|
|
"grad_norm": 4.330514907836914,
|
|
"learning_rate": 8.960780320803665e-06,
|
|
"loss": 0.8473,
|
|
"step": 4672
|
|
},
|
|
{
|
|
"epoch": 1.2428191489361702,
|
|
"grad_norm": 3.8652656078338623,
|
|
"learning_rate": 8.960243481151869e-06,
|
|
"loss": 0.7744,
|
|
"step": 4673
|
|
},
|
|
{
|
|
"epoch": 1.2430851063829786,
|
|
"grad_norm": 4.232844352722168,
|
|
"learning_rate": 8.959706518965479e-06,
|
|
"loss": 0.7232,
|
|
"step": 4674
|
|
},
|
|
{
|
|
"epoch": 1.2433510638297873,
|
|
"grad_norm": 3.9439735412597656,
|
|
"learning_rate": 8.959169434261106e-06,
|
|
"loss": 0.7025,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 1.2436170212765958,
|
|
"grad_norm": 3.876521587371826,
|
|
"learning_rate": 8.958632227055369e-06,
|
|
"loss": 0.6779,
|
|
"step": 4676
|
|
},
|
|
{
|
|
"epoch": 1.2438829787234043,
|
|
"grad_norm": 3.7715842723846436,
|
|
"learning_rate": 8.95809489736489e-06,
|
|
"loss": 0.7331,
|
|
"step": 4677
|
|
},
|
|
{
|
|
"epoch": 1.2441489361702127,
|
|
"grad_norm": 4.344306945800781,
|
|
"learning_rate": 8.957557445206297e-06,
|
|
"loss": 0.797,
|
|
"step": 4678
|
|
},
|
|
{
|
|
"epoch": 1.2444148936170212,
|
|
"grad_norm": 3.924248218536377,
|
|
"learning_rate": 8.957019870596216e-06,
|
|
"loss": 0.9321,
|
|
"step": 4679
|
|
},
|
|
{
|
|
"epoch": 1.2446808510638299,
|
|
"grad_norm": 3.8048911094665527,
|
|
"learning_rate": 8.956482173551281e-06,
|
|
"loss": 0.7405,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 1.2449468085106383,
|
|
"grad_norm": 4.218112468719482,
|
|
"learning_rate": 8.95594435408813e-06,
|
|
"loss": 0.8395,
|
|
"step": 4681
|
|
},
|
|
{
|
|
"epoch": 1.2452127659574468,
|
|
"grad_norm": 3.683992385864258,
|
|
"learning_rate": 8.955406412223402e-06,
|
|
"loss": 0.7261,
|
|
"step": 4682
|
|
},
|
|
{
|
|
"epoch": 1.2454787234042553,
|
|
"grad_norm": 4.05771541595459,
|
|
"learning_rate": 8.954868347973742e-06,
|
|
"loss": 0.85,
|
|
"step": 4683
|
|
},
|
|
{
|
|
"epoch": 1.2457446808510637,
|
|
"grad_norm": 4.423064708709717,
|
|
"learning_rate": 8.954330161355803e-06,
|
|
"loss": 0.8632,
|
|
"step": 4684
|
|
},
|
|
{
|
|
"epoch": 1.2460106382978724,
|
|
"grad_norm": 4.039585113525391,
|
|
"learning_rate": 8.953791852386229e-06,
|
|
"loss": 0.8078,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 1.2462765957446809,
|
|
"grad_norm": 4.336376190185547,
|
|
"learning_rate": 8.953253421081682e-06,
|
|
"loss": 0.807,
|
|
"step": 4686
|
|
},
|
|
{
|
|
"epoch": 1.2465425531914893,
|
|
"grad_norm": 4.025651454925537,
|
|
"learning_rate": 8.95271486745882e-06,
|
|
"loss": 0.8651,
|
|
"step": 4687
|
|
},
|
|
{
|
|
"epoch": 1.2468085106382978,
|
|
"grad_norm": 3.839545488357544,
|
|
"learning_rate": 8.952176191534305e-06,
|
|
"loss": 0.7696,
|
|
"step": 4688
|
|
},
|
|
{
|
|
"epoch": 1.2470744680851065,
|
|
"grad_norm": 3.4037442207336426,
|
|
"learning_rate": 8.951637393324806e-06,
|
|
"loss": 0.7827,
|
|
"step": 4689
|
|
},
|
|
{
|
|
"epoch": 1.247340425531915,
|
|
"grad_norm": 4.202190399169922,
|
|
"learning_rate": 8.951098472846994e-06,
|
|
"loss": 0.6717,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 1.2476063829787234,
|
|
"grad_norm": 4.145596027374268,
|
|
"learning_rate": 8.950559430117542e-06,
|
|
"loss": 0.8201,
|
|
"step": 4691
|
|
},
|
|
{
|
|
"epoch": 1.2478723404255319,
|
|
"grad_norm": 4.066543102264404,
|
|
"learning_rate": 8.950020265153133e-06,
|
|
"loss": 0.7651,
|
|
"step": 4692
|
|
},
|
|
{
|
|
"epoch": 1.2481382978723405,
|
|
"grad_norm": 3.9612643718719482,
|
|
"learning_rate": 8.949480977970444e-06,
|
|
"loss": 0.7625,
|
|
"step": 4693
|
|
},
|
|
{
|
|
"epoch": 1.248404255319149,
|
|
"grad_norm": 3.6797444820404053,
|
|
"learning_rate": 8.948941568586165e-06,
|
|
"loss": 0.7396,
|
|
"step": 4694
|
|
},
|
|
{
|
|
"epoch": 1.2486702127659575,
|
|
"grad_norm": 4.5470662117004395,
|
|
"learning_rate": 8.948402037016984e-06,
|
|
"loss": 0.831,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 1.248936170212766,
|
|
"grad_norm": 3.3565194606781006,
|
|
"learning_rate": 8.947862383279594e-06,
|
|
"loss": 0.6773,
|
|
"step": 4696
|
|
},
|
|
{
|
|
"epoch": 1.2492021276595744,
|
|
"grad_norm": 4.042359352111816,
|
|
"learning_rate": 8.947322607390694e-06,
|
|
"loss": 0.8052,
|
|
"step": 4697
|
|
},
|
|
{
|
|
"epoch": 1.249468085106383,
|
|
"grad_norm": 3.909513235092163,
|
|
"learning_rate": 8.946782709366988e-06,
|
|
"loss": 0.8849,
|
|
"step": 4698
|
|
},
|
|
{
|
|
"epoch": 1.2497340425531915,
|
|
"grad_norm": 4.553561687469482,
|
|
"learning_rate": 8.946242689225175e-06,
|
|
"loss": 0.9048,
|
|
"step": 4699
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 4.289936542510986,
|
|
"learning_rate": 8.94570254698197e-06,
|
|
"loss": 0.8465,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 1.2502659574468085,
|
|
"grad_norm": 3.7364187240600586,
|
|
"learning_rate": 8.94516228265408e-06,
|
|
"loss": 0.9081,
|
|
"step": 4701
|
|
},
|
|
{
|
|
"epoch": 1.250531914893617,
|
|
"grad_norm": 3.8869049549102783,
|
|
"learning_rate": 8.944621896258226e-06,
|
|
"loss": 0.7625,
|
|
"step": 4702
|
|
},
|
|
{
|
|
"epoch": 1.2507978723404256,
|
|
"grad_norm": 4.203104019165039,
|
|
"learning_rate": 8.944081387811126e-06,
|
|
"loss": 0.7822,
|
|
"step": 4703
|
|
},
|
|
{
|
|
"epoch": 1.251063829787234,
|
|
"grad_norm": 3.810011148452759,
|
|
"learning_rate": 8.943540757329503e-06,
|
|
"loss": 0.9403,
|
|
"step": 4704
|
|
},
|
|
{
|
|
"epoch": 1.2513297872340425,
|
|
"grad_norm": 3.795477867126465,
|
|
"learning_rate": 8.943000004830087e-06,
|
|
"loss": 0.7856,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 1.251595744680851,
|
|
"grad_norm": 4.174344062805176,
|
|
"learning_rate": 8.942459130329608e-06,
|
|
"loss": 0.8522,
|
|
"step": 4706
|
|
},
|
|
{
|
|
"epoch": 1.2518617021276595,
|
|
"grad_norm": 3.6374874114990234,
|
|
"learning_rate": 8.941918133844803e-06,
|
|
"loss": 0.8471,
|
|
"step": 4707
|
|
},
|
|
{
|
|
"epoch": 1.2521276595744681,
|
|
"grad_norm": 3.645719528198242,
|
|
"learning_rate": 8.941377015392407e-06,
|
|
"loss": 0.7564,
|
|
"step": 4708
|
|
},
|
|
{
|
|
"epoch": 1.2523936170212766,
|
|
"grad_norm": 4.238284587860107,
|
|
"learning_rate": 8.94083577498917e-06,
|
|
"loss": 0.9556,
|
|
"step": 4709
|
|
},
|
|
{
|
|
"epoch": 1.252659574468085,
|
|
"grad_norm": 4.101098537445068,
|
|
"learning_rate": 8.940294412651831e-06,
|
|
"loss": 0.9095,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 1.2529255319148938,
|
|
"grad_norm": 3.56626296043396,
|
|
"learning_rate": 8.939752928397146e-06,
|
|
"loss": 0.7358,
|
|
"step": 4711
|
|
},
|
|
{
|
|
"epoch": 1.253191489361702,
|
|
"grad_norm": 3.680903434753418,
|
|
"learning_rate": 8.939211322241866e-06,
|
|
"loss": 0.7556,
|
|
"step": 4712
|
|
},
|
|
{
|
|
"epoch": 1.2534574468085107,
|
|
"grad_norm": 4.173125267028809,
|
|
"learning_rate": 8.938669594202748e-06,
|
|
"loss": 0.7488,
|
|
"step": 4713
|
|
},
|
|
{
|
|
"epoch": 1.2537234042553191,
|
|
"grad_norm": 4.197647571563721,
|
|
"learning_rate": 8.938127744296559e-06,
|
|
"loss": 0.8367,
|
|
"step": 4714
|
|
},
|
|
{
|
|
"epoch": 1.2539893617021276,
|
|
"grad_norm": 3.5184898376464844,
|
|
"learning_rate": 8.937585772540058e-06,
|
|
"loss": 0.7586,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 1.2542553191489363,
|
|
"grad_norm": 4.331880569458008,
|
|
"learning_rate": 8.93704367895002e-06,
|
|
"loss": 0.9277,
|
|
"step": 4716
|
|
},
|
|
{
|
|
"epoch": 1.2545212765957447,
|
|
"grad_norm": 4.3062238693237305,
|
|
"learning_rate": 8.936501463543213e-06,
|
|
"loss": 0.7798,
|
|
"step": 4717
|
|
},
|
|
{
|
|
"epoch": 1.2547872340425532,
|
|
"grad_norm": 4.3987956047058105,
|
|
"learning_rate": 8.935959126336418e-06,
|
|
"loss": 0.8121,
|
|
"step": 4718
|
|
},
|
|
{
|
|
"epoch": 1.2550531914893617,
|
|
"grad_norm": 3.8964762687683105,
|
|
"learning_rate": 8.935416667346412e-06,
|
|
"loss": 0.8318,
|
|
"step": 4719
|
|
},
|
|
{
|
|
"epoch": 1.2553191489361701,
|
|
"grad_norm": 4.110397815704346,
|
|
"learning_rate": 8.934874086589981e-06,
|
|
"loss": 0.7502,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 1.2555851063829788,
|
|
"grad_norm": 3.531947135925293,
|
|
"learning_rate": 8.934331384083914e-06,
|
|
"loss": 0.7613,
|
|
"step": 4721
|
|
},
|
|
{
|
|
"epoch": 1.2558510638297873,
|
|
"grad_norm": 3.8877408504486084,
|
|
"learning_rate": 8.933788559845001e-06,
|
|
"loss": 0.7568,
|
|
"step": 4722
|
|
},
|
|
{
|
|
"epoch": 1.2561170212765957,
|
|
"grad_norm": 3.653062582015991,
|
|
"learning_rate": 8.93324561389004e-06,
|
|
"loss": 0.7156,
|
|
"step": 4723
|
|
},
|
|
{
|
|
"epoch": 1.2563829787234042,
|
|
"grad_norm": 3.9823882579803467,
|
|
"learning_rate": 8.932702546235827e-06,
|
|
"loss": 0.8349,
|
|
"step": 4724
|
|
},
|
|
{
|
|
"epoch": 1.2566489361702127,
|
|
"grad_norm": 3.867664337158203,
|
|
"learning_rate": 8.932159356899169e-06,
|
|
"loss": 0.7605,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 1.2569148936170214,
|
|
"grad_norm": 3.945042371749878,
|
|
"learning_rate": 8.93161604589687e-06,
|
|
"loss": 0.698,
|
|
"step": 4726
|
|
},
|
|
{
|
|
"epoch": 1.2571808510638298,
|
|
"grad_norm": 4.207972049713135,
|
|
"learning_rate": 8.93107261324574e-06,
|
|
"loss": 0.9514,
|
|
"step": 4727
|
|
},
|
|
{
|
|
"epoch": 1.2574468085106383,
|
|
"grad_norm": 3.8403220176696777,
|
|
"learning_rate": 8.930529058962597e-06,
|
|
"loss": 0.7912,
|
|
"step": 4728
|
|
},
|
|
{
|
|
"epoch": 1.2577127659574467,
|
|
"grad_norm": 3.9817752838134766,
|
|
"learning_rate": 8.929985383064257e-06,
|
|
"loss": 0.752,
|
|
"step": 4729
|
|
},
|
|
{
|
|
"epoch": 1.2579787234042552,
|
|
"grad_norm": 3.786790132522583,
|
|
"learning_rate": 8.929441585567543e-06,
|
|
"loss": 0.7753,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 1.258244680851064,
|
|
"grad_norm": 3.5705316066741943,
|
|
"learning_rate": 8.928897666489278e-06,
|
|
"loss": 0.6983,
|
|
"step": 4731
|
|
},
|
|
{
|
|
"epoch": 1.2585106382978724,
|
|
"grad_norm": 3.8111605644226074,
|
|
"learning_rate": 8.928353625846294e-06,
|
|
"loss": 0.9261,
|
|
"step": 4732
|
|
},
|
|
{
|
|
"epoch": 1.2587765957446808,
|
|
"grad_norm": 3.8016891479492188,
|
|
"learning_rate": 8.927809463655424e-06,
|
|
"loss": 0.9297,
|
|
"step": 4733
|
|
},
|
|
{
|
|
"epoch": 1.2590425531914895,
|
|
"grad_norm": 3.998060941696167,
|
|
"learning_rate": 8.927265179933506e-06,
|
|
"loss": 0.8105,
|
|
"step": 4734
|
|
},
|
|
{
|
|
"epoch": 1.2593085106382977,
|
|
"grad_norm": 3.4611032009124756,
|
|
"learning_rate": 8.926720774697379e-06,
|
|
"loss": 0.7404,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 1.2595744680851064,
|
|
"grad_norm": 4.086428165435791,
|
|
"learning_rate": 8.926176247963886e-06,
|
|
"loss": 0.7905,
|
|
"step": 4736
|
|
},
|
|
{
|
|
"epoch": 1.2598404255319149,
|
|
"grad_norm": 4.124720573425293,
|
|
"learning_rate": 8.92563159974988e-06,
|
|
"loss": 0.9439,
|
|
"step": 4737
|
|
},
|
|
{
|
|
"epoch": 1.2601063829787233,
|
|
"grad_norm": 3.536327600479126,
|
|
"learning_rate": 8.92508683007221e-06,
|
|
"loss": 0.7992,
|
|
"step": 4738
|
|
},
|
|
{
|
|
"epoch": 1.260372340425532,
|
|
"grad_norm": 3.884551763534546,
|
|
"learning_rate": 8.924541938947731e-06,
|
|
"loss": 0.8708,
|
|
"step": 4739
|
|
},
|
|
{
|
|
"epoch": 1.2606382978723405,
|
|
"grad_norm": 4.106461048126221,
|
|
"learning_rate": 8.923996926393306e-06,
|
|
"loss": 0.8013,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 1.260904255319149,
|
|
"grad_norm": 3.6707823276519775,
|
|
"learning_rate": 8.923451792425795e-06,
|
|
"loss": 0.7818,
|
|
"step": 4741
|
|
},
|
|
{
|
|
"epoch": 1.2611702127659574,
|
|
"grad_norm": 4.26462984085083,
|
|
"learning_rate": 8.922906537062066e-06,
|
|
"loss": 0.9622,
|
|
"step": 4742
|
|
},
|
|
{
|
|
"epoch": 1.2614361702127659,
|
|
"grad_norm": 4.356677055358887,
|
|
"learning_rate": 8.92236116031899e-06,
|
|
"loss": 0.9918,
|
|
"step": 4743
|
|
},
|
|
{
|
|
"epoch": 1.2617021276595746,
|
|
"grad_norm": 3.735673427581787,
|
|
"learning_rate": 8.921815662213442e-06,
|
|
"loss": 0.6767,
|
|
"step": 4744
|
|
},
|
|
{
|
|
"epoch": 1.261968085106383,
|
|
"grad_norm": 3.9601590633392334,
|
|
"learning_rate": 8.9212700427623e-06,
|
|
"loss": 0.8667,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 1.2622340425531915,
|
|
"grad_norm": 3.9646952152252197,
|
|
"learning_rate": 8.920724301982446e-06,
|
|
"loss": 0.7383,
|
|
"step": 4746
|
|
},
|
|
{
|
|
"epoch": 1.2625,
|
|
"grad_norm": 3.402167320251465,
|
|
"learning_rate": 8.920178439890765e-06,
|
|
"loss": 0.7373,
|
|
"step": 4747
|
|
},
|
|
{
|
|
"epoch": 1.2627659574468084,
|
|
"grad_norm": 4.096093654632568,
|
|
"learning_rate": 8.91963245650415e-06,
|
|
"loss": 0.7765,
|
|
"step": 4748
|
|
},
|
|
{
|
|
"epoch": 1.263031914893617,
|
|
"grad_norm": 3.612751007080078,
|
|
"learning_rate": 8.91908635183949e-06,
|
|
"loss": 0.8401,
|
|
"step": 4749
|
|
},
|
|
{
|
|
"epoch": 1.2632978723404256,
|
|
"grad_norm": 4.043914318084717,
|
|
"learning_rate": 8.918540125913686e-06,
|
|
"loss": 0.7371,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 1.263563829787234,
|
|
"grad_norm": 3.865091562271118,
|
|
"learning_rate": 8.917993778743636e-06,
|
|
"loss": 0.6962,
|
|
"step": 4751
|
|
},
|
|
{
|
|
"epoch": 1.2638297872340425,
|
|
"grad_norm": 4.154531478881836,
|
|
"learning_rate": 8.917447310346245e-06,
|
|
"loss": 0.8158,
|
|
"step": 4752
|
|
},
|
|
{
|
|
"epoch": 1.264095744680851,
|
|
"grad_norm": 3.6052658557891846,
|
|
"learning_rate": 8.916900720738423e-06,
|
|
"loss": 0.7131,
|
|
"step": 4753
|
|
},
|
|
{
|
|
"epoch": 1.2643617021276596,
|
|
"grad_norm": 4.163410186767578,
|
|
"learning_rate": 8.916354009937081e-06,
|
|
"loss": 0.8955,
|
|
"step": 4754
|
|
},
|
|
{
|
|
"epoch": 1.264627659574468,
|
|
"grad_norm": 3.979421377182007,
|
|
"learning_rate": 8.915807177959133e-06,
|
|
"loss": 0.8712,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 1.2648936170212766,
|
|
"grad_norm": 3.4931585788726807,
|
|
"learning_rate": 8.915260224821504e-06,
|
|
"loss": 0.8079,
|
|
"step": 4756
|
|
},
|
|
{
|
|
"epoch": 1.265159574468085,
|
|
"grad_norm": 3.8094661235809326,
|
|
"learning_rate": 8.914713150541113e-06,
|
|
"loss": 0.8143,
|
|
"step": 4757
|
|
},
|
|
{
|
|
"epoch": 1.2654255319148935,
|
|
"grad_norm": 4.149999618530273,
|
|
"learning_rate": 8.914165955134886e-06,
|
|
"loss": 0.789,
|
|
"step": 4758
|
|
},
|
|
{
|
|
"epoch": 1.2656914893617022,
|
|
"grad_norm": 3.9979913234710693,
|
|
"learning_rate": 8.913618638619757e-06,
|
|
"loss": 0.8312,
|
|
"step": 4759
|
|
},
|
|
{
|
|
"epoch": 1.2659574468085106,
|
|
"grad_norm": 4.05308723449707,
|
|
"learning_rate": 8.91307120101266e-06,
|
|
"loss": 0.8029,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 1.266223404255319,
|
|
"grad_norm": 4.013595104217529,
|
|
"learning_rate": 8.912523642330533e-06,
|
|
"loss": 0.8625,
|
|
"step": 4761
|
|
},
|
|
{
|
|
"epoch": 1.2664893617021278,
|
|
"grad_norm": 3.932847023010254,
|
|
"learning_rate": 8.911975962590319e-06,
|
|
"loss": 0.8532,
|
|
"step": 4762
|
|
},
|
|
{
|
|
"epoch": 1.2667553191489362,
|
|
"grad_norm": 4.163691520690918,
|
|
"learning_rate": 8.911428161808962e-06,
|
|
"loss": 0.9048,
|
|
"step": 4763
|
|
},
|
|
{
|
|
"epoch": 1.2670212765957447,
|
|
"grad_norm": 4.368598461151123,
|
|
"learning_rate": 8.910880240003413e-06,
|
|
"loss": 0.7907,
|
|
"step": 4764
|
|
},
|
|
{
|
|
"epoch": 1.2672872340425532,
|
|
"grad_norm": 4.071594715118408,
|
|
"learning_rate": 8.910332197190623e-06,
|
|
"loss": 0.8764,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 1.2675531914893616,
|
|
"grad_norm": 3.6952078342437744,
|
|
"learning_rate": 8.909784033387552e-06,
|
|
"loss": 0.8343,
|
|
"step": 4766
|
|
},
|
|
{
|
|
"epoch": 1.2678191489361703,
|
|
"grad_norm": 3.967707872390747,
|
|
"learning_rate": 8.909235748611161e-06,
|
|
"loss": 0.7465,
|
|
"step": 4767
|
|
},
|
|
{
|
|
"epoch": 1.2680851063829788,
|
|
"grad_norm": 4.079662799835205,
|
|
"learning_rate": 8.908687342878413e-06,
|
|
"loss": 0.8126,
|
|
"step": 4768
|
|
},
|
|
{
|
|
"epoch": 1.2683510638297872,
|
|
"grad_norm": 3.95373272895813,
|
|
"learning_rate": 8.908138816206275e-06,
|
|
"loss": 0.7309,
|
|
"step": 4769
|
|
},
|
|
{
|
|
"epoch": 1.2686170212765957,
|
|
"grad_norm": 3.959603786468506,
|
|
"learning_rate": 8.907590168611724e-06,
|
|
"loss": 0.7635,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 1.2688829787234042,
|
|
"grad_norm": 3.9669322967529297,
|
|
"learning_rate": 8.90704140011173e-06,
|
|
"loss": 0.9031,
|
|
"step": 4771
|
|
},
|
|
{
|
|
"epoch": 1.2691489361702128,
|
|
"grad_norm": 4.063694477081299,
|
|
"learning_rate": 8.906492510723276e-06,
|
|
"loss": 0.8292,
|
|
"step": 4772
|
|
},
|
|
{
|
|
"epoch": 1.2694148936170213,
|
|
"grad_norm": 3.9221720695495605,
|
|
"learning_rate": 8.905943500463344e-06,
|
|
"loss": 0.7683,
|
|
"step": 4773
|
|
},
|
|
{
|
|
"epoch": 1.2696808510638298,
|
|
"grad_norm": 3.9919097423553467,
|
|
"learning_rate": 8.905394369348921e-06,
|
|
"loss": 0.7647,
|
|
"step": 4774
|
|
},
|
|
{
|
|
"epoch": 1.2699468085106382,
|
|
"grad_norm": 3.8253092765808105,
|
|
"learning_rate": 8.904845117397e-06,
|
|
"loss": 0.7056,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 1.2702127659574467,
|
|
"grad_norm": 3.5580105781555176,
|
|
"learning_rate": 8.904295744624572e-06,
|
|
"loss": 0.7939,
|
|
"step": 4776
|
|
},
|
|
{
|
|
"epoch": 1.2704787234042554,
|
|
"grad_norm": 3.987231492996216,
|
|
"learning_rate": 8.903746251048638e-06,
|
|
"loss": 0.8708,
|
|
"step": 4777
|
|
},
|
|
{
|
|
"epoch": 1.2707446808510638,
|
|
"grad_norm": 3.8669490814208984,
|
|
"learning_rate": 8.903196636686198e-06,
|
|
"loss": 0.776,
|
|
"step": 4778
|
|
},
|
|
{
|
|
"epoch": 1.2710106382978723,
|
|
"grad_norm": 3.940711259841919,
|
|
"learning_rate": 8.902646901554258e-06,
|
|
"loss": 0.7831,
|
|
"step": 4779
|
|
},
|
|
{
|
|
"epoch": 1.2712765957446808,
|
|
"grad_norm": 4.304079055786133,
|
|
"learning_rate": 8.90209704566983e-06,
|
|
"loss": 0.8243,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 1.2715425531914892,
|
|
"grad_norm": 4.165473937988281,
|
|
"learning_rate": 8.901547069049924e-06,
|
|
"loss": 0.8804,
|
|
"step": 4781
|
|
},
|
|
{
|
|
"epoch": 1.271808510638298,
|
|
"grad_norm": 3.84690260887146,
|
|
"learning_rate": 8.900996971711558e-06,
|
|
"loss": 0.8067,
|
|
"step": 4782
|
|
},
|
|
{
|
|
"epoch": 1.2720744680851064,
|
|
"grad_norm": 3.9118542671203613,
|
|
"learning_rate": 8.900446753671754e-06,
|
|
"loss": 0.8676,
|
|
"step": 4783
|
|
},
|
|
{
|
|
"epoch": 1.2723404255319148,
|
|
"grad_norm": 4.110815525054932,
|
|
"learning_rate": 8.899896414947534e-06,
|
|
"loss": 0.6605,
|
|
"step": 4784
|
|
},
|
|
{
|
|
"epoch": 1.2726063829787235,
|
|
"grad_norm": 3.7008938789367676,
|
|
"learning_rate": 8.899345955555928e-06,
|
|
"loss": 0.7201,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 1.272872340425532,
|
|
"grad_norm": 4.3613691329956055,
|
|
"learning_rate": 8.898795375513966e-06,
|
|
"loss": 0.806,
|
|
"step": 4786
|
|
},
|
|
{
|
|
"epoch": 1.2731382978723405,
|
|
"grad_norm": 4.315506458282471,
|
|
"learning_rate": 8.898244674838687e-06,
|
|
"loss": 0.8599,
|
|
"step": 4787
|
|
},
|
|
{
|
|
"epoch": 1.273404255319149,
|
|
"grad_norm": 3.8863260746002197,
|
|
"learning_rate": 8.897693853547127e-06,
|
|
"loss": 0.7735,
|
|
"step": 4788
|
|
},
|
|
{
|
|
"epoch": 1.2736702127659574,
|
|
"grad_norm": 4.221061706542969,
|
|
"learning_rate": 8.89714291165633e-06,
|
|
"loss": 0.9449,
|
|
"step": 4789
|
|
},
|
|
{
|
|
"epoch": 1.273936170212766,
|
|
"grad_norm": 3.727510929107666,
|
|
"learning_rate": 8.896591849183343e-06,
|
|
"loss": 0.8311,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 1.2742021276595745,
|
|
"grad_norm": 3.9543018341064453,
|
|
"learning_rate": 8.896040666145218e-06,
|
|
"loss": 0.6876,
|
|
"step": 4791
|
|
},
|
|
{
|
|
"epoch": 1.274468085106383,
|
|
"grad_norm": 3.7465333938598633,
|
|
"learning_rate": 8.895489362559007e-06,
|
|
"loss": 0.7677,
|
|
"step": 4792
|
|
},
|
|
{
|
|
"epoch": 1.2747340425531914,
|
|
"grad_norm": 4.069217205047607,
|
|
"learning_rate": 8.894937938441768e-06,
|
|
"loss": 0.8168,
|
|
"step": 4793
|
|
},
|
|
{
|
|
"epoch": 1.275,
|
|
"grad_norm": 4.367965221405029,
|
|
"learning_rate": 8.894386393810563e-06,
|
|
"loss": 0.7627,
|
|
"step": 4794
|
|
},
|
|
{
|
|
"epoch": 1.2752659574468086,
|
|
"grad_norm": 3.4115452766418457,
|
|
"learning_rate": 8.893834728682459e-06,
|
|
"loss": 0.6498,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 1.275531914893617,
|
|
"grad_norm": 3.94594669342041,
|
|
"learning_rate": 8.893282943074524e-06,
|
|
"loss": 0.7735,
|
|
"step": 4796
|
|
},
|
|
{
|
|
"epoch": 1.2757978723404255,
|
|
"grad_norm": 3.6856279373168945,
|
|
"learning_rate": 8.89273103700383e-06,
|
|
"loss": 0.8616,
|
|
"step": 4797
|
|
},
|
|
{
|
|
"epoch": 1.276063829787234,
|
|
"grad_norm": 3.8516628742218018,
|
|
"learning_rate": 8.892179010487456e-06,
|
|
"loss": 0.8549,
|
|
"step": 4798
|
|
},
|
|
{
|
|
"epoch": 1.2763297872340424,
|
|
"grad_norm": 4.085914611816406,
|
|
"learning_rate": 8.891626863542479e-06,
|
|
"loss": 0.7623,
|
|
"step": 4799
|
|
},
|
|
{
|
|
"epoch": 1.2765957446808511,
|
|
"grad_norm": 3.8456547260284424,
|
|
"learning_rate": 8.891074596185987e-06,
|
|
"loss": 0.8117,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 1.2768617021276596,
|
|
"grad_norm": 4.302917003631592,
|
|
"learning_rate": 8.890522208435067e-06,
|
|
"loss": 0.8329,
|
|
"step": 4801
|
|
},
|
|
{
|
|
"epoch": 1.277127659574468,
|
|
"grad_norm": 4.0489912033081055,
|
|
"learning_rate": 8.889969700306807e-06,
|
|
"loss": 0.8957,
|
|
"step": 4802
|
|
},
|
|
{
|
|
"epoch": 1.2773936170212765,
|
|
"grad_norm": 4.2099199295043945,
|
|
"learning_rate": 8.889417071818306e-06,
|
|
"loss": 0.7582,
|
|
"step": 4803
|
|
},
|
|
{
|
|
"epoch": 1.277659574468085,
|
|
"grad_norm": 3.925480842590332,
|
|
"learning_rate": 8.888864322986658e-06,
|
|
"loss": 0.814,
|
|
"step": 4804
|
|
},
|
|
{
|
|
"epoch": 1.2779255319148937,
|
|
"grad_norm": 3.9066643714904785,
|
|
"learning_rate": 8.888311453828973e-06,
|
|
"loss": 0.798,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 1.2781914893617021,
|
|
"grad_norm": 3.6610445976257324,
|
|
"learning_rate": 8.887758464362352e-06,
|
|
"loss": 0.708,
|
|
"step": 4806
|
|
},
|
|
{
|
|
"epoch": 1.2784574468085106,
|
|
"grad_norm": 3.639225482940674,
|
|
"learning_rate": 8.887205354603908e-06,
|
|
"loss": 0.9377,
|
|
"step": 4807
|
|
},
|
|
{
|
|
"epoch": 1.2787234042553193,
|
|
"grad_norm": 4.213227272033691,
|
|
"learning_rate": 8.886652124570753e-06,
|
|
"loss": 0.8664,
|
|
"step": 4808
|
|
},
|
|
{
|
|
"epoch": 1.2789893617021277,
|
|
"grad_norm": 3.916071653366089,
|
|
"learning_rate": 8.886098774280006e-06,
|
|
"loss": 0.8438,
|
|
"step": 4809
|
|
},
|
|
{
|
|
"epoch": 1.2792553191489362,
|
|
"grad_norm": 3.6656155586242676,
|
|
"learning_rate": 8.885545303748786e-06,
|
|
"loss": 0.8395,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 1.2795212765957447,
|
|
"grad_norm": 3.8457565307617188,
|
|
"learning_rate": 8.884991712994223e-06,
|
|
"loss": 0.7528,
|
|
"step": 4811
|
|
},
|
|
{
|
|
"epoch": 1.2797872340425531,
|
|
"grad_norm": 4.223479270935059,
|
|
"learning_rate": 8.88443800203344e-06,
|
|
"loss": 0.8702,
|
|
"step": 4812
|
|
},
|
|
{
|
|
"epoch": 1.2800531914893618,
|
|
"grad_norm": 3.9296419620513916,
|
|
"learning_rate": 8.88388417088357e-06,
|
|
"loss": 0.8804,
|
|
"step": 4813
|
|
},
|
|
{
|
|
"epoch": 1.2803191489361703,
|
|
"grad_norm": 4.048618316650391,
|
|
"learning_rate": 8.883330219561754e-06,
|
|
"loss": 0.8696,
|
|
"step": 4814
|
|
},
|
|
{
|
|
"epoch": 1.2805851063829787,
|
|
"grad_norm": 3.960580825805664,
|
|
"learning_rate": 8.882776148085129e-06,
|
|
"loss": 0.7783,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 1.2808510638297872,
|
|
"grad_norm": 4.032505035400391,
|
|
"learning_rate": 8.882221956470838e-06,
|
|
"loss": 0.8208,
|
|
"step": 4816
|
|
},
|
|
{
|
|
"epoch": 1.2811170212765957,
|
|
"grad_norm": 4.192906379699707,
|
|
"learning_rate": 8.881667644736028e-06,
|
|
"loss": 0.8411,
|
|
"step": 4817
|
|
},
|
|
{
|
|
"epoch": 1.2813829787234043,
|
|
"grad_norm": 3.9931344985961914,
|
|
"learning_rate": 8.881113212897851e-06,
|
|
"loss": 0.8844,
|
|
"step": 4818
|
|
},
|
|
{
|
|
"epoch": 1.2816489361702128,
|
|
"grad_norm": 4.1028923988342285,
|
|
"learning_rate": 8.880558660973462e-06,
|
|
"loss": 0.7664,
|
|
"step": 4819
|
|
},
|
|
{
|
|
"epoch": 1.2819148936170213,
|
|
"grad_norm": 4.039322376251221,
|
|
"learning_rate": 8.880003988980019e-06,
|
|
"loss": 0.8436,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 1.2821808510638297,
|
|
"grad_norm": 4.0381388664245605,
|
|
"learning_rate": 8.879449196934687e-06,
|
|
"loss": 0.749,
|
|
"step": 4821
|
|
},
|
|
{
|
|
"epoch": 1.2824468085106382,
|
|
"grad_norm": 4.3847222328186035,
|
|
"learning_rate": 8.878894284854626e-06,
|
|
"loss": 0.8086,
|
|
"step": 4822
|
|
},
|
|
{
|
|
"epoch": 1.2827127659574469,
|
|
"grad_norm": 4.213246822357178,
|
|
"learning_rate": 8.878339252757011e-06,
|
|
"loss": 0.9063,
|
|
"step": 4823
|
|
},
|
|
{
|
|
"epoch": 1.2829787234042553,
|
|
"grad_norm": 4.628039360046387,
|
|
"learning_rate": 8.877784100659013e-06,
|
|
"loss": 0.9035,
|
|
"step": 4824
|
|
},
|
|
{
|
|
"epoch": 1.2832446808510638,
|
|
"grad_norm": 3.940800905227661,
|
|
"learning_rate": 8.877228828577809e-06,
|
|
"loss": 0.8975,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 1.2835106382978723,
|
|
"grad_norm": 3.82865571975708,
|
|
"learning_rate": 8.87667343653058e-06,
|
|
"loss": 0.7283,
|
|
"step": 4826
|
|
},
|
|
{
|
|
"epoch": 1.2837765957446807,
|
|
"grad_norm": 4.173588752746582,
|
|
"learning_rate": 8.876117924534511e-06,
|
|
"loss": 0.8323,
|
|
"step": 4827
|
|
},
|
|
{
|
|
"epoch": 1.2840425531914894,
|
|
"grad_norm": 3.6624155044555664,
|
|
"learning_rate": 8.87556229260679e-06,
|
|
"loss": 0.8799,
|
|
"step": 4828
|
|
},
|
|
{
|
|
"epoch": 1.2843085106382979,
|
|
"grad_norm": 3.8801040649414062,
|
|
"learning_rate": 8.875006540764607e-06,
|
|
"loss": 0.7246,
|
|
"step": 4829
|
|
},
|
|
{
|
|
"epoch": 1.2845744680851063,
|
|
"grad_norm": 3.9223177433013916,
|
|
"learning_rate": 8.874450669025161e-06,
|
|
"loss": 0.8083,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 1.284840425531915,
|
|
"grad_norm": 3.640429735183716,
|
|
"learning_rate": 8.87389467740565e-06,
|
|
"loss": 0.8996,
|
|
"step": 4831
|
|
},
|
|
{
|
|
"epoch": 1.2851063829787235,
|
|
"grad_norm": 3.7746853828430176,
|
|
"learning_rate": 8.873338565923275e-06,
|
|
"loss": 0.6899,
|
|
"step": 4832
|
|
},
|
|
{
|
|
"epoch": 1.285372340425532,
|
|
"grad_norm": 4.439557075500488,
|
|
"learning_rate": 8.872782334595246e-06,
|
|
"loss": 0.9741,
|
|
"step": 4833
|
|
},
|
|
{
|
|
"epoch": 1.2856382978723404,
|
|
"grad_norm": 4.051036834716797,
|
|
"learning_rate": 8.872225983438774e-06,
|
|
"loss": 0.8935,
|
|
"step": 4834
|
|
},
|
|
{
|
|
"epoch": 1.2859042553191489,
|
|
"grad_norm": 4.3584370613098145,
|
|
"learning_rate": 8.871669512471068e-06,
|
|
"loss": 0.8499,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 1.2861702127659576,
|
|
"grad_norm": 3.96370792388916,
|
|
"learning_rate": 8.87111292170935e-06,
|
|
"loss": 0.8756,
|
|
"step": 4836
|
|
},
|
|
{
|
|
"epoch": 1.286436170212766,
|
|
"grad_norm": 3.8416450023651123,
|
|
"learning_rate": 8.87055621117084e-06,
|
|
"loss": 0.7347,
|
|
"step": 4837
|
|
},
|
|
{
|
|
"epoch": 1.2867021276595745,
|
|
"grad_norm": 3.84533429145813,
|
|
"learning_rate": 8.869999380872765e-06,
|
|
"loss": 0.7894,
|
|
"step": 4838
|
|
},
|
|
{
|
|
"epoch": 1.286968085106383,
|
|
"grad_norm": 4.616893768310547,
|
|
"learning_rate": 8.869442430832351e-06,
|
|
"loss": 0.8618,
|
|
"step": 4839
|
|
},
|
|
{
|
|
"epoch": 1.2872340425531914,
|
|
"grad_norm": 3.9372458457946777,
|
|
"learning_rate": 8.868885361066835e-06,
|
|
"loss": 0.785,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 1.2875,
|
|
"grad_norm": 3.895632743835449,
|
|
"learning_rate": 8.868328171593448e-06,
|
|
"loss": 0.7812,
|
|
"step": 4841
|
|
},
|
|
{
|
|
"epoch": 1.2877659574468086,
|
|
"grad_norm": 4.029928684234619,
|
|
"learning_rate": 8.867770862429434e-06,
|
|
"loss": 0.8724,
|
|
"step": 4842
|
|
},
|
|
{
|
|
"epoch": 1.288031914893617,
|
|
"grad_norm": 3.8094303607940674,
|
|
"learning_rate": 8.867213433592037e-06,
|
|
"loss": 0.791,
|
|
"step": 4843
|
|
},
|
|
{
|
|
"epoch": 1.2882978723404255,
|
|
"grad_norm": 3.862415313720703,
|
|
"learning_rate": 8.866655885098502e-06,
|
|
"loss": 0.8223,
|
|
"step": 4844
|
|
},
|
|
{
|
|
"epoch": 1.288563829787234,
|
|
"grad_norm": 4.023502826690674,
|
|
"learning_rate": 8.866098216966081e-06,
|
|
"loss": 0.8339,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 1.2888297872340426,
|
|
"grad_norm": 3.7530012130737305,
|
|
"learning_rate": 8.865540429212031e-06,
|
|
"loss": 0.7766,
|
|
"step": 4846
|
|
},
|
|
{
|
|
"epoch": 1.289095744680851,
|
|
"grad_norm": 3.7417378425598145,
|
|
"learning_rate": 8.864982521853609e-06,
|
|
"loss": 0.9348,
|
|
"step": 4847
|
|
},
|
|
{
|
|
"epoch": 1.2893617021276595,
|
|
"grad_norm": 4.337246417999268,
|
|
"learning_rate": 8.864424494908076e-06,
|
|
"loss": 0.8423,
|
|
"step": 4848
|
|
},
|
|
{
|
|
"epoch": 1.289627659574468,
|
|
"grad_norm": 4.149337291717529,
|
|
"learning_rate": 8.8638663483927e-06,
|
|
"loss": 0.9212,
|
|
"step": 4849
|
|
},
|
|
{
|
|
"epoch": 1.2898936170212765,
|
|
"grad_norm": 4.155276298522949,
|
|
"learning_rate": 8.86330808232475e-06,
|
|
"loss": 0.9331,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 1.2901595744680852,
|
|
"grad_norm": 3.66481876373291,
|
|
"learning_rate": 8.8627496967215e-06,
|
|
"loss": 0.7795,
|
|
"step": 4851
|
|
},
|
|
{
|
|
"epoch": 1.2904255319148936,
|
|
"grad_norm": 4.018246650695801,
|
|
"learning_rate": 8.862191191600227e-06,
|
|
"loss": 0.8021,
|
|
"step": 4852
|
|
},
|
|
{
|
|
"epoch": 1.290691489361702,
|
|
"grad_norm": 4.123905658721924,
|
|
"learning_rate": 8.86163256697821e-06,
|
|
"loss": 0.8106,
|
|
"step": 4853
|
|
},
|
|
{
|
|
"epoch": 1.2909574468085108,
|
|
"grad_norm": 4.097765922546387,
|
|
"learning_rate": 8.861073822872735e-06,
|
|
"loss": 0.8006,
|
|
"step": 4854
|
|
},
|
|
{
|
|
"epoch": 1.2912234042553192,
|
|
"grad_norm": 4.317656517028809,
|
|
"learning_rate": 8.86051495930109e-06,
|
|
"loss": 0.8026,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 1.2914893617021277,
|
|
"grad_norm": 3.8379859924316406,
|
|
"learning_rate": 8.859955976280568e-06,
|
|
"loss": 0.813,
|
|
"step": 4856
|
|
},
|
|
{
|
|
"epoch": 1.2917553191489362,
|
|
"grad_norm": 4.173714637756348,
|
|
"learning_rate": 8.859396873828461e-06,
|
|
"loss": 0.8064,
|
|
"step": 4857
|
|
},
|
|
{
|
|
"epoch": 1.2920212765957446,
|
|
"grad_norm": 4.439601898193359,
|
|
"learning_rate": 8.858837651962073e-06,
|
|
"loss": 0.8187,
|
|
"step": 4858
|
|
},
|
|
{
|
|
"epoch": 1.2922872340425533,
|
|
"grad_norm": 3.970308542251587,
|
|
"learning_rate": 8.858278310698705e-06,
|
|
"loss": 0.7977,
|
|
"step": 4859
|
|
},
|
|
{
|
|
"epoch": 1.2925531914893618,
|
|
"grad_norm": 3.7830026149749756,
|
|
"learning_rate": 8.857718850055663e-06,
|
|
"loss": 0.7371,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 1.2928191489361702,
|
|
"grad_norm": 3.9715933799743652,
|
|
"learning_rate": 8.857159270050258e-06,
|
|
"loss": 0.9022,
|
|
"step": 4861
|
|
},
|
|
{
|
|
"epoch": 1.2930851063829787,
|
|
"grad_norm": 3.824910879135132,
|
|
"learning_rate": 8.856599570699805e-06,
|
|
"loss": 0.7895,
|
|
"step": 4862
|
|
},
|
|
{
|
|
"epoch": 1.2933510638297872,
|
|
"grad_norm": 4.079301357269287,
|
|
"learning_rate": 8.856039752021619e-06,
|
|
"loss": 0.8215,
|
|
"step": 4863
|
|
},
|
|
{
|
|
"epoch": 1.2936170212765958,
|
|
"grad_norm": 3.722262382507324,
|
|
"learning_rate": 8.855479814033024e-06,
|
|
"loss": 0.7611,
|
|
"step": 4864
|
|
},
|
|
{
|
|
"epoch": 1.2938829787234043,
|
|
"grad_norm": 3.853123664855957,
|
|
"learning_rate": 8.854919756751343e-06,
|
|
"loss": 0.7494,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 1.2941489361702128,
|
|
"grad_norm": 3.9518027305603027,
|
|
"learning_rate": 8.854359580193907e-06,
|
|
"loss": 0.7751,
|
|
"step": 4866
|
|
},
|
|
{
|
|
"epoch": 1.2944148936170212,
|
|
"grad_norm": 4.295631408691406,
|
|
"learning_rate": 8.853799284378048e-06,
|
|
"loss": 0.8227,
|
|
"step": 4867
|
|
},
|
|
{
|
|
"epoch": 1.2946808510638297,
|
|
"grad_norm": 3.7936043739318848,
|
|
"learning_rate": 8.853238869321104e-06,
|
|
"loss": 0.7634,
|
|
"step": 4868
|
|
},
|
|
{
|
|
"epoch": 1.2949468085106384,
|
|
"grad_norm": 4.017428874969482,
|
|
"learning_rate": 8.85267833504041e-06,
|
|
"loss": 0.732,
|
|
"step": 4869
|
|
},
|
|
{
|
|
"epoch": 1.2952127659574468,
|
|
"grad_norm": 4.081499099731445,
|
|
"learning_rate": 8.852117681553312e-06,
|
|
"loss": 0.8568,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 1.2954787234042553,
|
|
"grad_norm": 4.4456281661987305,
|
|
"learning_rate": 8.851556908877159e-06,
|
|
"loss": 0.8038,
|
|
"step": 4871
|
|
},
|
|
{
|
|
"epoch": 1.2957446808510638,
|
|
"grad_norm": 4.371933460235596,
|
|
"learning_rate": 8.8509960170293e-06,
|
|
"loss": 0.7515,
|
|
"step": 4872
|
|
},
|
|
{
|
|
"epoch": 1.2960106382978722,
|
|
"grad_norm": 3.5804035663604736,
|
|
"learning_rate": 8.85043500602709e-06,
|
|
"loss": 0.7818,
|
|
"step": 4873
|
|
},
|
|
{
|
|
"epoch": 1.296276595744681,
|
|
"grad_norm": 4.176633834838867,
|
|
"learning_rate": 8.849873875887888e-06,
|
|
"loss": 0.8217,
|
|
"step": 4874
|
|
},
|
|
{
|
|
"epoch": 1.2965425531914894,
|
|
"grad_norm": 3.9609858989715576,
|
|
"learning_rate": 8.849312626629055e-06,
|
|
"loss": 0.8517,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 1.2968085106382978,
|
|
"grad_norm": 4.5829291343688965,
|
|
"learning_rate": 8.848751258267959e-06,
|
|
"loss": 1.0122,
|
|
"step": 4876
|
|
},
|
|
{
|
|
"epoch": 1.2970744680851065,
|
|
"grad_norm": 3.677952766418457,
|
|
"learning_rate": 8.848189770821965e-06,
|
|
"loss": 0.8094,
|
|
"step": 4877
|
|
},
|
|
{
|
|
"epoch": 1.297340425531915,
|
|
"grad_norm": 4.067968368530273,
|
|
"learning_rate": 8.84762816430845e-06,
|
|
"loss": 0.8764,
|
|
"step": 4878
|
|
},
|
|
{
|
|
"epoch": 1.2976063829787234,
|
|
"grad_norm": 3.8500382900238037,
|
|
"learning_rate": 8.847066438744792e-06,
|
|
"loss": 0.8741,
|
|
"step": 4879
|
|
},
|
|
{
|
|
"epoch": 1.297872340425532,
|
|
"grad_norm": 3.8818368911743164,
|
|
"learning_rate": 8.846504594148366e-06,
|
|
"loss": 0.8485,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 1.2981382978723404,
|
|
"grad_norm": 3.9118518829345703,
|
|
"learning_rate": 8.84594263053656e-06,
|
|
"loss": 0.9005,
|
|
"step": 4881
|
|
},
|
|
{
|
|
"epoch": 1.298404255319149,
|
|
"grad_norm": 3.889709711074829,
|
|
"learning_rate": 8.84538054792676e-06,
|
|
"loss": 0.9367,
|
|
"step": 4882
|
|
},
|
|
{
|
|
"epoch": 1.2986702127659575,
|
|
"grad_norm": 3.9546077251434326,
|
|
"learning_rate": 8.844818346336361e-06,
|
|
"loss": 0.8102,
|
|
"step": 4883
|
|
},
|
|
{
|
|
"epoch": 1.298936170212766,
|
|
"grad_norm": 4.036288738250732,
|
|
"learning_rate": 8.844256025782754e-06,
|
|
"loss": 0.9124,
|
|
"step": 4884
|
|
},
|
|
{
|
|
"epoch": 1.2992021276595744,
|
|
"grad_norm": 3.9991087913513184,
|
|
"learning_rate": 8.84369358628334e-06,
|
|
"loss": 0.7885,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 1.299468085106383,
|
|
"grad_norm": 3.767066478729248,
|
|
"learning_rate": 8.84313102785552e-06,
|
|
"loss": 0.8147,
|
|
"step": 4886
|
|
},
|
|
{
|
|
"epoch": 1.2997340425531916,
|
|
"grad_norm": 3.645434617996216,
|
|
"learning_rate": 8.842568350516702e-06,
|
|
"loss": 0.7238,
|
|
"step": 4887
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"grad_norm": 3.777766466140747,
|
|
"learning_rate": 8.842005554284296e-06,
|
|
"loss": 0.816,
|
|
"step": 4888
|
|
},
|
|
{
|
|
"epoch": 1.3002659574468085,
|
|
"grad_norm": 3.8868510723114014,
|
|
"learning_rate": 8.841442639175714e-06,
|
|
"loss": 0.8835,
|
|
"step": 4889
|
|
},
|
|
{
|
|
"epoch": 1.300531914893617,
|
|
"grad_norm": 4.271452903747559,
|
|
"learning_rate": 8.840879605208374e-06,
|
|
"loss": 0.8119,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 1.3007978723404254,
|
|
"grad_norm": 3.4486215114593506,
|
|
"learning_rate": 8.840316452399697e-06,
|
|
"loss": 0.7602,
|
|
"step": 4891
|
|
},
|
|
{
|
|
"epoch": 1.3010638297872341,
|
|
"grad_norm": 3.726085901260376,
|
|
"learning_rate": 8.839753180767108e-06,
|
|
"loss": 0.7252,
|
|
"step": 4892
|
|
},
|
|
{
|
|
"epoch": 1.3013297872340426,
|
|
"grad_norm": 4.51430082321167,
|
|
"learning_rate": 8.839189790328033e-06,
|
|
"loss": 0.8133,
|
|
"step": 4893
|
|
},
|
|
{
|
|
"epoch": 1.301595744680851,
|
|
"grad_norm": 4.0574469566345215,
|
|
"learning_rate": 8.838626281099908e-06,
|
|
"loss": 0.8436,
|
|
"step": 4894
|
|
},
|
|
{
|
|
"epoch": 1.3018617021276595,
|
|
"grad_norm": 4.096327304840088,
|
|
"learning_rate": 8.838062653100165e-06,
|
|
"loss": 0.8056,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 1.302127659574468,
|
|
"grad_norm": 4.048945903778076,
|
|
"learning_rate": 8.837498906346247e-06,
|
|
"loss": 0.8764,
|
|
"step": 4896
|
|
},
|
|
{
|
|
"epoch": 1.3023936170212767,
|
|
"grad_norm": 3.9284706115722656,
|
|
"learning_rate": 8.836935040855591e-06,
|
|
"loss": 0.7626,
|
|
"step": 4897
|
|
},
|
|
{
|
|
"epoch": 1.3026595744680851,
|
|
"grad_norm": 3.914583444595337,
|
|
"learning_rate": 8.83637105664565e-06,
|
|
"loss": 0.7855,
|
|
"step": 4898
|
|
},
|
|
{
|
|
"epoch": 1.3029255319148936,
|
|
"grad_norm": 4.442378520965576,
|
|
"learning_rate": 8.835806953733871e-06,
|
|
"loss": 0.8103,
|
|
"step": 4899
|
|
},
|
|
{
|
|
"epoch": 1.3031914893617023,
|
|
"grad_norm": 3.8343191146850586,
|
|
"learning_rate": 8.83524273213771e-06,
|
|
"loss": 0.8425,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 1.3034574468085105,
|
|
"grad_norm": 4.154768943786621,
|
|
"learning_rate": 8.834678391874623e-06,
|
|
"loss": 0.7792,
|
|
"step": 4901
|
|
},
|
|
{
|
|
"epoch": 1.3037234042553192,
|
|
"grad_norm": 4.136390209197998,
|
|
"learning_rate": 8.834113932962071e-06,
|
|
"loss": 0.8578,
|
|
"step": 4902
|
|
},
|
|
{
|
|
"epoch": 1.3039893617021276,
|
|
"grad_norm": 4.139702320098877,
|
|
"learning_rate": 8.833549355417518e-06,
|
|
"loss": 0.724,
|
|
"step": 4903
|
|
},
|
|
{
|
|
"epoch": 1.304255319148936,
|
|
"grad_norm": 4.213815689086914,
|
|
"learning_rate": 8.83298465925844e-06,
|
|
"loss": 0.7892,
|
|
"step": 4904
|
|
},
|
|
{
|
|
"epoch": 1.3045212765957448,
|
|
"grad_norm": 4.048974990844727,
|
|
"learning_rate": 8.832419844502298e-06,
|
|
"loss": 0.829,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 1.3047872340425533,
|
|
"grad_norm": 4.729825496673584,
|
|
"learning_rate": 8.831854911166577e-06,
|
|
"loss": 0.9176,
|
|
"step": 4906
|
|
},
|
|
{
|
|
"epoch": 1.3050531914893617,
|
|
"grad_norm": 3.5801501274108887,
|
|
"learning_rate": 8.831289859268753e-06,
|
|
"loss": 0.724,
|
|
"step": 4907
|
|
},
|
|
{
|
|
"epoch": 1.3053191489361702,
|
|
"grad_norm": 4.097287654876709,
|
|
"learning_rate": 8.83072468882631e-06,
|
|
"loss": 0.8299,
|
|
"step": 4908
|
|
},
|
|
{
|
|
"epoch": 1.3055851063829786,
|
|
"grad_norm": 4.027351379394531,
|
|
"learning_rate": 8.830159399856734e-06,
|
|
"loss": 0.9384,
|
|
"step": 4909
|
|
},
|
|
{
|
|
"epoch": 1.3058510638297873,
|
|
"grad_norm": 4.275338649749756,
|
|
"learning_rate": 8.829593992377518e-06,
|
|
"loss": 0.7921,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 1.3061170212765958,
|
|
"grad_norm": 4.1409220695495605,
|
|
"learning_rate": 8.829028466406156e-06,
|
|
"loss": 0.8888,
|
|
"step": 4911
|
|
},
|
|
{
|
|
"epoch": 1.3063829787234043,
|
|
"grad_norm": 3.6458733081817627,
|
|
"learning_rate": 8.828462821960143e-06,
|
|
"loss": 0.7371,
|
|
"step": 4912
|
|
},
|
|
{
|
|
"epoch": 1.3066489361702127,
|
|
"grad_norm": 3.8695321083068848,
|
|
"learning_rate": 8.827897059056983e-06,
|
|
"loss": 0.8467,
|
|
"step": 4913
|
|
},
|
|
{
|
|
"epoch": 1.3069148936170212,
|
|
"grad_norm": 3.693190336227417,
|
|
"learning_rate": 8.827331177714183e-06,
|
|
"loss": 0.8182,
|
|
"step": 4914
|
|
},
|
|
{
|
|
"epoch": 1.3071808510638299,
|
|
"grad_norm": 3.806725263595581,
|
|
"learning_rate": 8.826765177949248e-06,
|
|
"loss": 0.8669,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 1.3074468085106383,
|
|
"grad_norm": 3.970451593399048,
|
|
"learning_rate": 8.826199059779695e-06,
|
|
"loss": 0.9024,
|
|
"step": 4916
|
|
},
|
|
{
|
|
"epoch": 1.3077127659574468,
|
|
"grad_norm": 3.7471280097961426,
|
|
"learning_rate": 8.825632823223037e-06,
|
|
"loss": 0.7707,
|
|
"step": 4917
|
|
},
|
|
{
|
|
"epoch": 1.3079787234042553,
|
|
"grad_norm": 4.0794267654418945,
|
|
"learning_rate": 8.825066468296796e-06,
|
|
"loss": 0.8489,
|
|
"step": 4918
|
|
},
|
|
{
|
|
"epoch": 1.3082446808510637,
|
|
"grad_norm": 3.681044578552246,
|
|
"learning_rate": 8.824499995018494e-06,
|
|
"loss": 0.7854,
|
|
"step": 4919
|
|
},
|
|
{
|
|
"epoch": 1.3085106382978724,
|
|
"grad_norm": 3.9300031661987305,
|
|
"learning_rate": 8.82393340340566e-06,
|
|
"loss": 0.8076,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 1.3087765957446809,
|
|
"grad_norm": 3.5358026027679443,
|
|
"learning_rate": 8.823366693475826e-06,
|
|
"loss": 0.7239,
|
|
"step": 4921
|
|
},
|
|
{
|
|
"epoch": 1.3090425531914893,
|
|
"grad_norm": 3.7831380367279053,
|
|
"learning_rate": 8.822799865246522e-06,
|
|
"loss": 0.8004,
|
|
"step": 4922
|
|
},
|
|
{
|
|
"epoch": 1.309308510638298,
|
|
"grad_norm": 3.6898906230926514,
|
|
"learning_rate": 8.822232918735292e-06,
|
|
"loss": 0.765,
|
|
"step": 4923
|
|
},
|
|
{
|
|
"epoch": 1.3095744680851062,
|
|
"grad_norm": 3.685541868209839,
|
|
"learning_rate": 8.821665853959673e-06,
|
|
"loss": 0.9544,
|
|
"step": 4924
|
|
},
|
|
{
|
|
"epoch": 1.309840425531915,
|
|
"grad_norm": 4.169592380523682,
|
|
"learning_rate": 8.821098670937215e-06,
|
|
"loss": 0.9082,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 1.3101063829787234,
|
|
"grad_norm": 3.870544910430908,
|
|
"learning_rate": 8.820531369685464e-06,
|
|
"loss": 0.7508,
|
|
"step": 4926
|
|
},
|
|
{
|
|
"epoch": 1.3103723404255319,
|
|
"grad_norm": 3.920816659927368,
|
|
"learning_rate": 8.819963950221976e-06,
|
|
"loss": 0.849,
|
|
"step": 4927
|
|
},
|
|
{
|
|
"epoch": 1.3106382978723405,
|
|
"grad_norm": 3.8789918422698975,
|
|
"learning_rate": 8.819396412564305e-06,
|
|
"loss": 0.7916,
|
|
"step": 4928
|
|
},
|
|
{
|
|
"epoch": 1.310904255319149,
|
|
"grad_norm": 3.8481719493865967,
|
|
"learning_rate": 8.818828756730012e-06,
|
|
"loss": 0.7985,
|
|
"step": 4929
|
|
},
|
|
{
|
|
"epoch": 1.3111702127659575,
|
|
"grad_norm": 4.481472015380859,
|
|
"learning_rate": 8.818260982736662e-06,
|
|
"loss": 0.7636,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 1.311436170212766,
|
|
"grad_norm": 3.4751243591308594,
|
|
"learning_rate": 8.81769309060182e-06,
|
|
"loss": 0.7336,
|
|
"step": 4931
|
|
},
|
|
{
|
|
"epoch": 1.3117021276595744,
|
|
"grad_norm": 4.149890899658203,
|
|
"learning_rate": 8.81712508034306e-06,
|
|
"loss": 0.8473,
|
|
"step": 4932
|
|
},
|
|
{
|
|
"epoch": 1.311968085106383,
|
|
"grad_norm": 3.9108872413635254,
|
|
"learning_rate": 8.816556951977955e-06,
|
|
"loss": 0.7656,
|
|
"step": 4933
|
|
},
|
|
{
|
|
"epoch": 1.3122340425531915,
|
|
"grad_norm": 3.8704488277435303,
|
|
"learning_rate": 8.815988705524086e-06,
|
|
"loss": 0.8214,
|
|
"step": 4934
|
|
},
|
|
{
|
|
"epoch": 1.3125,
|
|
"grad_norm": 4.183962821960449,
|
|
"learning_rate": 8.815420340999034e-06,
|
|
"loss": 0.8411,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 1.3127659574468085,
|
|
"grad_norm": 3.7032434940338135,
|
|
"learning_rate": 8.814851858420384e-06,
|
|
"loss": 0.8455,
|
|
"step": 4936
|
|
},
|
|
{
|
|
"epoch": 1.313031914893617,
|
|
"grad_norm": 3.5762336254119873,
|
|
"learning_rate": 8.814283257805724e-06,
|
|
"loss": 0.7208,
|
|
"step": 4937
|
|
},
|
|
{
|
|
"epoch": 1.3132978723404256,
|
|
"grad_norm": 4.197664260864258,
|
|
"learning_rate": 8.813714539172653e-06,
|
|
"loss": 0.8642,
|
|
"step": 4938
|
|
},
|
|
{
|
|
"epoch": 1.313563829787234,
|
|
"grad_norm": 3.5386626720428467,
|
|
"learning_rate": 8.81314570253876e-06,
|
|
"loss": 0.6846,
|
|
"step": 4939
|
|
},
|
|
{
|
|
"epoch": 1.3138297872340425,
|
|
"grad_norm": 4.332328796386719,
|
|
"learning_rate": 8.812576747921653e-06,
|
|
"loss": 0.7862,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 1.314095744680851,
|
|
"grad_norm": 3.6495919227600098,
|
|
"learning_rate": 8.81200767533893e-06,
|
|
"loss": 0.676,
|
|
"step": 4941
|
|
},
|
|
{
|
|
"epoch": 1.3143617021276595,
|
|
"grad_norm": 3.717625617980957,
|
|
"learning_rate": 8.811438484808204e-06,
|
|
"loss": 0.8879,
|
|
"step": 4942
|
|
},
|
|
{
|
|
"epoch": 1.3146276595744681,
|
|
"grad_norm": 4.201274394989014,
|
|
"learning_rate": 8.810869176347082e-06,
|
|
"loss": 0.9174,
|
|
"step": 4943
|
|
},
|
|
{
|
|
"epoch": 1.3148936170212766,
|
|
"grad_norm": 3.3899879455566406,
|
|
"learning_rate": 8.810299749973182e-06,
|
|
"loss": 0.7209,
|
|
"step": 4944
|
|
},
|
|
{
|
|
"epoch": 1.315159574468085,
|
|
"grad_norm": 3.821558713912964,
|
|
"learning_rate": 8.80973020570412e-06,
|
|
"loss": 0.647,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 1.3154255319148938,
|
|
"grad_norm": 4.011831760406494,
|
|
"learning_rate": 8.809160543557523e-06,
|
|
"loss": 0.8387,
|
|
"step": 4946
|
|
},
|
|
{
|
|
"epoch": 1.315691489361702,
|
|
"grad_norm": 4.121433258056641,
|
|
"learning_rate": 8.80859076355101e-06,
|
|
"loss": 0.7835,
|
|
"step": 4947
|
|
},
|
|
{
|
|
"epoch": 1.3159574468085107,
|
|
"grad_norm": 4.066422462463379,
|
|
"learning_rate": 8.808020865702218e-06,
|
|
"loss": 0.7569,
|
|
"step": 4948
|
|
},
|
|
{
|
|
"epoch": 1.3162234042553191,
|
|
"grad_norm": 3.7616024017333984,
|
|
"learning_rate": 8.807450850028776e-06,
|
|
"loss": 0.7514,
|
|
"step": 4949
|
|
},
|
|
{
|
|
"epoch": 1.3164893617021276,
|
|
"grad_norm": 3.809521198272705,
|
|
"learning_rate": 8.806880716548322e-06,
|
|
"loss": 0.8212,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 1.3167553191489363,
|
|
"grad_norm": 3.664140224456787,
|
|
"learning_rate": 8.806310465278496e-06,
|
|
"loss": 0.8303,
|
|
"step": 4951
|
|
},
|
|
{
|
|
"epoch": 1.3170212765957447,
|
|
"grad_norm": 3.978876829147339,
|
|
"learning_rate": 8.805740096236943e-06,
|
|
"loss": 0.8149,
|
|
"step": 4952
|
|
},
|
|
{
|
|
"epoch": 1.3172872340425532,
|
|
"grad_norm": 4.436275959014893,
|
|
"learning_rate": 8.805169609441312e-06,
|
|
"loss": 0.9033,
|
|
"step": 4953
|
|
},
|
|
{
|
|
"epoch": 1.3175531914893617,
|
|
"grad_norm": 3.9355101585388184,
|
|
"learning_rate": 8.804599004909251e-06,
|
|
"loss": 0.8599,
|
|
"step": 4954
|
|
},
|
|
{
|
|
"epoch": 1.3178191489361701,
|
|
"grad_norm": 3.6748297214508057,
|
|
"learning_rate": 8.80402828265842e-06,
|
|
"loss": 0.6637,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 1.3180851063829788,
|
|
"grad_norm": 3.953321695327759,
|
|
"learning_rate": 8.803457442706473e-06,
|
|
"loss": 0.7684,
|
|
"step": 4956
|
|
},
|
|
{
|
|
"epoch": 1.3183510638297873,
|
|
"grad_norm": 3.9680938720703125,
|
|
"learning_rate": 8.802886485071078e-06,
|
|
"loss": 0.8377,
|
|
"step": 4957
|
|
},
|
|
{
|
|
"epoch": 1.3186170212765957,
|
|
"grad_norm": 3.608375072479248,
|
|
"learning_rate": 8.802315409769894e-06,
|
|
"loss": 0.7671,
|
|
"step": 4958
|
|
},
|
|
{
|
|
"epoch": 1.3188829787234042,
|
|
"grad_norm": 3.7180373668670654,
|
|
"learning_rate": 8.801744216820596e-06,
|
|
"loss": 0.794,
|
|
"step": 4959
|
|
},
|
|
{
|
|
"epoch": 1.3191489361702127,
|
|
"grad_norm": 3.490082263946533,
|
|
"learning_rate": 8.801172906240857e-06,
|
|
"loss": 0.7993,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 1.3194148936170214,
|
|
"grad_norm": 3.9783389568328857,
|
|
"learning_rate": 8.800601478048351e-06,
|
|
"loss": 0.7455,
|
|
"step": 4961
|
|
},
|
|
{
|
|
"epoch": 1.3196808510638298,
|
|
"grad_norm": 4.333663463592529,
|
|
"learning_rate": 8.800029932260764e-06,
|
|
"loss": 0.8772,
|
|
"step": 4962
|
|
},
|
|
{
|
|
"epoch": 1.3199468085106383,
|
|
"grad_norm": 3.9584553241729736,
|
|
"learning_rate": 8.799458268895774e-06,
|
|
"loss": 0.8622,
|
|
"step": 4963
|
|
},
|
|
{
|
|
"epoch": 1.3202127659574467,
|
|
"grad_norm": 4.271299362182617,
|
|
"learning_rate": 8.798886487971073e-06,
|
|
"loss": 0.7591,
|
|
"step": 4964
|
|
},
|
|
{
|
|
"epoch": 1.3204787234042552,
|
|
"grad_norm": 4.128324508666992,
|
|
"learning_rate": 8.798314589504348e-06,
|
|
"loss": 0.7294,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 1.320744680851064,
|
|
"grad_norm": 3.613626718521118,
|
|
"learning_rate": 8.797742573513302e-06,
|
|
"loss": 0.8173,
|
|
"step": 4966
|
|
},
|
|
{
|
|
"epoch": 1.3210106382978724,
|
|
"grad_norm": 3.665271043777466,
|
|
"learning_rate": 8.797170440015627e-06,
|
|
"loss": 0.7592,
|
|
"step": 4967
|
|
},
|
|
{
|
|
"epoch": 1.3212765957446808,
|
|
"grad_norm": 4.036754608154297,
|
|
"learning_rate": 8.79659818902903e-06,
|
|
"loss": 0.7705,
|
|
"step": 4968
|
|
},
|
|
{
|
|
"epoch": 1.3215425531914895,
|
|
"grad_norm": 4.09188175201416,
|
|
"learning_rate": 8.796025820571213e-06,
|
|
"loss": 0.9028,
|
|
"step": 4969
|
|
},
|
|
{
|
|
"epoch": 1.3218085106382977,
|
|
"grad_norm": 3.8270485401153564,
|
|
"learning_rate": 8.795453334659889e-06,
|
|
"loss": 0.7337,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 1.3220744680851064,
|
|
"grad_norm": 4.005841255187988,
|
|
"learning_rate": 8.794880731312771e-06,
|
|
"loss": 0.8789,
|
|
"step": 4971
|
|
},
|
|
{
|
|
"epoch": 1.3223404255319149,
|
|
"grad_norm": 3.894681930541992,
|
|
"learning_rate": 8.794308010547574e-06,
|
|
"loss": 0.7452,
|
|
"step": 4972
|
|
},
|
|
{
|
|
"epoch": 1.3226063829787233,
|
|
"grad_norm": 3.7697856426239014,
|
|
"learning_rate": 8.79373517238202e-06,
|
|
"loss": 0.7111,
|
|
"step": 4973
|
|
},
|
|
{
|
|
"epoch": 1.322872340425532,
|
|
"grad_norm": 4.162429332733154,
|
|
"learning_rate": 8.793162216833835e-06,
|
|
"loss": 0.8352,
|
|
"step": 4974
|
|
},
|
|
{
|
|
"epoch": 1.3231382978723405,
|
|
"grad_norm": 4.8362298011779785,
|
|
"learning_rate": 8.792589143920743e-06,
|
|
"loss": 0.8807,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 1.323404255319149,
|
|
"grad_norm": 4.283027172088623,
|
|
"learning_rate": 8.792015953660478e-06,
|
|
"loss": 0.9241,
|
|
"step": 4976
|
|
},
|
|
{
|
|
"epoch": 1.3236702127659574,
|
|
"grad_norm": 3.7246296405792236,
|
|
"learning_rate": 8.791442646070776e-06,
|
|
"loss": 0.8158,
|
|
"step": 4977
|
|
},
|
|
{
|
|
"epoch": 1.3239361702127659,
|
|
"grad_norm": 3.9116530418395996,
|
|
"learning_rate": 8.790869221169374e-06,
|
|
"loss": 0.7603,
|
|
"step": 4978
|
|
},
|
|
{
|
|
"epoch": 1.3242021276595746,
|
|
"grad_norm": 4.164322853088379,
|
|
"learning_rate": 8.790295678974015e-06,
|
|
"loss": 0.7518,
|
|
"step": 4979
|
|
},
|
|
{
|
|
"epoch": 1.324468085106383,
|
|
"grad_norm": 3.459543228149414,
|
|
"learning_rate": 8.789722019502444e-06,
|
|
"loss": 0.8216,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 1.3247340425531915,
|
|
"grad_norm": 3.4385783672332764,
|
|
"learning_rate": 8.789148242772414e-06,
|
|
"loss": 0.5722,
|
|
"step": 4981
|
|
},
|
|
{
|
|
"epoch": 1.325,
|
|
"grad_norm": 3.881467580795288,
|
|
"learning_rate": 8.788574348801676e-06,
|
|
"loss": 0.7652,
|
|
"step": 4982
|
|
},
|
|
{
|
|
"epoch": 1.3252659574468084,
|
|
"grad_norm": 3.8028674125671387,
|
|
"learning_rate": 8.788000337607984e-06,
|
|
"loss": 0.7125,
|
|
"step": 4983
|
|
},
|
|
{
|
|
"epoch": 1.325531914893617,
|
|
"grad_norm": 3.595238447189331,
|
|
"learning_rate": 8.787426209209104e-06,
|
|
"loss": 0.6849,
|
|
"step": 4984
|
|
},
|
|
{
|
|
"epoch": 1.3257978723404256,
|
|
"grad_norm": 4.597902774810791,
|
|
"learning_rate": 8.786851963622799e-06,
|
|
"loss": 0.8314,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 1.326063829787234,
|
|
"grad_norm": 4.151714324951172,
|
|
"learning_rate": 8.786277600866834e-06,
|
|
"loss": 0.8624,
|
|
"step": 4986
|
|
},
|
|
{
|
|
"epoch": 1.3263297872340425,
|
|
"grad_norm": 3.7185237407684326,
|
|
"learning_rate": 8.785703120958984e-06,
|
|
"loss": 0.7547,
|
|
"step": 4987
|
|
},
|
|
{
|
|
"epoch": 1.326595744680851,
|
|
"grad_norm": 3.964048385620117,
|
|
"learning_rate": 8.785128523917022e-06,
|
|
"loss": 0.8626,
|
|
"step": 4988
|
|
},
|
|
{
|
|
"epoch": 1.3268617021276596,
|
|
"grad_norm": 3.9490604400634766,
|
|
"learning_rate": 8.784553809758724e-06,
|
|
"loss": 0.7927,
|
|
"step": 4989
|
|
},
|
|
{
|
|
"epoch": 1.327127659574468,
|
|
"grad_norm": 3.736051321029663,
|
|
"learning_rate": 8.783978978501879e-06,
|
|
"loss": 0.7581,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 1.3273936170212766,
|
|
"grad_norm": 4.048060417175293,
|
|
"learning_rate": 8.783404030164269e-06,
|
|
"loss": 0.8141,
|
|
"step": 4991
|
|
},
|
|
{
|
|
"epoch": 1.327659574468085,
|
|
"grad_norm": 3.542971134185791,
|
|
"learning_rate": 8.782828964763683e-06,
|
|
"loss": 0.8244,
|
|
"step": 4992
|
|
},
|
|
{
|
|
"epoch": 1.3279255319148935,
|
|
"grad_norm": 4.4042439460754395,
|
|
"learning_rate": 8.782253782317914e-06,
|
|
"loss": 0.7623,
|
|
"step": 4993
|
|
},
|
|
{
|
|
"epoch": 1.3281914893617022,
|
|
"grad_norm": 4.011150360107422,
|
|
"learning_rate": 8.781678482844763e-06,
|
|
"loss": 0.7879,
|
|
"step": 4994
|
|
},
|
|
{
|
|
"epoch": 1.3284574468085106,
|
|
"grad_norm": 3.9396347999572754,
|
|
"learning_rate": 8.781103066362024e-06,
|
|
"loss": 0.8731,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 1.328723404255319,
|
|
"grad_norm": 4.063819408416748,
|
|
"learning_rate": 8.780527532887506e-06,
|
|
"loss": 0.7255,
|
|
"step": 4996
|
|
},
|
|
{
|
|
"epoch": 1.3289893617021278,
|
|
"grad_norm": 3.684864044189453,
|
|
"learning_rate": 8.779951882439016e-06,
|
|
"loss": 0.7447,
|
|
"step": 4997
|
|
},
|
|
{
|
|
"epoch": 1.3292553191489362,
|
|
"grad_norm": 4.3980207443237305,
|
|
"learning_rate": 8.77937611503436e-06,
|
|
"loss": 0.8104,
|
|
"step": 4998
|
|
},
|
|
{
|
|
"epoch": 1.3295212765957447,
|
|
"grad_norm": 4.019001483917236,
|
|
"learning_rate": 8.778800230691363e-06,
|
|
"loss": 0.7426,
|
|
"step": 4999
|
|
},
|
|
{
|
|
"epoch": 1.3297872340425532,
|
|
"grad_norm": 4.1492486000061035,
|
|
"learning_rate": 8.778224229427836e-06,
|
|
"loss": 0.7929,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.3297872340425532,
|
|
"eval_loss": 1.2957489490509033,
|
|
"eval_runtime": 14.7283,
|
|
"eval_samples_per_second": 27.159,
|
|
"eval_steps_per_second": 3.395,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.3300531914893616,
|
|
"grad_norm": 3.742830753326416,
|
|
"learning_rate": 8.777648111261601e-06,
|
|
"loss": 0.6807,
|
|
"step": 5001
|
|
},
|
|
{
|
|
"epoch": 1.3303191489361703,
|
|
"grad_norm": 4.3522114753723145,
|
|
"learning_rate": 8.77707187621049e-06,
|
|
"loss": 0.8048,
|
|
"step": 5002
|
|
},
|
|
{
|
|
"epoch": 1.3305851063829788,
|
|
"grad_norm": 3.7916550636291504,
|
|
"learning_rate": 8.776495524292325e-06,
|
|
"loss": 0.8209,
|
|
"step": 5003
|
|
},
|
|
{
|
|
"epoch": 1.3308510638297872,
|
|
"grad_norm": 3.642531156539917,
|
|
"learning_rate": 8.775919055524941e-06,
|
|
"loss": 0.7274,
|
|
"step": 5004
|
|
},
|
|
{
|
|
"epoch": 1.3311170212765957,
|
|
"grad_norm": 3.885079860687256,
|
|
"learning_rate": 8.775342469926178e-06,
|
|
"loss": 0.8305,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 1.3313829787234042,
|
|
"grad_norm": 3.816824436187744,
|
|
"learning_rate": 8.774765767513876e-06,
|
|
"loss": 0.7605,
|
|
"step": 5006
|
|
},
|
|
{
|
|
"epoch": 1.3316489361702128,
|
|
"grad_norm": 4.696832656860352,
|
|
"learning_rate": 8.774188948305874e-06,
|
|
"loss": 0.8907,
|
|
"step": 5007
|
|
},
|
|
{
|
|
"epoch": 1.3319148936170213,
|
|
"grad_norm": 4.030970096588135,
|
|
"learning_rate": 8.773612012320023e-06,
|
|
"loss": 0.9613,
|
|
"step": 5008
|
|
},
|
|
{
|
|
"epoch": 1.3321808510638298,
|
|
"grad_norm": 4.046240329742432,
|
|
"learning_rate": 8.773034959574173e-06,
|
|
"loss": 0.7066,
|
|
"step": 5009
|
|
},
|
|
{
|
|
"epoch": 1.3324468085106382,
|
|
"grad_norm": 3.916098117828369,
|
|
"learning_rate": 8.77245779008618e-06,
|
|
"loss": 0.7762,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 1.3327127659574467,
|
|
"grad_norm": 4.096320629119873,
|
|
"learning_rate": 8.771880503873902e-06,
|
|
"loss": 0.7222,
|
|
"step": 5011
|
|
},
|
|
{
|
|
"epoch": 1.3329787234042554,
|
|
"grad_norm": 4.3136467933654785,
|
|
"learning_rate": 8.771303100955199e-06,
|
|
"loss": 0.8265,
|
|
"step": 5012
|
|
},
|
|
{
|
|
"epoch": 1.3332446808510638,
|
|
"grad_norm": 3.972031593322754,
|
|
"learning_rate": 8.770725581347938e-06,
|
|
"loss": 0.7263,
|
|
"step": 5013
|
|
},
|
|
{
|
|
"epoch": 1.3335106382978723,
|
|
"grad_norm": 4.295060634613037,
|
|
"learning_rate": 8.770147945069988e-06,
|
|
"loss": 0.8489,
|
|
"step": 5014
|
|
},
|
|
{
|
|
"epoch": 1.3337765957446808,
|
|
"grad_norm": 3.8986477851867676,
|
|
"learning_rate": 8.769570192139224e-06,
|
|
"loss": 0.7101,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 1.3340425531914892,
|
|
"grad_norm": 3.8135452270507812,
|
|
"learning_rate": 8.768992322573518e-06,
|
|
"loss": 0.7885,
|
|
"step": 5016
|
|
},
|
|
{
|
|
"epoch": 1.334308510638298,
|
|
"grad_norm": 3.727550983428955,
|
|
"learning_rate": 8.768414336390752e-06,
|
|
"loss": 0.8622,
|
|
"step": 5017
|
|
},
|
|
{
|
|
"epoch": 1.3345744680851064,
|
|
"grad_norm": 4.012676239013672,
|
|
"learning_rate": 8.76783623360881e-06,
|
|
"loss": 0.8938,
|
|
"step": 5018
|
|
},
|
|
{
|
|
"epoch": 1.3348404255319148,
|
|
"grad_norm": 4.344918727874756,
|
|
"learning_rate": 8.767258014245578e-06,
|
|
"loss": 0.8228,
|
|
"step": 5019
|
|
},
|
|
{
|
|
"epoch": 1.3351063829787235,
|
|
"grad_norm": 3.9926249980926514,
|
|
"learning_rate": 8.76667967831895e-06,
|
|
"loss": 0.6513,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 1.335372340425532,
|
|
"grad_norm": 4.119525909423828,
|
|
"learning_rate": 8.766101225846816e-06,
|
|
"loss": 0.7887,
|
|
"step": 5021
|
|
},
|
|
{
|
|
"epoch": 1.3356382978723405,
|
|
"grad_norm": 4.538883686065674,
|
|
"learning_rate": 8.765522656847077e-06,
|
|
"loss": 0.796,
|
|
"step": 5022
|
|
},
|
|
{
|
|
"epoch": 1.335904255319149,
|
|
"grad_norm": 3.7550501823425293,
|
|
"learning_rate": 8.764943971337633e-06,
|
|
"loss": 0.7695,
|
|
"step": 5023
|
|
},
|
|
{
|
|
"epoch": 1.3361702127659574,
|
|
"grad_norm": 3.611605405807495,
|
|
"learning_rate": 8.76436516933639e-06,
|
|
"loss": 0.7483,
|
|
"step": 5024
|
|
},
|
|
{
|
|
"epoch": 1.336436170212766,
|
|
"grad_norm": 4.187867164611816,
|
|
"learning_rate": 8.763786250861258e-06,
|
|
"loss": 0.8277,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 1.3367021276595745,
|
|
"grad_norm": 3.9223055839538574,
|
|
"learning_rate": 8.763207215930147e-06,
|
|
"loss": 0.7724,
|
|
"step": 5026
|
|
},
|
|
{
|
|
"epoch": 1.336968085106383,
|
|
"grad_norm": 4.048906326293945,
|
|
"learning_rate": 8.762628064560975e-06,
|
|
"loss": 0.7923,
|
|
"step": 5027
|
|
},
|
|
{
|
|
"epoch": 1.3372340425531914,
|
|
"grad_norm": 4.241153240203857,
|
|
"learning_rate": 8.762048796771659e-06,
|
|
"loss": 0.8776,
|
|
"step": 5028
|
|
},
|
|
{
|
|
"epoch": 1.3375,
|
|
"grad_norm": 3.759209632873535,
|
|
"learning_rate": 8.761469412580126e-06,
|
|
"loss": 0.7554,
|
|
"step": 5029
|
|
},
|
|
{
|
|
"epoch": 1.3377659574468086,
|
|
"grad_norm": 3.8906912803649902,
|
|
"learning_rate": 8.760889912004297e-06,
|
|
"loss": 0.6977,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 1.338031914893617,
|
|
"grad_norm": 3.9501161575317383,
|
|
"learning_rate": 8.760310295062112e-06,
|
|
"loss": 0.9481,
|
|
"step": 5031
|
|
},
|
|
{
|
|
"epoch": 1.3382978723404255,
|
|
"grad_norm": 3.918553590774536,
|
|
"learning_rate": 8.759730561771494e-06,
|
|
"loss": 0.7882,
|
|
"step": 5032
|
|
},
|
|
{
|
|
"epoch": 1.338563829787234,
|
|
"grad_norm": 4.063170909881592,
|
|
"learning_rate": 8.759150712150388e-06,
|
|
"loss": 0.8415,
|
|
"step": 5033
|
|
},
|
|
{
|
|
"epoch": 1.3388297872340424,
|
|
"grad_norm": 3.863600015640259,
|
|
"learning_rate": 8.758570746216732e-06,
|
|
"loss": 0.807,
|
|
"step": 5034
|
|
},
|
|
{
|
|
"epoch": 1.3390957446808511,
|
|
"grad_norm": 3.9519717693328857,
|
|
"learning_rate": 8.757990663988474e-06,
|
|
"loss": 0.8594,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 1.3393617021276596,
|
|
"grad_norm": 4.245703220367432,
|
|
"learning_rate": 8.75741046548356e-06,
|
|
"loss": 0.7987,
|
|
"step": 5036
|
|
},
|
|
{
|
|
"epoch": 1.339627659574468,
|
|
"grad_norm": 4.1299729347229,
|
|
"learning_rate": 8.75683015071994e-06,
|
|
"loss": 0.9377,
|
|
"step": 5037
|
|
},
|
|
{
|
|
"epoch": 1.3398936170212765,
|
|
"grad_norm": 3.744929552078247,
|
|
"learning_rate": 8.756249719715576e-06,
|
|
"loss": 0.6875,
|
|
"step": 5038
|
|
},
|
|
{
|
|
"epoch": 1.340159574468085,
|
|
"grad_norm": 3.7629339694976807,
|
|
"learning_rate": 8.75566917248842e-06,
|
|
"loss": 0.7619,
|
|
"step": 5039
|
|
},
|
|
{
|
|
"epoch": 1.3404255319148937,
|
|
"grad_norm": 4.09276819229126,
|
|
"learning_rate": 8.75508850905644e-06,
|
|
"loss": 0.7618,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 1.3406914893617021,
|
|
"grad_norm": 4.220356464385986,
|
|
"learning_rate": 8.7545077294376e-06,
|
|
"loss": 0.9246,
|
|
"step": 5041
|
|
},
|
|
{
|
|
"epoch": 1.3409574468085106,
|
|
"grad_norm": 3.9419326782226562,
|
|
"learning_rate": 8.753926833649871e-06,
|
|
"loss": 0.7463,
|
|
"step": 5042
|
|
},
|
|
{
|
|
"epoch": 1.3412234042553193,
|
|
"grad_norm": 4.060051918029785,
|
|
"learning_rate": 8.753345821711224e-06,
|
|
"loss": 0.9061,
|
|
"step": 5043
|
|
},
|
|
{
|
|
"epoch": 1.3414893617021277,
|
|
"grad_norm": 3.7086057662963867,
|
|
"learning_rate": 8.75276469363964e-06,
|
|
"loss": 0.8177,
|
|
"step": 5044
|
|
},
|
|
{
|
|
"epoch": 1.3417553191489362,
|
|
"grad_norm": 4.173861503601074,
|
|
"learning_rate": 8.752183449453098e-06,
|
|
"loss": 0.8117,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 1.3420212765957447,
|
|
"grad_norm": 4.282475471496582,
|
|
"learning_rate": 8.75160208916958e-06,
|
|
"loss": 0.8352,
|
|
"step": 5046
|
|
},
|
|
{
|
|
"epoch": 1.3422872340425531,
|
|
"grad_norm": 3.9250497817993164,
|
|
"learning_rate": 8.75102061280708e-06,
|
|
"loss": 0.8292,
|
|
"step": 5047
|
|
},
|
|
{
|
|
"epoch": 1.3425531914893618,
|
|
"grad_norm": 4.28936767578125,
|
|
"learning_rate": 8.750439020383584e-06,
|
|
"loss": 0.8269,
|
|
"step": 5048
|
|
},
|
|
{
|
|
"epoch": 1.3428191489361703,
|
|
"grad_norm": 4.007338523864746,
|
|
"learning_rate": 8.749857311917089e-06,
|
|
"loss": 0.8376,
|
|
"step": 5049
|
|
},
|
|
{
|
|
"epoch": 1.3430851063829787,
|
|
"grad_norm": 3.741140842437744,
|
|
"learning_rate": 8.749275487425595e-06,
|
|
"loss": 0.7936,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 1.3433510638297872,
|
|
"grad_norm": 3.8448450565338135,
|
|
"learning_rate": 8.748693546927101e-06,
|
|
"loss": 0.8088,
|
|
"step": 5051
|
|
},
|
|
{
|
|
"epoch": 1.3436170212765957,
|
|
"grad_norm": 4.5769782066345215,
|
|
"learning_rate": 8.748111490439617e-06,
|
|
"loss": 0.8315,
|
|
"step": 5052
|
|
},
|
|
{
|
|
"epoch": 1.3438829787234043,
|
|
"grad_norm": 4.1284871101379395,
|
|
"learning_rate": 8.74752931798115e-06,
|
|
"loss": 0.8866,
|
|
"step": 5053
|
|
},
|
|
{
|
|
"epoch": 1.3441489361702128,
|
|
"grad_norm": 3.9224517345428467,
|
|
"learning_rate": 8.746947029569715e-06,
|
|
"loss": 0.6403,
|
|
"step": 5054
|
|
},
|
|
{
|
|
"epoch": 1.3444148936170213,
|
|
"grad_norm": 4.114837169647217,
|
|
"learning_rate": 8.746364625223326e-06,
|
|
"loss": 0.7303,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 1.3446808510638297,
|
|
"grad_norm": 3.9492406845092773,
|
|
"learning_rate": 8.745782104960006e-06,
|
|
"loss": 0.7462,
|
|
"step": 5056
|
|
},
|
|
{
|
|
"epoch": 1.3449468085106382,
|
|
"grad_norm": 3.5633533000946045,
|
|
"learning_rate": 8.745199468797775e-06,
|
|
"loss": 0.8241,
|
|
"step": 5057
|
|
},
|
|
{
|
|
"epoch": 1.3452127659574469,
|
|
"grad_norm": 3.9602227210998535,
|
|
"learning_rate": 8.744616716754665e-06,
|
|
"loss": 0.8142,
|
|
"step": 5058
|
|
},
|
|
{
|
|
"epoch": 1.3454787234042553,
|
|
"grad_norm": 3.6486499309539795,
|
|
"learning_rate": 8.744033848848705e-06,
|
|
"loss": 0.7932,
|
|
"step": 5059
|
|
},
|
|
{
|
|
"epoch": 1.3457446808510638,
|
|
"grad_norm": 3.9516966342926025,
|
|
"learning_rate": 8.743450865097929e-06,
|
|
"loss": 0.7334,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 1.3460106382978723,
|
|
"grad_norm": 4.261397361755371,
|
|
"learning_rate": 8.742867765520377e-06,
|
|
"loss": 0.7549,
|
|
"step": 5061
|
|
},
|
|
{
|
|
"epoch": 1.3462765957446807,
|
|
"grad_norm": 4.082563877105713,
|
|
"learning_rate": 8.742284550134088e-06,
|
|
"loss": 0.8306,
|
|
"step": 5062
|
|
},
|
|
{
|
|
"epoch": 1.3465425531914894,
|
|
"grad_norm": 3.9603230953216553,
|
|
"learning_rate": 8.74170121895711e-06,
|
|
"loss": 0.832,
|
|
"step": 5063
|
|
},
|
|
{
|
|
"epoch": 1.3468085106382979,
|
|
"grad_norm": 4.0057692527771,
|
|
"learning_rate": 8.741117772007492e-06,
|
|
"loss": 0.783,
|
|
"step": 5064
|
|
},
|
|
{
|
|
"epoch": 1.3470744680851063,
|
|
"grad_norm": 4.130981922149658,
|
|
"learning_rate": 8.740534209303285e-06,
|
|
"loss": 0.6476,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 1.347340425531915,
|
|
"grad_norm": 3.641900062561035,
|
|
"learning_rate": 8.739950530862544e-06,
|
|
"loss": 0.9809,
|
|
"step": 5066
|
|
},
|
|
{
|
|
"epoch": 1.3476063829787235,
|
|
"grad_norm": 3.607656955718994,
|
|
"learning_rate": 8.739366736703331e-06,
|
|
"loss": 0.7784,
|
|
"step": 5067
|
|
},
|
|
{
|
|
"epoch": 1.347872340425532,
|
|
"grad_norm": 4.068065166473389,
|
|
"learning_rate": 8.73878282684371e-06,
|
|
"loss": 0.9063,
|
|
"step": 5068
|
|
},
|
|
{
|
|
"epoch": 1.3481382978723404,
|
|
"grad_norm": 3.952601671218872,
|
|
"learning_rate": 8.738198801301745e-06,
|
|
"loss": 0.9279,
|
|
"step": 5069
|
|
},
|
|
{
|
|
"epoch": 1.3484042553191489,
|
|
"grad_norm": 4.016735553741455,
|
|
"learning_rate": 8.737614660095507e-06,
|
|
"loss": 0.7658,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 1.3486702127659576,
|
|
"grad_norm": 3.669020891189575,
|
|
"learning_rate": 8.737030403243074e-06,
|
|
"loss": 0.6806,
|
|
"step": 5071
|
|
},
|
|
{
|
|
"epoch": 1.348936170212766,
|
|
"grad_norm": 3.659308910369873,
|
|
"learning_rate": 8.736446030762518e-06,
|
|
"loss": 0.7539,
|
|
"step": 5072
|
|
},
|
|
{
|
|
"epoch": 1.3492021276595745,
|
|
"grad_norm": 3.9839887619018555,
|
|
"learning_rate": 8.735861542671924e-06,
|
|
"loss": 0.7342,
|
|
"step": 5073
|
|
},
|
|
{
|
|
"epoch": 1.349468085106383,
|
|
"grad_norm": 3.9134328365325928,
|
|
"learning_rate": 8.735276938989375e-06,
|
|
"loss": 0.8636,
|
|
"step": 5074
|
|
},
|
|
{
|
|
"epoch": 1.3497340425531914,
|
|
"grad_norm": 3.841643810272217,
|
|
"learning_rate": 8.73469221973296e-06,
|
|
"loss": 0.7273,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"grad_norm": 3.903296947479248,
|
|
"learning_rate": 8.734107384920771e-06,
|
|
"loss": 0.8596,
|
|
"step": 5076
|
|
},
|
|
{
|
|
"epoch": 1.3502659574468086,
|
|
"grad_norm": 4.10729455947876,
|
|
"learning_rate": 8.733522434570901e-06,
|
|
"loss": 0.8268,
|
|
"step": 5077
|
|
},
|
|
{
|
|
"epoch": 1.350531914893617,
|
|
"grad_norm": 3.913231611251831,
|
|
"learning_rate": 8.732937368701453e-06,
|
|
"loss": 0.8017,
|
|
"step": 5078
|
|
},
|
|
{
|
|
"epoch": 1.3507978723404255,
|
|
"grad_norm": 3.795318365097046,
|
|
"learning_rate": 8.732352187330528e-06,
|
|
"loss": 0.6833,
|
|
"step": 5079
|
|
},
|
|
{
|
|
"epoch": 1.351063829787234,
|
|
"grad_norm": 3.991790294647217,
|
|
"learning_rate": 8.731766890476232e-06,
|
|
"loss": 0.7068,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 1.3513297872340426,
|
|
"grad_norm": 4.177598476409912,
|
|
"learning_rate": 8.731181478156673e-06,
|
|
"loss": 0.806,
|
|
"step": 5081
|
|
},
|
|
{
|
|
"epoch": 1.351595744680851,
|
|
"grad_norm": 3.855368137359619,
|
|
"learning_rate": 8.730595950389968e-06,
|
|
"loss": 0.7752,
|
|
"step": 5082
|
|
},
|
|
{
|
|
"epoch": 1.3518617021276595,
|
|
"grad_norm": 4.333880424499512,
|
|
"learning_rate": 8.730010307194232e-06,
|
|
"loss": 0.771,
|
|
"step": 5083
|
|
},
|
|
{
|
|
"epoch": 1.352127659574468,
|
|
"grad_norm": 3.9861552715301514,
|
|
"learning_rate": 8.729424548587585e-06,
|
|
"loss": 0.873,
|
|
"step": 5084
|
|
},
|
|
{
|
|
"epoch": 1.3523936170212765,
|
|
"grad_norm": 4.271336078643799,
|
|
"learning_rate": 8.728838674588151e-06,
|
|
"loss": 0.8345,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 1.3526595744680852,
|
|
"grad_norm": 4.418639659881592,
|
|
"learning_rate": 8.72825268521406e-06,
|
|
"loss": 0.9593,
|
|
"step": 5086
|
|
},
|
|
{
|
|
"epoch": 1.3529255319148936,
|
|
"grad_norm": 4.122128963470459,
|
|
"learning_rate": 8.72766658048344e-06,
|
|
"loss": 0.6917,
|
|
"step": 5087
|
|
},
|
|
{
|
|
"epoch": 1.353191489361702,
|
|
"grad_norm": 3.9738972187042236,
|
|
"learning_rate": 8.727080360414428e-06,
|
|
"loss": 0.7446,
|
|
"step": 5088
|
|
},
|
|
{
|
|
"epoch": 1.3534574468085108,
|
|
"grad_norm": 4.067488670349121,
|
|
"learning_rate": 8.726494025025162e-06,
|
|
"loss": 0.6886,
|
|
"step": 5089
|
|
},
|
|
{
|
|
"epoch": 1.3537234042553192,
|
|
"grad_norm": 3.782886028289795,
|
|
"learning_rate": 8.725907574333783e-06,
|
|
"loss": 0.8159,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 1.3539893617021277,
|
|
"grad_norm": 3.9360549449920654,
|
|
"learning_rate": 8.725321008358436e-06,
|
|
"loss": 0.8189,
|
|
"step": 5091
|
|
},
|
|
{
|
|
"epoch": 1.3542553191489362,
|
|
"grad_norm": 4.132941246032715,
|
|
"learning_rate": 8.724734327117273e-06,
|
|
"loss": 0.9677,
|
|
"step": 5092
|
|
},
|
|
{
|
|
"epoch": 1.3545212765957446,
|
|
"grad_norm": 4.25277042388916,
|
|
"learning_rate": 8.724147530628442e-06,
|
|
"loss": 0.8653,
|
|
"step": 5093
|
|
},
|
|
{
|
|
"epoch": 1.3547872340425533,
|
|
"grad_norm": 3.962684392929077,
|
|
"learning_rate": 8.723560618910103e-06,
|
|
"loss": 0.6903,
|
|
"step": 5094
|
|
},
|
|
{
|
|
"epoch": 1.3550531914893618,
|
|
"grad_norm": 3.9663078784942627,
|
|
"learning_rate": 8.722973591980414e-06,
|
|
"loss": 0.7572,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 1.3553191489361702,
|
|
"grad_norm": 4.48624849319458,
|
|
"learning_rate": 8.722386449857541e-06,
|
|
"loss": 0.9056,
|
|
"step": 5096
|
|
},
|
|
{
|
|
"epoch": 1.3555851063829787,
|
|
"grad_norm": 3.8394525051116943,
|
|
"learning_rate": 8.721799192559646e-06,
|
|
"loss": 0.7721,
|
|
"step": 5097
|
|
},
|
|
{
|
|
"epoch": 1.3558510638297872,
|
|
"grad_norm": 4.599715232849121,
|
|
"learning_rate": 8.721211820104903e-06,
|
|
"loss": 1.0118,
|
|
"step": 5098
|
|
},
|
|
{
|
|
"epoch": 1.3561170212765958,
|
|
"grad_norm": 4.1499528884887695,
|
|
"learning_rate": 8.720624332511484e-06,
|
|
"loss": 0.8979,
|
|
"step": 5099
|
|
},
|
|
{
|
|
"epoch": 1.3563829787234043,
|
|
"grad_norm": 3.8984806537628174,
|
|
"learning_rate": 8.72003672979757e-06,
|
|
"loss": 0.8824,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 1.3566489361702128,
|
|
"grad_norm": 3.709800958633423,
|
|
"learning_rate": 8.71944901198134e-06,
|
|
"loss": 0.8053,
|
|
"step": 5101
|
|
},
|
|
{
|
|
"epoch": 1.3569148936170212,
|
|
"grad_norm": 3.4785032272338867,
|
|
"learning_rate": 8.718861179080975e-06,
|
|
"loss": 0.6898,
|
|
"step": 5102
|
|
},
|
|
{
|
|
"epoch": 1.3571808510638297,
|
|
"grad_norm": 3.8457705974578857,
|
|
"learning_rate": 8.71827323111467e-06,
|
|
"loss": 0.75,
|
|
"step": 5103
|
|
},
|
|
{
|
|
"epoch": 1.3574468085106384,
|
|
"grad_norm": 3.66109299659729,
|
|
"learning_rate": 8.71768516810061e-06,
|
|
"loss": 0.7255,
|
|
"step": 5104
|
|
},
|
|
{
|
|
"epoch": 1.3577127659574468,
|
|
"grad_norm": 3.6998486518859863,
|
|
"learning_rate": 8.717096990056999e-06,
|
|
"loss": 0.8202,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 1.3579787234042553,
|
|
"grad_norm": 4.291678428649902,
|
|
"learning_rate": 8.716508697002027e-06,
|
|
"loss": 0.9424,
|
|
"step": 5106
|
|
},
|
|
{
|
|
"epoch": 1.3582446808510638,
|
|
"grad_norm": 3.870074987411499,
|
|
"learning_rate": 8.715920288953901e-06,
|
|
"loss": 0.8821,
|
|
"step": 5107
|
|
},
|
|
{
|
|
"epoch": 1.3585106382978722,
|
|
"grad_norm": 3.469759702682495,
|
|
"learning_rate": 8.715331765930828e-06,
|
|
"loss": 0.745,
|
|
"step": 5108
|
|
},
|
|
{
|
|
"epoch": 1.358776595744681,
|
|
"grad_norm": 4.048684597015381,
|
|
"learning_rate": 8.714743127951014e-06,
|
|
"loss": 0.9526,
|
|
"step": 5109
|
|
},
|
|
{
|
|
"epoch": 1.3590425531914894,
|
|
"grad_norm": 4.060766696929932,
|
|
"learning_rate": 8.714154375032675e-06,
|
|
"loss": 0.7971,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 1.3593085106382978,
|
|
"grad_norm": 4.004628658294678,
|
|
"learning_rate": 8.713565507194027e-06,
|
|
"loss": 0.8302,
|
|
"step": 5111
|
|
},
|
|
{
|
|
"epoch": 1.3595744680851065,
|
|
"grad_norm": 4.034252166748047,
|
|
"learning_rate": 8.712976524453289e-06,
|
|
"loss": 0.8873,
|
|
"step": 5112
|
|
},
|
|
{
|
|
"epoch": 1.359840425531915,
|
|
"grad_norm": 3.9113869667053223,
|
|
"learning_rate": 8.712387426828685e-06,
|
|
"loss": 0.7514,
|
|
"step": 5113
|
|
},
|
|
{
|
|
"epoch": 1.3601063829787234,
|
|
"grad_norm": 3.977827787399292,
|
|
"learning_rate": 8.711798214338445e-06,
|
|
"loss": 0.8099,
|
|
"step": 5114
|
|
},
|
|
{
|
|
"epoch": 1.360372340425532,
|
|
"grad_norm": 4.005003929138184,
|
|
"learning_rate": 8.711208887000797e-06,
|
|
"loss": 0.8888,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 1.3606382978723404,
|
|
"grad_norm": 3.7809715270996094,
|
|
"learning_rate": 8.710619444833977e-06,
|
|
"loss": 0.8131,
|
|
"step": 5116
|
|
},
|
|
{
|
|
"epoch": 1.360904255319149,
|
|
"grad_norm": 3.8309693336486816,
|
|
"learning_rate": 8.710029887856224e-06,
|
|
"loss": 0.6836,
|
|
"step": 5117
|
|
},
|
|
{
|
|
"epoch": 1.3611702127659575,
|
|
"grad_norm": 3.7106757164001465,
|
|
"learning_rate": 8.709440216085777e-06,
|
|
"loss": 0.8079,
|
|
"step": 5118
|
|
},
|
|
{
|
|
"epoch": 1.361436170212766,
|
|
"grad_norm": 4.386137962341309,
|
|
"learning_rate": 8.708850429540882e-06,
|
|
"loss": 0.8484,
|
|
"step": 5119
|
|
},
|
|
{
|
|
"epoch": 1.3617021276595744,
|
|
"grad_norm": 4.305933952331543,
|
|
"learning_rate": 8.708260528239788e-06,
|
|
"loss": 0.9357,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 1.361968085106383,
|
|
"grad_norm": 4.107351303100586,
|
|
"learning_rate": 8.70767051220075e-06,
|
|
"loss": 0.8932,
|
|
"step": 5121
|
|
},
|
|
{
|
|
"epoch": 1.3622340425531916,
|
|
"grad_norm": 3.7665624618530273,
|
|
"learning_rate": 8.707080381442016e-06,
|
|
"loss": 0.7792,
|
|
"step": 5122
|
|
},
|
|
{
|
|
"epoch": 1.3625,
|
|
"grad_norm": 4.177657604217529,
|
|
"learning_rate": 8.706490135981856e-06,
|
|
"loss": 0.8046,
|
|
"step": 5123
|
|
},
|
|
{
|
|
"epoch": 1.3627659574468085,
|
|
"grad_norm": 4.132664203643799,
|
|
"learning_rate": 8.705899775838525e-06,
|
|
"loss": 0.8516,
|
|
"step": 5124
|
|
},
|
|
{
|
|
"epoch": 1.363031914893617,
|
|
"grad_norm": 4.0525288581848145,
|
|
"learning_rate": 8.70530930103029e-06,
|
|
"loss": 0.8747,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 1.3632978723404254,
|
|
"grad_norm": 4.088098526000977,
|
|
"learning_rate": 8.704718711575424e-06,
|
|
"loss": 0.6531,
|
|
"step": 5126
|
|
},
|
|
{
|
|
"epoch": 1.3635638297872341,
|
|
"grad_norm": 3.944594144821167,
|
|
"learning_rate": 8.704128007492201e-06,
|
|
"loss": 0.8084,
|
|
"step": 5127
|
|
},
|
|
{
|
|
"epoch": 1.3638297872340426,
|
|
"grad_norm": 4.340763092041016,
|
|
"learning_rate": 8.703537188798894e-06,
|
|
"loss": 0.8186,
|
|
"step": 5128
|
|
},
|
|
{
|
|
"epoch": 1.364095744680851,
|
|
"grad_norm": 3.9249961376190186,
|
|
"learning_rate": 8.702946255513787e-06,
|
|
"loss": 0.8166,
|
|
"step": 5129
|
|
},
|
|
{
|
|
"epoch": 1.3643617021276595,
|
|
"grad_norm": 3.667654275894165,
|
|
"learning_rate": 8.702355207655164e-06,
|
|
"loss": 0.8432,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 1.364627659574468,
|
|
"grad_norm": 3.6376404762268066,
|
|
"learning_rate": 8.70176404524131e-06,
|
|
"loss": 0.7878,
|
|
"step": 5131
|
|
},
|
|
{
|
|
"epoch": 1.3648936170212767,
|
|
"grad_norm": 3.9054555892944336,
|
|
"learning_rate": 8.70117276829052e-06,
|
|
"loss": 0.7763,
|
|
"step": 5132
|
|
},
|
|
{
|
|
"epoch": 1.3651595744680851,
|
|
"grad_norm": 4.0739288330078125,
|
|
"learning_rate": 8.700581376821086e-06,
|
|
"loss": 0.728,
|
|
"step": 5133
|
|
},
|
|
{
|
|
"epoch": 1.3654255319148936,
|
|
"grad_norm": 3.8359971046447754,
|
|
"learning_rate": 8.699989870851308e-06,
|
|
"loss": 0.8314,
|
|
"step": 5134
|
|
},
|
|
{
|
|
"epoch": 1.3656914893617023,
|
|
"grad_norm": 3.708594799041748,
|
|
"learning_rate": 8.699398250399486e-06,
|
|
"loss": 0.7632,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 1.3659574468085105,
|
|
"grad_norm": 3.9665486812591553,
|
|
"learning_rate": 8.698806515483928e-06,
|
|
"loss": 0.8794,
|
|
"step": 5136
|
|
},
|
|
{
|
|
"epoch": 1.3662234042553192,
|
|
"grad_norm": 4.699567794799805,
|
|
"learning_rate": 8.698214666122941e-06,
|
|
"loss": 1.0106,
|
|
"step": 5137
|
|
},
|
|
{
|
|
"epoch": 1.3664893617021276,
|
|
"grad_norm": 3.8563220500946045,
|
|
"learning_rate": 8.697622702334839e-06,
|
|
"loss": 0.7451,
|
|
"step": 5138
|
|
},
|
|
{
|
|
"epoch": 1.366755319148936,
|
|
"grad_norm": 4.188748359680176,
|
|
"learning_rate": 8.697030624137937e-06,
|
|
"loss": 0.7481,
|
|
"step": 5139
|
|
},
|
|
{
|
|
"epoch": 1.3670212765957448,
|
|
"grad_norm": 3.891820192337036,
|
|
"learning_rate": 8.696438431550553e-06,
|
|
"loss": 0.8304,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 1.3672872340425533,
|
|
"grad_norm": 4.065185546875,
|
|
"learning_rate": 8.695846124591015e-06,
|
|
"loss": 0.8912,
|
|
"step": 5141
|
|
},
|
|
{
|
|
"epoch": 1.3675531914893617,
|
|
"grad_norm": 3.466252326965332,
|
|
"learning_rate": 8.695253703277644e-06,
|
|
"loss": 0.7941,
|
|
"step": 5142
|
|
},
|
|
{
|
|
"epoch": 1.3678191489361702,
|
|
"grad_norm": 3.7102415561676025,
|
|
"learning_rate": 8.694661167628772e-06,
|
|
"loss": 0.6821,
|
|
"step": 5143
|
|
},
|
|
{
|
|
"epoch": 1.3680851063829786,
|
|
"grad_norm": 4.1319260597229,
|
|
"learning_rate": 8.694068517662735e-06,
|
|
"loss": 0.9666,
|
|
"step": 5144
|
|
},
|
|
{
|
|
"epoch": 1.3683510638297873,
|
|
"grad_norm": 3.870607852935791,
|
|
"learning_rate": 8.693475753397869e-06,
|
|
"loss": 0.8806,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 1.3686170212765958,
|
|
"grad_norm": 3.9953293800354004,
|
|
"learning_rate": 8.692882874852515e-06,
|
|
"loss": 0.8558,
|
|
"step": 5146
|
|
},
|
|
{
|
|
"epoch": 1.3688829787234043,
|
|
"grad_norm": 4.429169178009033,
|
|
"learning_rate": 8.692289882045015e-06,
|
|
"loss": 0.7949,
|
|
"step": 5147
|
|
},
|
|
{
|
|
"epoch": 1.3691489361702127,
|
|
"grad_norm": 3.895005464553833,
|
|
"learning_rate": 8.691696774993721e-06,
|
|
"loss": 0.7547,
|
|
"step": 5148
|
|
},
|
|
{
|
|
"epoch": 1.3694148936170212,
|
|
"grad_norm": 4.446406841278076,
|
|
"learning_rate": 8.691103553716981e-06,
|
|
"loss": 0.8757,
|
|
"step": 5149
|
|
},
|
|
{
|
|
"epoch": 1.3696808510638299,
|
|
"grad_norm": 4.012157440185547,
|
|
"learning_rate": 8.690510218233153e-06,
|
|
"loss": 0.9106,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 1.3699468085106383,
|
|
"grad_norm": 3.966068983078003,
|
|
"learning_rate": 8.689916768560593e-06,
|
|
"loss": 0.7194,
|
|
"step": 5151
|
|
},
|
|
{
|
|
"epoch": 1.3702127659574468,
|
|
"grad_norm": 3.9841232299804688,
|
|
"learning_rate": 8.689323204717663e-06,
|
|
"loss": 0.8174,
|
|
"step": 5152
|
|
},
|
|
{
|
|
"epoch": 1.3704787234042553,
|
|
"grad_norm": 4.248937129974365,
|
|
"learning_rate": 8.688729526722732e-06,
|
|
"loss": 0.8107,
|
|
"step": 5153
|
|
},
|
|
{
|
|
"epoch": 1.3707446808510637,
|
|
"grad_norm": 3.6485583782196045,
|
|
"learning_rate": 8.688135734594165e-06,
|
|
"loss": 0.8828,
|
|
"step": 5154
|
|
},
|
|
{
|
|
"epoch": 1.3710106382978724,
|
|
"grad_norm": 4.1670966148376465,
|
|
"learning_rate": 8.687541828350334e-06,
|
|
"loss": 0.8604,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 1.3712765957446809,
|
|
"grad_norm": 4.121282577514648,
|
|
"learning_rate": 8.686947808009621e-06,
|
|
"loss": 0.8228,
|
|
"step": 5156
|
|
},
|
|
{
|
|
"epoch": 1.3715425531914893,
|
|
"grad_norm": 3.781928539276123,
|
|
"learning_rate": 8.6863536735904e-06,
|
|
"loss": 0.7416,
|
|
"step": 5157
|
|
},
|
|
{
|
|
"epoch": 1.371808510638298,
|
|
"grad_norm": 3.688425064086914,
|
|
"learning_rate": 8.685759425111056e-06,
|
|
"loss": 0.7902,
|
|
"step": 5158
|
|
},
|
|
{
|
|
"epoch": 1.3720744680851062,
|
|
"grad_norm": 3.922410488128662,
|
|
"learning_rate": 8.685165062589975e-06,
|
|
"loss": 0.8117,
|
|
"step": 5159
|
|
},
|
|
{
|
|
"epoch": 1.372340425531915,
|
|
"grad_norm": 4.217987060546875,
|
|
"learning_rate": 8.68457058604555e-06,
|
|
"loss": 0.9173,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 1.3726063829787234,
|
|
"grad_norm": 4.135257244110107,
|
|
"learning_rate": 8.683975995496173e-06,
|
|
"loss": 0.7474,
|
|
"step": 5161
|
|
},
|
|
{
|
|
"epoch": 1.3728723404255319,
|
|
"grad_norm": 3.7882463932037354,
|
|
"learning_rate": 8.68338129096024e-06,
|
|
"loss": 0.8153,
|
|
"step": 5162
|
|
},
|
|
{
|
|
"epoch": 1.3731382978723405,
|
|
"grad_norm": 3.6793859004974365,
|
|
"learning_rate": 8.682786472456155e-06,
|
|
"loss": 0.6914,
|
|
"step": 5163
|
|
},
|
|
{
|
|
"epoch": 1.373404255319149,
|
|
"grad_norm": 4.030581951141357,
|
|
"learning_rate": 8.682191540002318e-06,
|
|
"loss": 0.778,
|
|
"step": 5164
|
|
},
|
|
{
|
|
"epoch": 1.3736702127659575,
|
|
"grad_norm": 3.8380470275878906,
|
|
"learning_rate": 8.681596493617141e-06,
|
|
"loss": 0.7522,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 1.373936170212766,
|
|
"grad_norm": 4.138343334197998,
|
|
"learning_rate": 8.681001333319035e-06,
|
|
"loss": 0.843,
|
|
"step": 5166
|
|
},
|
|
{
|
|
"epoch": 1.3742021276595744,
|
|
"grad_norm": 3.723407030105591,
|
|
"learning_rate": 8.680406059126412e-06,
|
|
"loss": 0.7799,
|
|
"step": 5167
|
|
},
|
|
{
|
|
"epoch": 1.374468085106383,
|
|
"grad_norm": 3.8985822200775146,
|
|
"learning_rate": 8.679810671057695e-06,
|
|
"loss": 0.7446,
|
|
"step": 5168
|
|
},
|
|
{
|
|
"epoch": 1.3747340425531915,
|
|
"grad_norm": 4.534223556518555,
|
|
"learning_rate": 8.679215169131301e-06,
|
|
"loss": 0.8734,
|
|
"step": 5169
|
|
},
|
|
{
|
|
"epoch": 1.375,
|
|
"grad_norm": 3.75278639793396,
|
|
"learning_rate": 8.67861955336566e-06,
|
|
"loss": 0.8435,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 1.3752659574468085,
|
|
"grad_norm": 4.094736099243164,
|
|
"learning_rate": 8.678023823779196e-06,
|
|
"loss": 0.7671,
|
|
"step": 5171
|
|
},
|
|
{
|
|
"epoch": 1.375531914893617,
|
|
"grad_norm": 3.920642137527466,
|
|
"learning_rate": 8.677427980390348e-06,
|
|
"loss": 0.7937,
|
|
"step": 5172
|
|
},
|
|
{
|
|
"epoch": 1.3757978723404256,
|
|
"grad_norm": 3.5799460411071777,
|
|
"learning_rate": 8.676832023217545e-06,
|
|
"loss": 0.8206,
|
|
"step": 5173
|
|
},
|
|
{
|
|
"epoch": 1.376063829787234,
|
|
"grad_norm": 3.8929152488708496,
|
|
"learning_rate": 8.676235952279233e-06,
|
|
"loss": 0.837,
|
|
"step": 5174
|
|
},
|
|
{
|
|
"epoch": 1.3763297872340425,
|
|
"grad_norm": 3.7762844562530518,
|
|
"learning_rate": 8.675639767593851e-06,
|
|
"loss": 0.8191,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 1.376595744680851,
|
|
"grad_norm": 4.34854793548584,
|
|
"learning_rate": 8.675043469179849e-06,
|
|
"loss": 0.9724,
|
|
"step": 5176
|
|
},
|
|
{
|
|
"epoch": 1.3768617021276595,
|
|
"grad_norm": 4.143275260925293,
|
|
"learning_rate": 8.674447057055673e-06,
|
|
"loss": 0.7607,
|
|
"step": 5177
|
|
},
|
|
{
|
|
"epoch": 1.3771276595744681,
|
|
"grad_norm": 3.8602356910705566,
|
|
"learning_rate": 8.673850531239781e-06,
|
|
"loss": 0.8241,
|
|
"step": 5178
|
|
},
|
|
{
|
|
"epoch": 1.3773936170212766,
|
|
"grad_norm": 4.238362789154053,
|
|
"learning_rate": 8.673253891750626e-06,
|
|
"loss": 0.75,
|
|
"step": 5179
|
|
},
|
|
{
|
|
"epoch": 1.377659574468085,
|
|
"grad_norm": 4.423724174499512,
|
|
"learning_rate": 8.672657138606672e-06,
|
|
"loss": 0.8929,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 1.3779255319148938,
|
|
"grad_norm": 3.5237340927124023,
|
|
"learning_rate": 8.672060271826381e-06,
|
|
"loss": 0.6877,
|
|
"step": 5181
|
|
},
|
|
{
|
|
"epoch": 1.378191489361702,
|
|
"grad_norm": 3.615936756134033,
|
|
"learning_rate": 8.671463291428223e-06,
|
|
"loss": 0.7091,
|
|
"step": 5182
|
|
},
|
|
{
|
|
"epoch": 1.3784574468085107,
|
|
"grad_norm": 3.587336778640747,
|
|
"learning_rate": 8.67086619743067e-06,
|
|
"loss": 0.8266,
|
|
"step": 5183
|
|
},
|
|
{
|
|
"epoch": 1.3787234042553191,
|
|
"grad_norm": 4.141132831573486,
|
|
"learning_rate": 8.670268989852192e-06,
|
|
"loss": 0.7199,
|
|
"step": 5184
|
|
},
|
|
{
|
|
"epoch": 1.3789893617021276,
|
|
"grad_norm": 4.076261520385742,
|
|
"learning_rate": 8.669671668711272e-06,
|
|
"loss": 0.7788,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 1.3792553191489363,
|
|
"grad_norm": 4.020741939544678,
|
|
"learning_rate": 8.66907423402639e-06,
|
|
"loss": 0.8652,
|
|
"step": 5186
|
|
},
|
|
{
|
|
"epoch": 1.3795212765957447,
|
|
"grad_norm": 3.8059983253479004,
|
|
"learning_rate": 8.668476685816029e-06,
|
|
"loss": 0.8151,
|
|
"step": 5187
|
|
},
|
|
{
|
|
"epoch": 1.3797872340425532,
|
|
"grad_norm": 4.055500030517578,
|
|
"learning_rate": 8.667879024098682e-06,
|
|
"loss": 0.7985,
|
|
"step": 5188
|
|
},
|
|
{
|
|
"epoch": 1.3800531914893617,
|
|
"grad_norm": 3.8605387210845947,
|
|
"learning_rate": 8.66728124889284e-06,
|
|
"loss": 0.8602,
|
|
"step": 5189
|
|
},
|
|
{
|
|
"epoch": 1.3803191489361701,
|
|
"grad_norm": 3.781041383743286,
|
|
"learning_rate": 8.666683360216998e-06,
|
|
"loss": 0.815,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 1.3805851063829788,
|
|
"grad_norm": 4.160099029541016,
|
|
"learning_rate": 8.666085358089655e-06,
|
|
"loss": 0.8366,
|
|
"step": 5191
|
|
},
|
|
{
|
|
"epoch": 1.3808510638297873,
|
|
"grad_norm": 4.079177379608154,
|
|
"learning_rate": 8.665487242529316e-06,
|
|
"loss": 0.9131,
|
|
"step": 5192
|
|
},
|
|
{
|
|
"epoch": 1.3811170212765957,
|
|
"grad_norm": 4.033502578735352,
|
|
"learning_rate": 8.664889013554484e-06,
|
|
"loss": 0.7588,
|
|
"step": 5193
|
|
},
|
|
{
|
|
"epoch": 1.3813829787234042,
|
|
"grad_norm": 3.969634771347046,
|
|
"learning_rate": 8.664290671183675e-06,
|
|
"loss": 0.9422,
|
|
"step": 5194
|
|
},
|
|
{
|
|
"epoch": 1.3816489361702127,
|
|
"grad_norm": 3.9259159564971924,
|
|
"learning_rate": 8.663692215435396e-06,
|
|
"loss": 0.7046,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 1.3819148936170214,
|
|
"grad_norm": 4.086988925933838,
|
|
"learning_rate": 8.663093646328166e-06,
|
|
"loss": 0.8629,
|
|
"step": 5196
|
|
},
|
|
{
|
|
"epoch": 1.3821808510638298,
|
|
"grad_norm": 4.083224773406982,
|
|
"learning_rate": 8.662494963880508e-06,
|
|
"loss": 0.8992,
|
|
"step": 5197
|
|
},
|
|
{
|
|
"epoch": 1.3824468085106383,
|
|
"grad_norm": 4.1260881423950195,
|
|
"learning_rate": 8.66189616811094e-06,
|
|
"loss": 0.8958,
|
|
"step": 5198
|
|
},
|
|
{
|
|
"epoch": 1.3827127659574467,
|
|
"grad_norm": 3.9255919456481934,
|
|
"learning_rate": 8.661297259037998e-06,
|
|
"loss": 0.8155,
|
|
"step": 5199
|
|
},
|
|
{
|
|
"epoch": 1.3829787234042552,
|
|
"grad_norm": 4.030576705932617,
|
|
"learning_rate": 8.660698236680205e-06,
|
|
"loss": 0.901,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 1.383244680851064,
|
|
"grad_norm": 4.204456329345703,
|
|
"learning_rate": 8.660099101056098e-06,
|
|
"loss": 0.8021,
|
|
"step": 5201
|
|
},
|
|
{
|
|
"epoch": 1.3835106382978724,
|
|
"grad_norm": 3.743723154067993,
|
|
"learning_rate": 8.659499852184218e-06,
|
|
"loss": 0.8411,
|
|
"step": 5202
|
|
},
|
|
{
|
|
"epoch": 1.3837765957446808,
|
|
"grad_norm": 3.8044793605804443,
|
|
"learning_rate": 8.658900490083102e-06,
|
|
"loss": 0.6985,
|
|
"step": 5203
|
|
},
|
|
{
|
|
"epoch": 1.3840425531914895,
|
|
"grad_norm": 3.762624740600586,
|
|
"learning_rate": 8.658301014771298e-06,
|
|
"loss": 0.7873,
|
|
"step": 5204
|
|
},
|
|
{
|
|
"epoch": 1.3843085106382977,
|
|
"grad_norm": 3.8245599269866943,
|
|
"learning_rate": 8.657701426267355e-06,
|
|
"loss": 0.7773,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 1.3845744680851064,
|
|
"grad_norm": 3.875678062438965,
|
|
"learning_rate": 8.65710172458982e-06,
|
|
"loss": 0.9493,
|
|
"step": 5206
|
|
},
|
|
{
|
|
"epoch": 1.3848404255319149,
|
|
"grad_norm": 4.034217834472656,
|
|
"learning_rate": 8.656501909757255e-06,
|
|
"loss": 0.8742,
|
|
"step": 5207
|
|
},
|
|
{
|
|
"epoch": 1.3851063829787233,
|
|
"grad_norm": 3.7253971099853516,
|
|
"learning_rate": 8.655901981788216e-06,
|
|
"loss": 0.7408,
|
|
"step": 5208
|
|
},
|
|
{
|
|
"epoch": 1.385372340425532,
|
|
"grad_norm": 4.211146354675293,
|
|
"learning_rate": 8.655301940701262e-06,
|
|
"loss": 0.8107,
|
|
"step": 5209
|
|
},
|
|
{
|
|
"epoch": 1.3856382978723405,
|
|
"grad_norm": 4.0121378898620605,
|
|
"learning_rate": 8.654701786514965e-06,
|
|
"loss": 0.8808,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 1.385904255319149,
|
|
"grad_norm": 4.111256122589111,
|
|
"learning_rate": 8.654101519247892e-06,
|
|
"loss": 0.8339,
|
|
"step": 5211
|
|
},
|
|
{
|
|
"epoch": 1.3861702127659574,
|
|
"grad_norm": 3.683849811553955,
|
|
"learning_rate": 8.653501138918615e-06,
|
|
"loss": 0.8046,
|
|
"step": 5212
|
|
},
|
|
{
|
|
"epoch": 1.3864361702127659,
|
|
"grad_norm": 4.3086957931518555,
|
|
"learning_rate": 8.652900645545711e-06,
|
|
"loss": 0.8217,
|
|
"step": 5213
|
|
},
|
|
{
|
|
"epoch": 1.3867021276595746,
|
|
"grad_norm": 4.064043998718262,
|
|
"learning_rate": 8.65230003914776e-06,
|
|
"loss": 0.9811,
|
|
"step": 5214
|
|
},
|
|
{
|
|
"epoch": 1.386968085106383,
|
|
"grad_norm": 3.8175463676452637,
|
|
"learning_rate": 8.651699319743348e-06,
|
|
"loss": 0.879,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 1.3872340425531915,
|
|
"grad_norm": 4.500128269195557,
|
|
"learning_rate": 8.651098487351057e-06,
|
|
"loss": 0.6979,
|
|
"step": 5216
|
|
},
|
|
{
|
|
"epoch": 1.3875,
|
|
"grad_norm": 4.019436836242676,
|
|
"learning_rate": 8.650497541989483e-06,
|
|
"loss": 0.8766,
|
|
"step": 5217
|
|
},
|
|
{
|
|
"epoch": 1.3877659574468084,
|
|
"grad_norm": 3.5277206897735596,
|
|
"learning_rate": 8.649896483677213e-06,
|
|
"loss": 0.8292,
|
|
"step": 5218
|
|
},
|
|
{
|
|
"epoch": 1.388031914893617,
|
|
"grad_norm": 3.918307065963745,
|
|
"learning_rate": 8.649295312432853e-06,
|
|
"loss": 0.7684,
|
|
"step": 5219
|
|
},
|
|
{
|
|
"epoch": 1.3882978723404256,
|
|
"grad_norm": 3.9739909172058105,
|
|
"learning_rate": 8.648694028274998e-06,
|
|
"loss": 0.743,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 1.388563829787234,
|
|
"grad_norm": 3.6508398056030273,
|
|
"learning_rate": 8.648092631222253e-06,
|
|
"loss": 0.7689,
|
|
"step": 5221
|
|
},
|
|
{
|
|
"epoch": 1.3888297872340425,
|
|
"grad_norm": 3.846869468688965,
|
|
"learning_rate": 8.647491121293228e-06,
|
|
"loss": 0.741,
|
|
"step": 5222
|
|
},
|
|
{
|
|
"epoch": 1.389095744680851,
|
|
"grad_norm": 3.8481643199920654,
|
|
"learning_rate": 8.646889498506532e-06,
|
|
"loss": 0.8665,
|
|
"step": 5223
|
|
},
|
|
{
|
|
"epoch": 1.3893617021276596,
|
|
"grad_norm": 4.380584239959717,
|
|
"learning_rate": 8.646287762880783e-06,
|
|
"loss": 0.8029,
|
|
"step": 5224
|
|
},
|
|
{
|
|
"epoch": 1.389627659574468,
|
|
"grad_norm": 3.8931496143341064,
|
|
"learning_rate": 8.645685914434596e-06,
|
|
"loss": 0.8964,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 1.3898936170212766,
|
|
"grad_norm": 3.976508378982544,
|
|
"learning_rate": 8.645083953186596e-06,
|
|
"loss": 0.8707,
|
|
"step": 5226
|
|
},
|
|
{
|
|
"epoch": 1.390159574468085,
|
|
"grad_norm": 3.606631278991699,
|
|
"learning_rate": 8.644481879155406e-06,
|
|
"loss": 0.7476,
|
|
"step": 5227
|
|
},
|
|
{
|
|
"epoch": 1.3904255319148935,
|
|
"grad_norm": 4.043211936950684,
|
|
"learning_rate": 8.643879692359655e-06,
|
|
"loss": 0.7478,
|
|
"step": 5228
|
|
},
|
|
{
|
|
"epoch": 1.3906914893617022,
|
|
"grad_norm": 3.9135618209838867,
|
|
"learning_rate": 8.643277392817976e-06,
|
|
"loss": 0.7469,
|
|
"step": 5229
|
|
},
|
|
{
|
|
"epoch": 1.3909574468085106,
|
|
"grad_norm": 3.747793674468994,
|
|
"learning_rate": 8.642674980549008e-06,
|
|
"loss": 0.8092,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 1.391223404255319,
|
|
"grad_norm": 4.33275032043457,
|
|
"learning_rate": 8.642072455571383e-06,
|
|
"loss": 0.7867,
|
|
"step": 5231
|
|
},
|
|
{
|
|
"epoch": 1.3914893617021278,
|
|
"grad_norm": 4.364730358123779,
|
|
"learning_rate": 8.641469817903752e-06,
|
|
"loss": 0.8545,
|
|
"step": 5232
|
|
},
|
|
{
|
|
"epoch": 1.3917553191489362,
|
|
"grad_norm": 3.848296880722046,
|
|
"learning_rate": 8.640867067564757e-06,
|
|
"loss": 0.8735,
|
|
"step": 5233
|
|
},
|
|
{
|
|
"epoch": 1.3920212765957447,
|
|
"grad_norm": 3.8391952514648438,
|
|
"learning_rate": 8.640264204573049e-06,
|
|
"loss": 0.8439,
|
|
"step": 5234
|
|
},
|
|
{
|
|
"epoch": 1.3922872340425532,
|
|
"grad_norm": 4.061415672302246,
|
|
"learning_rate": 8.639661228947278e-06,
|
|
"loss": 0.7702,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 1.3925531914893616,
|
|
"grad_norm": 4.175765037536621,
|
|
"learning_rate": 8.639058140706105e-06,
|
|
"loss": 0.8053,
|
|
"step": 5236
|
|
},
|
|
{
|
|
"epoch": 1.3928191489361703,
|
|
"grad_norm": 3.840773105621338,
|
|
"learning_rate": 8.638454939868188e-06,
|
|
"loss": 0.7192,
|
|
"step": 5237
|
|
},
|
|
{
|
|
"epoch": 1.3930851063829788,
|
|
"grad_norm": 3.76470947265625,
|
|
"learning_rate": 8.637851626452191e-06,
|
|
"loss": 0.7634,
|
|
"step": 5238
|
|
},
|
|
{
|
|
"epoch": 1.3933510638297872,
|
|
"grad_norm": 3.903261184692383,
|
|
"learning_rate": 8.637248200476783e-06,
|
|
"loss": 0.7672,
|
|
"step": 5239
|
|
},
|
|
{
|
|
"epoch": 1.3936170212765957,
|
|
"grad_norm": 4.356569290161133,
|
|
"learning_rate": 8.636644661960634e-06,
|
|
"loss": 0.8834,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 1.3938829787234042,
|
|
"grad_norm": 4.116570949554443,
|
|
"learning_rate": 8.636041010922416e-06,
|
|
"loss": 0.7715,
|
|
"step": 5241
|
|
},
|
|
{
|
|
"epoch": 1.3941489361702128,
|
|
"grad_norm": 3.9501302242279053,
|
|
"learning_rate": 8.635437247380809e-06,
|
|
"loss": 0.7663,
|
|
"step": 5242
|
|
},
|
|
{
|
|
"epoch": 1.3944148936170213,
|
|
"grad_norm": 4.226482391357422,
|
|
"learning_rate": 8.634833371354492e-06,
|
|
"loss": 0.8156,
|
|
"step": 5243
|
|
},
|
|
{
|
|
"epoch": 1.3946808510638298,
|
|
"grad_norm": 4.047403335571289,
|
|
"learning_rate": 8.634229382862152e-06,
|
|
"loss": 0.8982,
|
|
"step": 5244
|
|
},
|
|
{
|
|
"epoch": 1.3949468085106382,
|
|
"grad_norm": 4.245815753936768,
|
|
"learning_rate": 8.633625281922477e-06,
|
|
"loss": 0.8558,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 1.3952127659574467,
|
|
"grad_norm": 3.9995036125183105,
|
|
"learning_rate": 8.633021068554155e-06,
|
|
"loss": 0.8246,
|
|
"step": 5246
|
|
},
|
|
{
|
|
"epoch": 1.3954787234042554,
|
|
"grad_norm": 4.213914394378662,
|
|
"learning_rate": 8.632416742775886e-06,
|
|
"loss": 0.7979,
|
|
"step": 5247
|
|
},
|
|
{
|
|
"epoch": 1.3957446808510638,
|
|
"grad_norm": 4.043915748596191,
|
|
"learning_rate": 8.631812304606367e-06,
|
|
"loss": 0.8903,
|
|
"step": 5248
|
|
},
|
|
{
|
|
"epoch": 1.3960106382978723,
|
|
"grad_norm": 3.995999336242676,
|
|
"learning_rate": 8.631207754064299e-06,
|
|
"loss": 0.7445,
|
|
"step": 5249
|
|
},
|
|
{
|
|
"epoch": 1.3962765957446808,
|
|
"grad_norm": 3.6424171924591064,
|
|
"learning_rate": 8.630603091168385e-06,
|
|
"loss": 0.6922,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 1.3965425531914892,
|
|
"grad_norm": 4.3226118087768555,
|
|
"learning_rate": 8.62999831593734e-06,
|
|
"loss": 0.8686,
|
|
"step": 5251
|
|
},
|
|
{
|
|
"epoch": 1.396808510638298,
|
|
"grad_norm": 3.89966082572937,
|
|
"learning_rate": 8.629393428389873e-06,
|
|
"loss": 0.7592,
|
|
"step": 5252
|
|
},
|
|
{
|
|
"epoch": 1.3970744680851064,
|
|
"grad_norm": 4.409592151641846,
|
|
"learning_rate": 8.628788428544698e-06,
|
|
"loss": 0.952,
|
|
"step": 5253
|
|
},
|
|
{
|
|
"epoch": 1.3973404255319148,
|
|
"grad_norm": 3.884060859680176,
|
|
"learning_rate": 8.62818331642054e-06,
|
|
"loss": 0.83,
|
|
"step": 5254
|
|
},
|
|
{
|
|
"epoch": 1.3976063829787235,
|
|
"grad_norm": 3.480745792388916,
|
|
"learning_rate": 8.627578092036117e-06,
|
|
"loss": 0.7324,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 1.397872340425532,
|
|
"grad_norm": 3.862119436264038,
|
|
"learning_rate": 8.626972755410156e-06,
|
|
"loss": 0.7555,
|
|
"step": 5256
|
|
},
|
|
{
|
|
"epoch": 1.3981382978723405,
|
|
"grad_norm": 4.149264335632324,
|
|
"learning_rate": 8.626367306561387e-06,
|
|
"loss": 0.7649,
|
|
"step": 5257
|
|
},
|
|
{
|
|
"epoch": 1.398404255319149,
|
|
"grad_norm": 3.6122639179229736,
|
|
"learning_rate": 8.625761745508547e-06,
|
|
"loss": 0.7959,
|
|
"step": 5258
|
|
},
|
|
{
|
|
"epoch": 1.3986702127659574,
|
|
"grad_norm": 3.611455202102661,
|
|
"learning_rate": 8.625156072270367e-06,
|
|
"loss": 0.8546,
|
|
"step": 5259
|
|
},
|
|
{
|
|
"epoch": 1.398936170212766,
|
|
"grad_norm": 4.0274858474731445,
|
|
"learning_rate": 8.624550286865592e-06,
|
|
"loss": 0.818,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 1.3992021276595745,
|
|
"grad_norm": 4.080778121948242,
|
|
"learning_rate": 8.623944389312962e-06,
|
|
"loss": 0.8599,
|
|
"step": 5261
|
|
},
|
|
{
|
|
"epoch": 1.399468085106383,
|
|
"grad_norm": 4.097471237182617,
|
|
"learning_rate": 8.623338379631227e-06,
|
|
"loss": 0.8178,
|
|
"step": 5262
|
|
},
|
|
{
|
|
"epoch": 1.3997340425531914,
|
|
"grad_norm": 3.6200075149536133,
|
|
"learning_rate": 8.622732257839137e-06,
|
|
"loss": 0.8381,
|
|
"step": 5263
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"grad_norm": 4.054747581481934,
|
|
"learning_rate": 8.622126023955446e-06,
|
|
"loss": 0.9865,
|
|
"step": 5264
|
|
},
|
|
{
|
|
"epoch": 1.4002659574468086,
|
|
"grad_norm": 4.653242111206055,
|
|
"learning_rate": 8.62151967799891e-06,
|
|
"loss": 0.8813,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 1.400531914893617,
|
|
"grad_norm": 4.182617664337158,
|
|
"learning_rate": 8.620913219988291e-06,
|
|
"loss": 0.7061,
|
|
"step": 5266
|
|
},
|
|
{
|
|
"epoch": 1.4007978723404255,
|
|
"grad_norm": 3.594130277633667,
|
|
"learning_rate": 8.620306649942356e-06,
|
|
"loss": 0.7468,
|
|
"step": 5267
|
|
},
|
|
{
|
|
"epoch": 1.401063829787234,
|
|
"grad_norm": 4.210184574127197,
|
|
"learning_rate": 8.619699967879868e-06,
|
|
"loss": 0.9574,
|
|
"step": 5268
|
|
},
|
|
{
|
|
"epoch": 1.4013297872340424,
|
|
"grad_norm": 4.212064743041992,
|
|
"learning_rate": 8.619093173819603e-06,
|
|
"loss": 0.8027,
|
|
"step": 5269
|
|
},
|
|
{
|
|
"epoch": 1.4015957446808511,
|
|
"grad_norm": 4.000636100769043,
|
|
"learning_rate": 8.618486267780334e-06,
|
|
"loss": 0.8482,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 1.4018617021276596,
|
|
"grad_norm": 4.396604537963867,
|
|
"learning_rate": 8.617879249780841e-06,
|
|
"loss": 0.8989,
|
|
"step": 5271
|
|
},
|
|
{
|
|
"epoch": 1.402127659574468,
|
|
"grad_norm": 3.6377105712890625,
|
|
"learning_rate": 8.617272119839903e-06,
|
|
"loss": 0.7686,
|
|
"step": 5272
|
|
},
|
|
{
|
|
"epoch": 1.4023936170212765,
|
|
"grad_norm": 3.8942556381225586,
|
|
"learning_rate": 8.616664877976308e-06,
|
|
"loss": 0.8185,
|
|
"step": 5273
|
|
},
|
|
{
|
|
"epoch": 1.402659574468085,
|
|
"grad_norm": 3.9607818126678467,
|
|
"learning_rate": 8.616057524208843e-06,
|
|
"loss": 0.6682,
|
|
"step": 5274
|
|
},
|
|
{
|
|
"epoch": 1.4029255319148937,
|
|
"grad_norm": 4.523376941680908,
|
|
"learning_rate": 8.615450058556301e-06,
|
|
"loss": 0.8093,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 1.4031914893617021,
|
|
"grad_norm": 4.111645221710205,
|
|
"learning_rate": 8.614842481037476e-06,
|
|
"loss": 0.8694,
|
|
"step": 5276
|
|
},
|
|
{
|
|
"epoch": 1.4034574468085106,
|
|
"grad_norm": 3.7978808879852295,
|
|
"learning_rate": 8.61423479167117e-06,
|
|
"loss": 0.7477,
|
|
"step": 5277
|
|
},
|
|
{
|
|
"epoch": 1.4037234042553193,
|
|
"grad_norm": 3.669728994369507,
|
|
"learning_rate": 8.613626990476186e-06,
|
|
"loss": 0.7951,
|
|
"step": 5278
|
|
},
|
|
{
|
|
"epoch": 1.4039893617021277,
|
|
"grad_norm": 4.3240251541137695,
|
|
"learning_rate": 8.613019077471325e-06,
|
|
"loss": 0.8721,
|
|
"step": 5279
|
|
},
|
|
{
|
|
"epoch": 1.4042553191489362,
|
|
"grad_norm": 3.702890157699585,
|
|
"learning_rate": 8.6124110526754e-06,
|
|
"loss": 0.6856,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 1.4045212765957447,
|
|
"grad_norm": 4.085876941680908,
|
|
"learning_rate": 8.611802916107225e-06,
|
|
"loss": 0.7458,
|
|
"step": 5281
|
|
},
|
|
{
|
|
"epoch": 1.4047872340425531,
|
|
"grad_norm": 4.095217704772949,
|
|
"learning_rate": 8.611194667785615e-06,
|
|
"loss": 0.821,
|
|
"step": 5282
|
|
},
|
|
{
|
|
"epoch": 1.4050531914893618,
|
|
"grad_norm": 3.8958888053894043,
|
|
"learning_rate": 8.610586307729393e-06,
|
|
"loss": 0.7271,
|
|
"step": 5283
|
|
},
|
|
{
|
|
"epoch": 1.4053191489361703,
|
|
"grad_norm": 3.696851968765259,
|
|
"learning_rate": 8.609977835957378e-06,
|
|
"loss": 0.7236,
|
|
"step": 5284
|
|
},
|
|
{
|
|
"epoch": 1.4055851063829787,
|
|
"grad_norm": 4.185340404510498,
|
|
"learning_rate": 8.609369252488398e-06,
|
|
"loss": 0.9089,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 1.4058510638297872,
|
|
"grad_norm": 4.072790622711182,
|
|
"learning_rate": 8.608760557341284e-06,
|
|
"loss": 0.761,
|
|
"step": 5286
|
|
},
|
|
{
|
|
"epoch": 1.4061170212765957,
|
|
"grad_norm": 3.8811473846435547,
|
|
"learning_rate": 8.60815175053487e-06,
|
|
"loss": 0.8021,
|
|
"step": 5287
|
|
},
|
|
{
|
|
"epoch": 1.4063829787234043,
|
|
"grad_norm": 4.050495624542236,
|
|
"learning_rate": 8.607542832087993e-06,
|
|
"loss": 0.7736,
|
|
"step": 5288
|
|
},
|
|
{
|
|
"epoch": 1.4066489361702128,
|
|
"grad_norm": 3.903702735900879,
|
|
"learning_rate": 8.606933802019493e-06,
|
|
"loss": 0.8525,
|
|
"step": 5289
|
|
},
|
|
{
|
|
"epoch": 1.4069148936170213,
|
|
"grad_norm": 3.618151903152466,
|
|
"learning_rate": 8.606324660348214e-06,
|
|
"loss": 0.7992,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 1.4071808510638297,
|
|
"grad_norm": 3.910585641860962,
|
|
"learning_rate": 8.605715407093005e-06,
|
|
"loss": 0.8235,
|
|
"step": 5291
|
|
},
|
|
{
|
|
"epoch": 1.4074468085106382,
|
|
"grad_norm": 4.317497253417969,
|
|
"learning_rate": 8.605106042272715e-06,
|
|
"loss": 0.8737,
|
|
"step": 5292
|
|
},
|
|
{
|
|
"epoch": 1.4077127659574469,
|
|
"grad_norm": 4.357272624969482,
|
|
"learning_rate": 8.6044965659062e-06,
|
|
"loss": 0.787,
|
|
"step": 5293
|
|
},
|
|
{
|
|
"epoch": 1.4079787234042553,
|
|
"grad_norm": 4.051640033721924,
|
|
"learning_rate": 8.603886978012317e-06,
|
|
"loss": 0.8513,
|
|
"step": 5294
|
|
},
|
|
{
|
|
"epoch": 1.4082446808510638,
|
|
"grad_norm": 4.226726055145264,
|
|
"learning_rate": 8.60327727860993e-06,
|
|
"loss": 0.717,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 1.4085106382978723,
|
|
"grad_norm": 3.7265825271606445,
|
|
"learning_rate": 8.6026674677179e-06,
|
|
"loss": 0.7177,
|
|
"step": 5296
|
|
},
|
|
{
|
|
"epoch": 1.4087765957446807,
|
|
"grad_norm": 3.866156816482544,
|
|
"learning_rate": 8.602057545355096e-06,
|
|
"loss": 0.78,
|
|
"step": 5297
|
|
},
|
|
{
|
|
"epoch": 1.4090425531914894,
|
|
"grad_norm": 3.843125820159912,
|
|
"learning_rate": 8.601447511540392e-06,
|
|
"loss": 0.8847,
|
|
"step": 5298
|
|
},
|
|
{
|
|
"epoch": 1.4093085106382979,
|
|
"grad_norm": 3.813894033432007,
|
|
"learning_rate": 8.600837366292663e-06,
|
|
"loss": 0.7,
|
|
"step": 5299
|
|
},
|
|
{
|
|
"epoch": 1.4095744680851063,
|
|
"grad_norm": 4.289909362792969,
|
|
"learning_rate": 8.600227109630785e-06,
|
|
"loss": 0.7832,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 1.409840425531915,
|
|
"grad_norm": 4.330870151519775,
|
|
"learning_rate": 8.599616741573642e-06,
|
|
"loss": 0.9482,
|
|
"step": 5301
|
|
},
|
|
{
|
|
"epoch": 1.4101063829787235,
|
|
"grad_norm": 3.625694990158081,
|
|
"learning_rate": 8.599006262140117e-06,
|
|
"loss": 0.6515,
|
|
"step": 5302
|
|
},
|
|
{
|
|
"epoch": 1.410372340425532,
|
|
"grad_norm": 4.081284999847412,
|
|
"learning_rate": 8.598395671349104e-06,
|
|
"loss": 0.9656,
|
|
"step": 5303
|
|
},
|
|
{
|
|
"epoch": 1.4106382978723404,
|
|
"grad_norm": 4.240716457366943,
|
|
"learning_rate": 8.59778496921949e-06,
|
|
"loss": 0.8328,
|
|
"step": 5304
|
|
},
|
|
{
|
|
"epoch": 1.4109042553191489,
|
|
"grad_norm": 3.9750494956970215,
|
|
"learning_rate": 8.597174155770174e-06,
|
|
"loss": 0.7686,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 1.4111702127659576,
|
|
"grad_norm": 3.6305007934570312,
|
|
"learning_rate": 8.596563231020054e-06,
|
|
"loss": 0.7059,
|
|
"step": 5306
|
|
},
|
|
{
|
|
"epoch": 1.411436170212766,
|
|
"grad_norm": 3.9132840633392334,
|
|
"learning_rate": 8.595952194988034e-06,
|
|
"loss": 0.8509,
|
|
"step": 5307
|
|
},
|
|
{
|
|
"epoch": 1.4117021276595745,
|
|
"grad_norm": 4.162221431732178,
|
|
"learning_rate": 8.59534104769302e-06,
|
|
"loss": 0.82,
|
|
"step": 5308
|
|
},
|
|
{
|
|
"epoch": 1.411968085106383,
|
|
"grad_norm": 4.090907096862793,
|
|
"learning_rate": 8.594729789153919e-06,
|
|
"loss": 0.9025,
|
|
"step": 5309
|
|
},
|
|
{
|
|
"epoch": 1.4122340425531914,
|
|
"grad_norm": 4.178388595581055,
|
|
"learning_rate": 8.594118419389648e-06,
|
|
"loss": 0.8537,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 1.4125,
|
|
"grad_norm": 3.5532939434051514,
|
|
"learning_rate": 8.59350693841912e-06,
|
|
"loss": 0.684,
|
|
"step": 5311
|
|
},
|
|
{
|
|
"epoch": 1.4127659574468086,
|
|
"grad_norm": 3.9625163078308105,
|
|
"learning_rate": 8.592895346261258e-06,
|
|
"loss": 0.7501,
|
|
"step": 5312
|
|
},
|
|
{
|
|
"epoch": 1.413031914893617,
|
|
"grad_norm": 3.4592795372009277,
|
|
"learning_rate": 8.592283642934983e-06,
|
|
"loss": 0.8845,
|
|
"step": 5313
|
|
},
|
|
{
|
|
"epoch": 1.4132978723404255,
|
|
"grad_norm": 4.265946865081787,
|
|
"learning_rate": 8.591671828459222e-06,
|
|
"loss": 0.8354,
|
|
"step": 5314
|
|
},
|
|
{
|
|
"epoch": 1.413563829787234,
|
|
"grad_norm": 4.301452159881592,
|
|
"learning_rate": 8.591059902852907e-06,
|
|
"loss": 0.9654,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 1.4138297872340426,
|
|
"grad_norm": 3.953643560409546,
|
|
"learning_rate": 8.59044786613497e-06,
|
|
"loss": 0.8592,
|
|
"step": 5316
|
|
},
|
|
{
|
|
"epoch": 1.414095744680851,
|
|
"grad_norm": 3.8107998371124268,
|
|
"learning_rate": 8.589835718324349e-06,
|
|
"loss": 0.7486,
|
|
"step": 5317
|
|
},
|
|
{
|
|
"epoch": 1.4143617021276595,
|
|
"grad_norm": 4.148920059204102,
|
|
"learning_rate": 8.589223459439987e-06,
|
|
"loss": 0.8111,
|
|
"step": 5318
|
|
},
|
|
{
|
|
"epoch": 1.414627659574468,
|
|
"grad_norm": 3.7461628913879395,
|
|
"learning_rate": 8.588611089500821e-06,
|
|
"loss": 0.7551,
|
|
"step": 5319
|
|
},
|
|
{
|
|
"epoch": 1.4148936170212765,
|
|
"grad_norm": 4.387768268585205,
|
|
"learning_rate": 8.587998608525806e-06,
|
|
"loss": 0.933,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 1.4151595744680852,
|
|
"grad_norm": 3.419297933578491,
|
|
"learning_rate": 8.587386016533887e-06,
|
|
"loss": 0.7643,
|
|
"step": 5321
|
|
},
|
|
{
|
|
"epoch": 1.4154255319148936,
|
|
"grad_norm": 3.7075390815734863,
|
|
"learning_rate": 8.586773313544023e-06,
|
|
"loss": 0.7818,
|
|
"step": 5322
|
|
},
|
|
{
|
|
"epoch": 1.415691489361702,
|
|
"grad_norm": 4.141719341278076,
|
|
"learning_rate": 8.586160499575168e-06,
|
|
"loss": 0.912,
|
|
"step": 5323
|
|
},
|
|
{
|
|
"epoch": 1.4159574468085108,
|
|
"grad_norm": 4.2602386474609375,
|
|
"learning_rate": 8.585547574646287e-06,
|
|
"loss": 0.834,
|
|
"step": 5324
|
|
},
|
|
{
|
|
"epoch": 1.4162234042553192,
|
|
"grad_norm": 4.043152332305908,
|
|
"learning_rate": 8.584934538776342e-06,
|
|
"loss": 0.6793,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 1.4164893617021277,
|
|
"grad_norm": 4.062325954437256,
|
|
"learning_rate": 8.584321391984301e-06,
|
|
"loss": 0.8172,
|
|
"step": 5326
|
|
},
|
|
{
|
|
"epoch": 1.4167553191489362,
|
|
"grad_norm": 3.731950044631958,
|
|
"learning_rate": 8.583708134289138e-06,
|
|
"loss": 0.6754,
|
|
"step": 5327
|
|
},
|
|
{
|
|
"epoch": 1.4170212765957446,
|
|
"grad_norm": 4.3393940925598145,
|
|
"learning_rate": 8.583094765709823e-06,
|
|
"loss": 0.8304,
|
|
"step": 5328
|
|
},
|
|
{
|
|
"epoch": 1.4172872340425533,
|
|
"grad_norm": 4.178645610809326,
|
|
"learning_rate": 8.582481286265341e-06,
|
|
"loss": 0.9168,
|
|
"step": 5329
|
|
},
|
|
{
|
|
"epoch": 1.4175531914893618,
|
|
"grad_norm": 3.5687899589538574,
|
|
"learning_rate": 8.581867695974667e-06,
|
|
"loss": 0.6632,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 1.4178191489361702,
|
|
"grad_norm": 3.7236688137054443,
|
|
"learning_rate": 8.58125399485679e-06,
|
|
"loss": 0.6788,
|
|
"step": 5331
|
|
},
|
|
{
|
|
"epoch": 1.4180851063829787,
|
|
"grad_norm": 3.8592636585235596,
|
|
"learning_rate": 8.5806401829307e-06,
|
|
"loss": 0.8632,
|
|
"step": 5332
|
|
},
|
|
{
|
|
"epoch": 1.4183510638297872,
|
|
"grad_norm": 3.7756807804107666,
|
|
"learning_rate": 8.580026260215384e-06,
|
|
"loss": 0.6994,
|
|
"step": 5333
|
|
},
|
|
{
|
|
"epoch": 1.4186170212765958,
|
|
"grad_norm": 3.481576919555664,
|
|
"learning_rate": 8.579412226729843e-06,
|
|
"loss": 0.8748,
|
|
"step": 5334
|
|
},
|
|
{
|
|
"epoch": 1.4188829787234043,
|
|
"grad_norm": 3.908369779586792,
|
|
"learning_rate": 8.578798082493074e-06,
|
|
"loss": 0.7567,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 1.4191489361702128,
|
|
"grad_norm": 4.084057807922363,
|
|
"learning_rate": 8.578183827524076e-06,
|
|
"loss": 0.9174,
|
|
"step": 5336
|
|
},
|
|
{
|
|
"epoch": 1.4194148936170212,
|
|
"grad_norm": 4.469969749450684,
|
|
"learning_rate": 8.57756946184186e-06,
|
|
"loss": 0.9547,
|
|
"step": 5337
|
|
},
|
|
{
|
|
"epoch": 1.4196808510638297,
|
|
"grad_norm": 3.8578479290008545,
|
|
"learning_rate": 8.576954985465431e-06,
|
|
"loss": 0.8135,
|
|
"step": 5338
|
|
},
|
|
{
|
|
"epoch": 1.4199468085106384,
|
|
"grad_norm": 3.7595484256744385,
|
|
"learning_rate": 8.576340398413804e-06,
|
|
"loss": 0.7724,
|
|
"step": 5339
|
|
},
|
|
{
|
|
"epoch": 1.4202127659574468,
|
|
"grad_norm": 4.005858898162842,
|
|
"learning_rate": 8.575725700705995e-06,
|
|
"loss": 0.8386,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 1.4204787234042553,
|
|
"grad_norm": 4.103984355926514,
|
|
"learning_rate": 8.575110892361022e-06,
|
|
"loss": 0.9413,
|
|
"step": 5341
|
|
},
|
|
{
|
|
"epoch": 1.4207446808510638,
|
|
"grad_norm": 3.5380845069885254,
|
|
"learning_rate": 8.57449597339791e-06,
|
|
"loss": 0.8393,
|
|
"step": 5342
|
|
},
|
|
{
|
|
"epoch": 1.4210106382978722,
|
|
"grad_norm": 3.589729070663452,
|
|
"learning_rate": 8.573880943835684e-06,
|
|
"loss": 0.7789,
|
|
"step": 5343
|
|
},
|
|
{
|
|
"epoch": 1.421276595744681,
|
|
"grad_norm": 4.016366004943848,
|
|
"learning_rate": 8.573265803693374e-06,
|
|
"loss": 0.7377,
|
|
"step": 5344
|
|
},
|
|
{
|
|
"epoch": 1.4215425531914894,
|
|
"grad_norm": 3.708329439163208,
|
|
"learning_rate": 8.572650552990012e-06,
|
|
"loss": 0.8608,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 1.4218085106382978,
|
|
"grad_norm": 4.192487716674805,
|
|
"learning_rate": 8.572035191744637e-06,
|
|
"loss": 0.7963,
|
|
"step": 5346
|
|
},
|
|
{
|
|
"epoch": 1.4220744680851065,
|
|
"grad_norm": 3.561629056930542,
|
|
"learning_rate": 8.571419719976287e-06,
|
|
"loss": 0.8004,
|
|
"step": 5347
|
|
},
|
|
{
|
|
"epoch": 1.422340425531915,
|
|
"grad_norm": 3.7709176540374756,
|
|
"learning_rate": 8.570804137704005e-06,
|
|
"loss": 0.7012,
|
|
"step": 5348
|
|
},
|
|
{
|
|
"epoch": 1.4226063829787234,
|
|
"grad_norm": 3.842339515686035,
|
|
"learning_rate": 8.57018844494684e-06,
|
|
"loss": 0.8063,
|
|
"step": 5349
|
|
},
|
|
{
|
|
"epoch": 1.422872340425532,
|
|
"grad_norm": 4.014485836029053,
|
|
"learning_rate": 8.56957264172384e-06,
|
|
"loss": 0.681,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 1.4231382978723404,
|
|
"grad_norm": 3.9877431392669678,
|
|
"learning_rate": 8.568956728054061e-06,
|
|
"loss": 0.9011,
|
|
"step": 5351
|
|
},
|
|
{
|
|
"epoch": 1.423404255319149,
|
|
"grad_norm": 3.9741530418395996,
|
|
"learning_rate": 8.568340703956558e-06,
|
|
"loss": 0.8245,
|
|
"step": 5352
|
|
},
|
|
{
|
|
"epoch": 1.4236702127659575,
|
|
"grad_norm": 4.008678436279297,
|
|
"learning_rate": 8.567724569450393e-06,
|
|
"loss": 0.8588,
|
|
"step": 5353
|
|
},
|
|
{
|
|
"epoch": 1.423936170212766,
|
|
"grad_norm": 4.2688679695129395,
|
|
"learning_rate": 8.56710832455463e-06,
|
|
"loss": 0.8026,
|
|
"step": 5354
|
|
},
|
|
{
|
|
"epoch": 1.4242021276595744,
|
|
"grad_norm": 4.144524097442627,
|
|
"learning_rate": 8.566491969288333e-06,
|
|
"loss": 0.7977,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 1.424468085106383,
|
|
"grad_norm": 4.431448459625244,
|
|
"learning_rate": 8.565875503670578e-06,
|
|
"loss": 0.9466,
|
|
"step": 5356
|
|
},
|
|
{
|
|
"epoch": 1.4247340425531916,
|
|
"grad_norm": 3.9344115257263184,
|
|
"learning_rate": 8.565258927720436e-06,
|
|
"loss": 0.7571,
|
|
"step": 5357
|
|
},
|
|
{
|
|
"epoch": 1.425,
|
|
"grad_norm": 4.618174076080322,
|
|
"learning_rate": 8.564642241456986e-06,
|
|
"loss": 0.92,
|
|
"step": 5358
|
|
},
|
|
{
|
|
"epoch": 1.4252659574468085,
|
|
"grad_norm": 4.515613079071045,
|
|
"learning_rate": 8.564025444899308e-06,
|
|
"loss": 0.8339,
|
|
"step": 5359
|
|
},
|
|
{
|
|
"epoch": 1.425531914893617,
|
|
"grad_norm": 3.8892219066619873,
|
|
"learning_rate": 8.563408538066486e-06,
|
|
"loss": 0.6946,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 1.4257978723404254,
|
|
"grad_norm": 3.8335928916931152,
|
|
"learning_rate": 8.562791520977608e-06,
|
|
"loss": 0.7894,
|
|
"step": 5361
|
|
},
|
|
{
|
|
"epoch": 1.4260638297872341,
|
|
"grad_norm": 3.8898446559906006,
|
|
"learning_rate": 8.562174393651767e-06,
|
|
"loss": 0.6504,
|
|
"step": 5362
|
|
},
|
|
{
|
|
"epoch": 1.4263297872340426,
|
|
"grad_norm": 3.916454553604126,
|
|
"learning_rate": 8.561557156108055e-06,
|
|
"loss": 0.8178,
|
|
"step": 5363
|
|
},
|
|
{
|
|
"epoch": 1.426595744680851,
|
|
"grad_norm": 4.594573020935059,
|
|
"learning_rate": 8.560939808365571e-06,
|
|
"loss": 0.8554,
|
|
"step": 5364
|
|
},
|
|
{
|
|
"epoch": 1.4268617021276595,
|
|
"grad_norm": 3.920474052429199,
|
|
"learning_rate": 8.56032235044342e-06,
|
|
"loss": 0.9173,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 1.427127659574468,
|
|
"grad_norm": 3.8437423706054688,
|
|
"learning_rate": 8.5597047823607e-06,
|
|
"loss": 0.7551,
|
|
"step": 5366
|
|
},
|
|
{
|
|
"epoch": 1.4273936170212767,
|
|
"grad_norm": 3.631983518600464,
|
|
"learning_rate": 8.559087104136525e-06,
|
|
"loss": 0.8889,
|
|
"step": 5367
|
|
},
|
|
{
|
|
"epoch": 1.4276595744680851,
|
|
"grad_norm": 3.7418458461761475,
|
|
"learning_rate": 8.558469315790005e-06,
|
|
"loss": 0.7964,
|
|
"step": 5368
|
|
},
|
|
{
|
|
"epoch": 1.4279255319148936,
|
|
"grad_norm": 4.14785099029541,
|
|
"learning_rate": 8.557851417340252e-06,
|
|
"loss": 0.8312,
|
|
"step": 5369
|
|
},
|
|
{
|
|
"epoch": 1.4281914893617023,
|
|
"grad_norm": 4.0224103927612305,
|
|
"learning_rate": 8.55723340880639e-06,
|
|
"loss": 0.9175,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 1.4284574468085105,
|
|
"grad_norm": 3.899369478225708,
|
|
"learning_rate": 8.556615290207538e-06,
|
|
"loss": 0.776,
|
|
"step": 5371
|
|
},
|
|
{
|
|
"epoch": 1.4287234042553192,
|
|
"grad_norm": 3.869248628616333,
|
|
"learning_rate": 8.555997061562821e-06,
|
|
"loss": 0.7417,
|
|
"step": 5372
|
|
},
|
|
{
|
|
"epoch": 1.4289893617021276,
|
|
"grad_norm": 3.8381667137145996,
|
|
"learning_rate": 8.555378722891367e-06,
|
|
"loss": 0.7887,
|
|
"step": 5373
|
|
},
|
|
{
|
|
"epoch": 1.429255319148936,
|
|
"grad_norm": 4.0374674797058105,
|
|
"learning_rate": 8.55476027421231e-06,
|
|
"loss": 0.7039,
|
|
"step": 5374
|
|
},
|
|
{
|
|
"epoch": 1.4295212765957448,
|
|
"grad_norm": 4.473758220672607,
|
|
"learning_rate": 8.554141715544788e-06,
|
|
"loss": 0.8829,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 1.4297872340425533,
|
|
"grad_norm": 3.995429277420044,
|
|
"learning_rate": 8.553523046907934e-06,
|
|
"loss": 0.8441,
|
|
"step": 5376
|
|
},
|
|
{
|
|
"epoch": 1.4300531914893617,
|
|
"grad_norm": 3.942129373550415,
|
|
"learning_rate": 8.552904268320895e-06,
|
|
"loss": 0.8657,
|
|
"step": 5377
|
|
},
|
|
{
|
|
"epoch": 1.4303191489361702,
|
|
"grad_norm": 4.163167953491211,
|
|
"learning_rate": 8.552285379802811e-06,
|
|
"loss": 0.7497,
|
|
"step": 5378
|
|
},
|
|
{
|
|
"epoch": 1.4305851063829786,
|
|
"grad_norm": 3.926020860671997,
|
|
"learning_rate": 8.551666381372839e-06,
|
|
"loss": 0.8265,
|
|
"step": 5379
|
|
},
|
|
{
|
|
"epoch": 1.4308510638297873,
|
|
"grad_norm": 3.686615228652954,
|
|
"learning_rate": 8.551047273050126e-06,
|
|
"loss": 0.694,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 1.4311170212765958,
|
|
"grad_norm": 4.436965465545654,
|
|
"learning_rate": 8.55042805485383e-06,
|
|
"loss": 0.929,
|
|
"step": 5381
|
|
},
|
|
{
|
|
"epoch": 1.4313829787234043,
|
|
"grad_norm": 4.103221416473389,
|
|
"learning_rate": 8.549808726803108e-06,
|
|
"loss": 0.7724,
|
|
"step": 5382
|
|
},
|
|
{
|
|
"epoch": 1.4316489361702127,
|
|
"grad_norm": 3.994560718536377,
|
|
"learning_rate": 8.549189288917127e-06,
|
|
"loss": 0.6845,
|
|
"step": 5383
|
|
},
|
|
{
|
|
"epoch": 1.4319148936170212,
|
|
"grad_norm": 4.3197712898254395,
|
|
"learning_rate": 8.548569741215049e-06,
|
|
"loss": 0.8348,
|
|
"step": 5384
|
|
},
|
|
{
|
|
"epoch": 1.4321808510638299,
|
|
"grad_norm": 4.51045560836792,
|
|
"learning_rate": 8.547950083716047e-06,
|
|
"loss": 0.8659,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 1.4324468085106383,
|
|
"grad_norm": 4.250168323516846,
|
|
"learning_rate": 8.54733031643929e-06,
|
|
"loss": 0.9424,
|
|
"step": 5386
|
|
},
|
|
{
|
|
"epoch": 1.4327127659574468,
|
|
"grad_norm": 3.6297523975372314,
|
|
"learning_rate": 8.54671043940396e-06,
|
|
"loss": 0.8464,
|
|
"step": 5387
|
|
},
|
|
{
|
|
"epoch": 1.4329787234042553,
|
|
"grad_norm": 3.914750099182129,
|
|
"learning_rate": 8.54609045262923e-06,
|
|
"loss": 0.9345,
|
|
"step": 5388
|
|
},
|
|
{
|
|
"epoch": 1.4332446808510637,
|
|
"grad_norm": 4.086660385131836,
|
|
"learning_rate": 8.545470356134289e-06,
|
|
"loss": 0.8161,
|
|
"step": 5389
|
|
},
|
|
{
|
|
"epoch": 1.4335106382978724,
|
|
"grad_norm": 3.657174825668335,
|
|
"learning_rate": 8.54485014993832e-06,
|
|
"loss": 0.8184,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 1.4337765957446809,
|
|
"grad_norm": 4.197863578796387,
|
|
"learning_rate": 8.544229834060512e-06,
|
|
"loss": 0.8937,
|
|
"step": 5391
|
|
},
|
|
{
|
|
"epoch": 1.4340425531914893,
|
|
"grad_norm": 4.215087413787842,
|
|
"learning_rate": 8.543609408520062e-06,
|
|
"loss": 0.8149,
|
|
"step": 5392
|
|
},
|
|
{
|
|
"epoch": 1.434308510638298,
|
|
"grad_norm": 4.2908101081848145,
|
|
"learning_rate": 8.542988873336164e-06,
|
|
"loss": 0.7731,
|
|
"step": 5393
|
|
},
|
|
{
|
|
"epoch": 1.4345744680851062,
|
|
"grad_norm": 3.921720266342163,
|
|
"learning_rate": 8.54236822852802e-06,
|
|
"loss": 0.7697,
|
|
"step": 5394
|
|
},
|
|
{
|
|
"epoch": 1.434840425531915,
|
|
"grad_norm": 4.464201927185059,
|
|
"learning_rate": 8.54174747411483e-06,
|
|
"loss": 0.8365,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 1.4351063829787234,
|
|
"grad_norm": 3.9795491695404053,
|
|
"learning_rate": 8.541126610115806e-06,
|
|
"loss": 0.8086,
|
|
"step": 5396
|
|
},
|
|
{
|
|
"epoch": 1.4353723404255319,
|
|
"grad_norm": 4.0533766746521,
|
|
"learning_rate": 8.540505636550153e-06,
|
|
"loss": 0.7996,
|
|
"step": 5397
|
|
},
|
|
{
|
|
"epoch": 1.4356382978723405,
|
|
"grad_norm": 4.261003494262695,
|
|
"learning_rate": 8.53988455343709e-06,
|
|
"loss": 0.7748,
|
|
"step": 5398
|
|
},
|
|
{
|
|
"epoch": 1.435904255319149,
|
|
"grad_norm": 4.159748077392578,
|
|
"learning_rate": 8.53926336079583e-06,
|
|
"loss": 0.8867,
|
|
"step": 5399
|
|
},
|
|
{
|
|
"epoch": 1.4361702127659575,
|
|
"grad_norm": 3.9314358234405518,
|
|
"learning_rate": 8.538642058645595e-06,
|
|
"loss": 0.8713,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 1.436436170212766,
|
|
"grad_norm": 3.8043625354766846,
|
|
"learning_rate": 8.538020647005607e-06,
|
|
"loss": 0.7276,
|
|
"step": 5401
|
|
},
|
|
{
|
|
"epoch": 1.4367021276595744,
|
|
"grad_norm": 4.576129913330078,
|
|
"learning_rate": 8.537399125895096e-06,
|
|
"loss": 0.7822,
|
|
"step": 5402
|
|
},
|
|
{
|
|
"epoch": 1.436968085106383,
|
|
"grad_norm": 3.801168918609619,
|
|
"learning_rate": 8.53677749533329e-06,
|
|
"loss": 0.8445,
|
|
"step": 5403
|
|
},
|
|
{
|
|
"epoch": 1.4372340425531915,
|
|
"grad_norm": 3.763317108154297,
|
|
"learning_rate": 8.536155755339427e-06,
|
|
"loss": 0.7572,
|
|
"step": 5404
|
|
},
|
|
{
|
|
"epoch": 1.4375,
|
|
"grad_norm": 4.1881256103515625,
|
|
"learning_rate": 8.535533905932739e-06,
|
|
"loss": 0.8398,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 1.4377659574468085,
|
|
"grad_norm": 3.61997127532959,
|
|
"learning_rate": 8.534911947132469e-06,
|
|
"loss": 0.674,
|
|
"step": 5406
|
|
},
|
|
{
|
|
"epoch": 1.438031914893617,
|
|
"grad_norm": 3.6583242416381836,
|
|
"learning_rate": 8.534289878957863e-06,
|
|
"loss": 0.6655,
|
|
"step": 5407
|
|
},
|
|
{
|
|
"epoch": 1.4382978723404256,
|
|
"grad_norm": 3.9012091159820557,
|
|
"learning_rate": 8.533667701428167e-06,
|
|
"loss": 0.6869,
|
|
"step": 5408
|
|
},
|
|
{
|
|
"epoch": 1.438563829787234,
|
|
"grad_norm": 3.890615463256836,
|
|
"learning_rate": 8.53304541456263e-06,
|
|
"loss": 0.8431,
|
|
"step": 5409
|
|
},
|
|
{
|
|
"epoch": 1.4388297872340425,
|
|
"grad_norm": 3.8987715244293213,
|
|
"learning_rate": 8.532423018380511e-06,
|
|
"loss": 0.8705,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 1.439095744680851,
|
|
"grad_norm": 4.005768775939941,
|
|
"learning_rate": 8.531800512901066e-06,
|
|
"loss": 0.8555,
|
|
"step": 5411
|
|
},
|
|
{
|
|
"epoch": 1.4393617021276595,
|
|
"grad_norm": 3.9035804271698,
|
|
"learning_rate": 8.531177898143552e-06,
|
|
"loss": 0.7811,
|
|
"step": 5412
|
|
},
|
|
{
|
|
"epoch": 1.4396276595744681,
|
|
"grad_norm": 4.260951995849609,
|
|
"learning_rate": 8.530555174127236e-06,
|
|
"loss": 0.9168,
|
|
"step": 5413
|
|
},
|
|
{
|
|
"epoch": 1.4398936170212766,
|
|
"grad_norm": 4.07423210144043,
|
|
"learning_rate": 8.529932340871388e-06,
|
|
"loss": 0.7437,
|
|
"step": 5414
|
|
},
|
|
{
|
|
"epoch": 1.440159574468085,
|
|
"grad_norm": 3.9797050952911377,
|
|
"learning_rate": 8.529309398395275e-06,
|
|
"loss": 0.707,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 1.4404255319148938,
|
|
"grad_norm": 3.7319893836975098,
|
|
"learning_rate": 8.528686346718177e-06,
|
|
"loss": 0.7089,
|
|
"step": 5416
|
|
},
|
|
{
|
|
"epoch": 1.440691489361702,
|
|
"grad_norm": 4.224223613739014,
|
|
"learning_rate": 8.528063185859367e-06,
|
|
"loss": 0.786,
|
|
"step": 5417
|
|
},
|
|
{
|
|
"epoch": 1.4409574468085107,
|
|
"grad_norm": 4.449718952178955,
|
|
"learning_rate": 8.527439915838129e-06,
|
|
"loss": 0.8129,
|
|
"step": 5418
|
|
},
|
|
{
|
|
"epoch": 1.4412234042553191,
|
|
"grad_norm": 3.991421937942505,
|
|
"learning_rate": 8.526816536673748e-06,
|
|
"loss": 0.9446,
|
|
"step": 5419
|
|
},
|
|
{
|
|
"epoch": 1.4414893617021276,
|
|
"grad_norm": 3.5149245262145996,
|
|
"learning_rate": 8.52619304838551e-06,
|
|
"loss": 0.738,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 1.4417553191489363,
|
|
"grad_norm": 4.034007549285889,
|
|
"learning_rate": 8.525569450992707e-06,
|
|
"loss": 0.8011,
|
|
"step": 5421
|
|
},
|
|
{
|
|
"epoch": 1.4420212765957447,
|
|
"grad_norm": 4.191031455993652,
|
|
"learning_rate": 8.524945744514634e-06,
|
|
"loss": 0.9352,
|
|
"step": 5422
|
|
},
|
|
{
|
|
"epoch": 1.4422872340425532,
|
|
"grad_norm": 3.4210205078125,
|
|
"learning_rate": 8.524321928970591e-06,
|
|
"loss": 0.7345,
|
|
"step": 5423
|
|
},
|
|
{
|
|
"epoch": 1.4425531914893617,
|
|
"grad_norm": 3.573930263519287,
|
|
"learning_rate": 8.523698004379878e-06,
|
|
"loss": 0.6936,
|
|
"step": 5424
|
|
},
|
|
{
|
|
"epoch": 1.4428191489361701,
|
|
"grad_norm": 3.847769260406494,
|
|
"learning_rate": 8.523073970761799e-06,
|
|
"loss": 0.7465,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 1.4430851063829788,
|
|
"grad_norm": 3.526007652282715,
|
|
"learning_rate": 8.522449828135663e-06,
|
|
"loss": 0.8042,
|
|
"step": 5426
|
|
},
|
|
{
|
|
"epoch": 1.4433510638297873,
|
|
"grad_norm": 3.3529438972473145,
|
|
"learning_rate": 8.521825576520784e-06,
|
|
"loss": 0.6523,
|
|
"step": 5427
|
|
},
|
|
{
|
|
"epoch": 1.4436170212765957,
|
|
"grad_norm": 3.608856678009033,
|
|
"learning_rate": 8.521201215936474e-06,
|
|
"loss": 0.753,
|
|
"step": 5428
|
|
},
|
|
{
|
|
"epoch": 1.4438829787234042,
|
|
"grad_norm": 3.78037691116333,
|
|
"learning_rate": 8.520576746402052e-06,
|
|
"loss": 0.9188,
|
|
"step": 5429
|
|
},
|
|
{
|
|
"epoch": 1.4441489361702127,
|
|
"grad_norm": 3.6370112895965576,
|
|
"learning_rate": 8.519952167936842e-06,
|
|
"loss": 0.7606,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 1.4444148936170214,
|
|
"grad_norm": 4.091804504394531,
|
|
"learning_rate": 8.519327480560169e-06,
|
|
"loss": 0.8833,
|
|
"step": 5431
|
|
},
|
|
{
|
|
"epoch": 1.4446808510638298,
|
|
"grad_norm": 4.076303482055664,
|
|
"learning_rate": 8.518702684291358e-06,
|
|
"loss": 0.7852,
|
|
"step": 5432
|
|
},
|
|
{
|
|
"epoch": 1.4449468085106383,
|
|
"grad_norm": 3.845811605453491,
|
|
"learning_rate": 8.518077779149744e-06,
|
|
"loss": 0.7455,
|
|
"step": 5433
|
|
},
|
|
{
|
|
"epoch": 1.4452127659574467,
|
|
"grad_norm": 4.302513599395752,
|
|
"learning_rate": 8.517452765154661e-06,
|
|
"loss": 0.7273,
|
|
"step": 5434
|
|
},
|
|
{
|
|
"epoch": 1.4454787234042552,
|
|
"grad_norm": 3.78494930267334,
|
|
"learning_rate": 8.516827642325447e-06,
|
|
"loss": 0.7468,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 1.445744680851064,
|
|
"grad_norm": 3.9590561389923096,
|
|
"learning_rate": 8.516202410681446e-06,
|
|
"loss": 0.9023,
|
|
"step": 5436
|
|
},
|
|
{
|
|
"epoch": 1.4460106382978724,
|
|
"grad_norm": 4.2443766593933105,
|
|
"learning_rate": 8.515577070242005e-06,
|
|
"loss": 0.9363,
|
|
"step": 5437
|
|
},
|
|
{
|
|
"epoch": 1.4462765957446808,
|
|
"grad_norm": 3.511875867843628,
|
|
"learning_rate": 8.514951621026468e-06,
|
|
"loss": 0.7257,
|
|
"step": 5438
|
|
},
|
|
{
|
|
"epoch": 1.4465425531914895,
|
|
"grad_norm": 3.931488513946533,
|
|
"learning_rate": 8.51432606305419e-06,
|
|
"loss": 0.794,
|
|
"step": 5439
|
|
},
|
|
{
|
|
"epoch": 1.4468085106382977,
|
|
"grad_norm": 4.520570755004883,
|
|
"learning_rate": 8.513700396344527e-06,
|
|
"loss": 0.9367,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 1.4470744680851064,
|
|
"grad_norm": 4.023960113525391,
|
|
"learning_rate": 8.513074620916835e-06,
|
|
"loss": 0.8083,
|
|
"step": 5441
|
|
},
|
|
{
|
|
"epoch": 1.4473404255319149,
|
|
"grad_norm": 3.8863484859466553,
|
|
"learning_rate": 8.512448736790479e-06,
|
|
"loss": 0.7789,
|
|
"step": 5442
|
|
},
|
|
{
|
|
"epoch": 1.4476063829787233,
|
|
"grad_norm": 3.4847662448883057,
|
|
"learning_rate": 8.511822743984824e-06,
|
|
"loss": 0.6853,
|
|
"step": 5443
|
|
},
|
|
{
|
|
"epoch": 1.447872340425532,
|
|
"grad_norm": 3.668828010559082,
|
|
"learning_rate": 8.511196642519237e-06,
|
|
"loss": 0.8037,
|
|
"step": 5444
|
|
},
|
|
{
|
|
"epoch": 1.4481382978723405,
|
|
"grad_norm": 3.801157236099243,
|
|
"learning_rate": 8.510570432413095e-06,
|
|
"loss": 0.8393,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 1.448404255319149,
|
|
"grad_norm": 4.479011535644531,
|
|
"learning_rate": 8.509944113685769e-06,
|
|
"loss": 0.9082,
|
|
"step": 5446
|
|
},
|
|
{
|
|
"epoch": 1.4486702127659574,
|
|
"grad_norm": 4.385382652282715,
|
|
"learning_rate": 8.509317686356638e-06,
|
|
"loss": 0.9118,
|
|
"step": 5447
|
|
},
|
|
{
|
|
"epoch": 1.4489361702127659,
|
|
"grad_norm": 4.001799583435059,
|
|
"learning_rate": 8.50869115044509e-06,
|
|
"loss": 0.7022,
|
|
"step": 5448
|
|
},
|
|
{
|
|
"epoch": 1.4492021276595746,
|
|
"grad_norm": 4.2879228591918945,
|
|
"learning_rate": 8.508064505970503e-06,
|
|
"loss": 0.8253,
|
|
"step": 5449
|
|
},
|
|
{
|
|
"epoch": 1.449468085106383,
|
|
"grad_norm": 3.933523654937744,
|
|
"learning_rate": 8.507437752952271e-06,
|
|
"loss": 0.8163,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 1.4497340425531915,
|
|
"grad_norm": 4.011867046356201,
|
|
"learning_rate": 8.506810891409786e-06,
|
|
"loss": 0.8196,
|
|
"step": 5451
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"grad_norm": 4.269194602966309,
|
|
"learning_rate": 8.506183921362443e-06,
|
|
"loss": 0.7912,
|
|
"step": 5452
|
|
},
|
|
{
|
|
"epoch": 1.4502659574468084,
|
|
"grad_norm": 4.043778896331787,
|
|
"learning_rate": 8.505556842829643e-06,
|
|
"loss": 0.7842,
|
|
"step": 5453
|
|
},
|
|
{
|
|
"epoch": 1.450531914893617,
|
|
"grad_norm": 4.532417297363281,
|
|
"learning_rate": 8.504929655830785e-06,
|
|
"loss": 0.9794,
|
|
"step": 5454
|
|
},
|
|
{
|
|
"epoch": 1.4507978723404256,
|
|
"grad_norm": 3.571371555328369,
|
|
"learning_rate": 8.504302360385276e-06,
|
|
"loss": 0.8234,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 1.451063829787234,
|
|
"grad_norm": 3.6812736988067627,
|
|
"learning_rate": 8.50367495651253e-06,
|
|
"loss": 0.8207,
|
|
"step": 5456
|
|
},
|
|
{
|
|
"epoch": 1.4513297872340425,
|
|
"grad_norm": 3.88917875289917,
|
|
"learning_rate": 8.503047444231954e-06,
|
|
"loss": 0.8452,
|
|
"step": 5457
|
|
},
|
|
{
|
|
"epoch": 1.451595744680851,
|
|
"grad_norm": 3.7152698040008545,
|
|
"learning_rate": 8.502419823562964e-06,
|
|
"loss": 0.7018,
|
|
"step": 5458
|
|
},
|
|
{
|
|
"epoch": 1.4518617021276596,
|
|
"grad_norm": 3.9872684478759766,
|
|
"learning_rate": 8.501792094524983e-06,
|
|
"loss": 0.9355,
|
|
"step": 5459
|
|
},
|
|
{
|
|
"epoch": 1.452127659574468,
|
|
"grad_norm": 3.8965933322906494,
|
|
"learning_rate": 8.501164257137431e-06,
|
|
"loss": 0.7547,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 1.4523936170212766,
|
|
"grad_norm": 4.248835563659668,
|
|
"learning_rate": 8.500536311419735e-06,
|
|
"loss": 0.8456,
|
|
"step": 5461
|
|
},
|
|
{
|
|
"epoch": 1.452659574468085,
|
|
"grad_norm": 4.09518575668335,
|
|
"learning_rate": 8.499908257391324e-06,
|
|
"loss": 0.8698,
|
|
"step": 5462
|
|
},
|
|
{
|
|
"epoch": 1.4529255319148935,
|
|
"grad_norm": 4.262086391448975,
|
|
"learning_rate": 8.49928009507163e-06,
|
|
"loss": 0.761,
|
|
"step": 5463
|
|
},
|
|
{
|
|
"epoch": 1.4531914893617022,
|
|
"grad_norm": 3.634997606277466,
|
|
"learning_rate": 8.49865182448009e-06,
|
|
"loss": 0.7712,
|
|
"step": 5464
|
|
},
|
|
{
|
|
"epoch": 1.4534574468085106,
|
|
"grad_norm": 4.407344818115234,
|
|
"learning_rate": 8.498023445636145e-06,
|
|
"loss": 0.8103,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 1.453723404255319,
|
|
"grad_norm": 3.926379680633545,
|
|
"learning_rate": 8.497394958559236e-06,
|
|
"loss": 0.7233,
|
|
"step": 5466
|
|
},
|
|
{
|
|
"epoch": 1.4539893617021278,
|
|
"grad_norm": 4.115360736846924,
|
|
"learning_rate": 8.496766363268809e-06,
|
|
"loss": 0.9513,
|
|
"step": 5467
|
|
},
|
|
{
|
|
"epoch": 1.4542553191489362,
|
|
"grad_norm": 4.249356269836426,
|
|
"learning_rate": 8.496137659784313e-06,
|
|
"loss": 0.7799,
|
|
"step": 5468
|
|
},
|
|
{
|
|
"epoch": 1.4545212765957447,
|
|
"grad_norm": 3.9418179988861084,
|
|
"learning_rate": 8.495508848125202e-06,
|
|
"loss": 0.7216,
|
|
"step": 5469
|
|
},
|
|
{
|
|
"epoch": 1.4547872340425532,
|
|
"grad_norm": 4.33933687210083,
|
|
"learning_rate": 8.494879928310934e-06,
|
|
"loss": 0.8312,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 1.4550531914893616,
|
|
"grad_norm": 4.497339248657227,
|
|
"learning_rate": 8.494250900360963e-06,
|
|
"loss": 0.6842,
|
|
"step": 5471
|
|
},
|
|
{
|
|
"epoch": 1.4553191489361703,
|
|
"grad_norm": 4.439492225646973,
|
|
"learning_rate": 8.493621764294757e-06,
|
|
"loss": 0.8134,
|
|
"step": 5472
|
|
},
|
|
{
|
|
"epoch": 1.4555851063829788,
|
|
"grad_norm": 4.622555255889893,
|
|
"learning_rate": 8.49299252013178e-06,
|
|
"loss": 0.878,
|
|
"step": 5473
|
|
},
|
|
{
|
|
"epoch": 1.4558510638297872,
|
|
"grad_norm": 4.369466781616211,
|
|
"learning_rate": 8.492363167891502e-06,
|
|
"loss": 0.7228,
|
|
"step": 5474
|
|
},
|
|
{
|
|
"epoch": 1.4561170212765957,
|
|
"grad_norm": 4.223091125488281,
|
|
"learning_rate": 8.491733707593395e-06,
|
|
"loss": 0.8303,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 1.4563829787234042,
|
|
"grad_norm": 4.063412189483643,
|
|
"learning_rate": 8.491104139256936e-06,
|
|
"loss": 0.8504,
|
|
"step": 5476
|
|
},
|
|
{
|
|
"epoch": 1.4566489361702128,
|
|
"grad_norm": 4.342689514160156,
|
|
"learning_rate": 8.490474462901605e-06,
|
|
"loss": 0.841,
|
|
"step": 5477
|
|
},
|
|
{
|
|
"epoch": 1.4569148936170213,
|
|
"grad_norm": 4.090299129486084,
|
|
"learning_rate": 8.489844678546886e-06,
|
|
"loss": 0.8391,
|
|
"step": 5478
|
|
},
|
|
{
|
|
"epoch": 1.4571808510638298,
|
|
"grad_norm": 3.786254644393921,
|
|
"learning_rate": 8.489214786212263e-06,
|
|
"loss": 0.8498,
|
|
"step": 5479
|
|
},
|
|
{
|
|
"epoch": 1.4574468085106382,
|
|
"grad_norm": 4.191230297088623,
|
|
"learning_rate": 8.488584785917226e-06,
|
|
"loss": 0.7906,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 1.4577127659574467,
|
|
"grad_norm": 3.928368330001831,
|
|
"learning_rate": 8.487954677681269e-06,
|
|
"loss": 0.8001,
|
|
"step": 5481
|
|
},
|
|
{
|
|
"epoch": 1.4579787234042554,
|
|
"grad_norm": 3.579162836074829,
|
|
"learning_rate": 8.487324461523887e-06,
|
|
"loss": 0.8023,
|
|
"step": 5482
|
|
},
|
|
{
|
|
"epoch": 1.4582446808510638,
|
|
"grad_norm": 3.6825640201568604,
|
|
"learning_rate": 8.486694137464582e-06,
|
|
"loss": 0.7853,
|
|
"step": 5483
|
|
},
|
|
{
|
|
"epoch": 1.4585106382978723,
|
|
"grad_norm": 4.125916004180908,
|
|
"learning_rate": 8.486063705522853e-06,
|
|
"loss": 0.7216,
|
|
"step": 5484
|
|
},
|
|
{
|
|
"epoch": 1.4587765957446808,
|
|
"grad_norm": 4.086201190948486,
|
|
"learning_rate": 8.48543316571821e-06,
|
|
"loss": 0.7723,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 1.4590425531914892,
|
|
"grad_norm": 3.6054461002349854,
|
|
"learning_rate": 8.484802518070161e-06,
|
|
"loss": 0.7561,
|
|
"step": 5486
|
|
},
|
|
{
|
|
"epoch": 1.459308510638298,
|
|
"grad_norm": 3.9755938053131104,
|
|
"learning_rate": 8.48417176259822e-06,
|
|
"loss": 0.7914,
|
|
"step": 5487
|
|
},
|
|
{
|
|
"epoch": 1.4595744680851064,
|
|
"grad_norm": 3.4087741374969482,
|
|
"learning_rate": 8.483540899321901e-06,
|
|
"loss": 0.8288,
|
|
"step": 5488
|
|
},
|
|
{
|
|
"epoch": 1.4598404255319148,
|
|
"grad_norm": 4.220149517059326,
|
|
"learning_rate": 8.482909928260726e-06,
|
|
"loss": 0.9088,
|
|
"step": 5489
|
|
},
|
|
{
|
|
"epoch": 1.4601063829787235,
|
|
"grad_norm": 4.157181262969971,
|
|
"learning_rate": 8.482278849434218e-06,
|
|
"loss": 0.8727,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 1.460372340425532,
|
|
"grad_norm": 4.077250003814697,
|
|
"learning_rate": 8.481647662861901e-06,
|
|
"loss": 0.7891,
|
|
"step": 5491
|
|
},
|
|
{
|
|
"epoch": 1.4606382978723405,
|
|
"grad_norm": 3.9751412868499756,
|
|
"learning_rate": 8.481016368563308e-06,
|
|
"loss": 0.8363,
|
|
"step": 5492
|
|
},
|
|
{
|
|
"epoch": 1.460904255319149,
|
|
"grad_norm": 4.07692813873291,
|
|
"learning_rate": 8.480384966557969e-06,
|
|
"loss": 1.0291,
|
|
"step": 5493
|
|
},
|
|
{
|
|
"epoch": 1.4611702127659574,
|
|
"grad_norm": 3.963118553161621,
|
|
"learning_rate": 8.479753456865422e-06,
|
|
"loss": 0.778,
|
|
"step": 5494
|
|
},
|
|
{
|
|
"epoch": 1.461436170212766,
|
|
"grad_norm": 4.359419822692871,
|
|
"learning_rate": 8.479121839505205e-06,
|
|
"loss": 0.8413,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 1.4617021276595745,
|
|
"grad_norm": 4.071464538574219,
|
|
"learning_rate": 8.478490114496862e-06,
|
|
"loss": 0.802,
|
|
"step": 5496
|
|
},
|
|
{
|
|
"epoch": 1.461968085106383,
|
|
"grad_norm": 4.090579509735107,
|
|
"learning_rate": 8.477858281859941e-06,
|
|
"loss": 0.8182,
|
|
"step": 5497
|
|
},
|
|
{
|
|
"epoch": 1.4622340425531914,
|
|
"grad_norm": 4.3386006355285645,
|
|
"learning_rate": 8.47722634161399e-06,
|
|
"loss": 0.7349,
|
|
"step": 5498
|
|
},
|
|
{
|
|
"epoch": 1.4625,
|
|
"grad_norm": 3.489248275756836,
|
|
"learning_rate": 8.476594293778561e-06,
|
|
"loss": 0.7918,
|
|
"step": 5499
|
|
},
|
|
{
|
|
"epoch": 1.4627659574468086,
|
|
"grad_norm": 3.849106788635254,
|
|
"learning_rate": 8.475962138373212e-06,
|
|
"loss": 0.7986,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 1.4627659574468086,
|
|
"eval_loss": 1.2964370250701904,
|
|
"eval_runtime": 13.6602,
|
|
"eval_samples_per_second": 29.282,
|
|
"eval_steps_per_second": 3.66,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 1.463031914893617,
|
|
"grad_norm": 3.9225049018859863,
|
|
"learning_rate": 8.475329875417502e-06,
|
|
"loss": 0.7197,
|
|
"step": 5501
|
|
},
|
|
{
|
|
"epoch": 1.4632978723404255,
|
|
"grad_norm": 3.952686071395874,
|
|
"learning_rate": 8.474697504930994e-06,
|
|
"loss": 0.8378,
|
|
"step": 5502
|
|
},
|
|
{
|
|
"epoch": 1.463563829787234,
|
|
"grad_norm": 3.452550172805786,
|
|
"learning_rate": 8.474065026933254e-06,
|
|
"loss": 0.8279,
|
|
"step": 5503
|
|
},
|
|
{
|
|
"epoch": 1.4638297872340424,
|
|
"grad_norm": 3.6807174682617188,
|
|
"learning_rate": 8.473432441443852e-06,
|
|
"loss": 0.8527,
|
|
"step": 5504
|
|
},
|
|
{
|
|
"epoch": 1.4640957446808511,
|
|
"grad_norm": 3.6200850009918213,
|
|
"learning_rate": 8.472799748482361e-06,
|
|
"loss": 0.7749,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 1.4643617021276596,
|
|
"grad_norm": 4.591206073760986,
|
|
"learning_rate": 8.472166948068357e-06,
|
|
"loss": 0.8827,
|
|
"step": 5506
|
|
},
|
|
{
|
|
"epoch": 1.464627659574468,
|
|
"grad_norm": 3.7772765159606934,
|
|
"learning_rate": 8.471534040221419e-06,
|
|
"loss": 0.8578,
|
|
"step": 5507
|
|
},
|
|
{
|
|
"epoch": 1.4648936170212765,
|
|
"grad_norm": 3.75657057762146,
|
|
"learning_rate": 8.47090102496113e-06,
|
|
"loss": 0.8552,
|
|
"step": 5508
|
|
},
|
|
{
|
|
"epoch": 1.465159574468085,
|
|
"grad_norm": 3.635420322418213,
|
|
"learning_rate": 8.470267902307079e-06,
|
|
"loss": 0.7732,
|
|
"step": 5509
|
|
},
|
|
{
|
|
"epoch": 1.4654255319148937,
|
|
"grad_norm": 4.403695583343506,
|
|
"learning_rate": 8.469634672278853e-06,
|
|
"loss": 0.9379,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 1.4656914893617021,
|
|
"grad_norm": 3.849709987640381,
|
|
"learning_rate": 8.469001334896044e-06,
|
|
"loss": 0.7691,
|
|
"step": 5511
|
|
},
|
|
{
|
|
"epoch": 1.4659574468085106,
|
|
"grad_norm": 3.580702066421509,
|
|
"learning_rate": 8.46836789017825e-06,
|
|
"loss": 0.7887,
|
|
"step": 5512
|
|
},
|
|
{
|
|
"epoch": 1.4662234042553193,
|
|
"grad_norm": 4.184311866760254,
|
|
"learning_rate": 8.46773433814507e-06,
|
|
"loss": 0.9119,
|
|
"step": 5513
|
|
},
|
|
{
|
|
"epoch": 1.4664893617021277,
|
|
"grad_norm": 4.308862686157227,
|
|
"learning_rate": 8.467100678816108e-06,
|
|
"loss": 0.8483,
|
|
"step": 5514
|
|
},
|
|
{
|
|
"epoch": 1.4667553191489362,
|
|
"grad_norm": 3.799316883087158,
|
|
"learning_rate": 8.466466912210967e-06,
|
|
"loss": 0.8143,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 1.4670212765957447,
|
|
"grad_norm": 3.673563003540039,
|
|
"learning_rate": 8.465833038349259e-06,
|
|
"loss": 0.7485,
|
|
"step": 5516
|
|
},
|
|
{
|
|
"epoch": 1.4672872340425531,
|
|
"grad_norm": 4.07314395904541,
|
|
"learning_rate": 8.465199057250597e-06,
|
|
"loss": 0.8663,
|
|
"step": 5517
|
|
},
|
|
{
|
|
"epoch": 1.4675531914893618,
|
|
"grad_norm": 3.6095144748687744,
|
|
"learning_rate": 8.464564968934595e-06,
|
|
"loss": 0.6752,
|
|
"step": 5518
|
|
},
|
|
{
|
|
"epoch": 1.4678191489361703,
|
|
"grad_norm": 3.661813735961914,
|
|
"learning_rate": 8.463930773420874e-06,
|
|
"loss": 0.8518,
|
|
"step": 5519
|
|
},
|
|
{
|
|
"epoch": 1.4680851063829787,
|
|
"grad_norm": 4.36665153503418,
|
|
"learning_rate": 8.463296470729058e-06,
|
|
"loss": 0.7581,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 1.4683510638297872,
|
|
"grad_norm": 4.145575046539307,
|
|
"learning_rate": 8.462662060878772e-06,
|
|
"loss": 0.8582,
|
|
"step": 5521
|
|
},
|
|
{
|
|
"epoch": 1.4686170212765957,
|
|
"grad_norm": 3.805684804916382,
|
|
"learning_rate": 8.462027543889644e-06,
|
|
"loss": 0.718,
|
|
"step": 5522
|
|
},
|
|
{
|
|
"epoch": 1.4688829787234043,
|
|
"grad_norm": 3.7820284366607666,
|
|
"learning_rate": 8.461392919781309e-06,
|
|
"loss": 0.7179,
|
|
"step": 5523
|
|
},
|
|
{
|
|
"epoch": 1.4691489361702128,
|
|
"grad_norm": 4.097955226898193,
|
|
"learning_rate": 8.460758188573399e-06,
|
|
"loss": 0.7764,
|
|
"step": 5524
|
|
},
|
|
{
|
|
"epoch": 1.4694148936170213,
|
|
"grad_norm": 4.177279472351074,
|
|
"learning_rate": 8.46012335028556e-06,
|
|
"loss": 0.8168,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 1.4696808510638297,
|
|
"grad_norm": 4.4050679206848145,
|
|
"learning_rate": 8.459488404937426e-06,
|
|
"loss": 0.8876,
|
|
"step": 5526
|
|
},
|
|
{
|
|
"epoch": 1.4699468085106382,
|
|
"grad_norm": 3.7400434017181396,
|
|
"learning_rate": 8.458853352548651e-06,
|
|
"loss": 0.8693,
|
|
"step": 5527
|
|
},
|
|
{
|
|
"epoch": 1.4702127659574469,
|
|
"grad_norm": 3.909196138381958,
|
|
"learning_rate": 8.458218193138881e-06,
|
|
"loss": 0.8237,
|
|
"step": 5528
|
|
},
|
|
{
|
|
"epoch": 1.4704787234042553,
|
|
"grad_norm": 3.941265344619751,
|
|
"learning_rate": 8.457582926727768e-06,
|
|
"loss": 0.9123,
|
|
"step": 5529
|
|
},
|
|
{
|
|
"epoch": 1.4707446808510638,
|
|
"grad_norm": 3.8149471282958984,
|
|
"learning_rate": 8.456947553334966e-06,
|
|
"loss": 0.6899,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 1.4710106382978723,
|
|
"grad_norm": 3.6952855587005615,
|
|
"learning_rate": 8.45631207298014e-06,
|
|
"loss": 0.7128,
|
|
"step": 5531
|
|
},
|
|
{
|
|
"epoch": 1.4712765957446807,
|
|
"grad_norm": 3.9754221439361572,
|
|
"learning_rate": 8.45567648568295e-06,
|
|
"loss": 0.9245,
|
|
"step": 5532
|
|
},
|
|
{
|
|
"epoch": 1.4715425531914894,
|
|
"grad_norm": 4.337751388549805,
|
|
"learning_rate": 8.455040791463057e-06,
|
|
"loss": 0.8776,
|
|
"step": 5533
|
|
},
|
|
{
|
|
"epoch": 1.4718085106382979,
|
|
"grad_norm": 3.7709763050079346,
|
|
"learning_rate": 8.454404990340137e-06,
|
|
"loss": 0.6869,
|
|
"step": 5534
|
|
},
|
|
{
|
|
"epoch": 1.4720744680851063,
|
|
"grad_norm": 4.196871280670166,
|
|
"learning_rate": 8.453769082333858e-06,
|
|
"loss": 0.8704,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 1.472340425531915,
|
|
"grad_norm": 3.957577705383301,
|
|
"learning_rate": 8.453133067463898e-06,
|
|
"loss": 0.7857,
|
|
"step": 5536
|
|
},
|
|
{
|
|
"epoch": 1.4726063829787235,
|
|
"grad_norm": 3.942445993423462,
|
|
"learning_rate": 8.452496945749934e-06,
|
|
"loss": 0.875,
|
|
"step": 5537
|
|
},
|
|
{
|
|
"epoch": 1.472872340425532,
|
|
"grad_norm": 4.122093200683594,
|
|
"learning_rate": 8.451860717211653e-06,
|
|
"loss": 0.8047,
|
|
"step": 5538
|
|
},
|
|
{
|
|
"epoch": 1.4731382978723404,
|
|
"grad_norm": 3.8919665813446045,
|
|
"learning_rate": 8.451224381868735e-06,
|
|
"loss": 0.9631,
|
|
"step": 5539
|
|
},
|
|
{
|
|
"epoch": 1.4734042553191489,
|
|
"grad_norm": 4.186689376831055,
|
|
"learning_rate": 8.45058793974087e-06,
|
|
"loss": 0.8028,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 1.4736702127659576,
|
|
"grad_norm": 4.130399703979492,
|
|
"learning_rate": 8.449951390847754e-06,
|
|
"loss": 0.7659,
|
|
"step": 5541
|
|
},
|
|
{
|
|
"epoch": 1.473936170212766,
|
|
"grad_norm": 3.8741462230682373,
|
|
"learning_rate": 8.44931473520908e-06,
|
|
"loss": 0.74,
|
|
"step": 5542
|
|
},
|
|
{
|
|
"epoch": 1.4742021276595745,
|
|
"grad_norm": 4.210333824157715,
|
|
"learning_rate": 8.448677972844546e-06,
|
|
"loss": 0.7675,
|
|
"step": 5543
|
|
},
|
|
{
|
|
"epoch": 1.474468085106383,
|
|
"grad_norm": 3.959024429321289,
|
|
"learning_rate": 8.448041103773857e-06,
|
|
"loss": 0.8771,
|
|
"step": 5544
|
|
},
|
|
{
|
|
"epoch": 1.4747340425531914,
|
|
"grad_norm": 3.9098892211914062,
|
|
"learning_rate": 8.447404128016715e-06,
|
|
"loss": 0.8756,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 1.475,
|
|
"grad_norm": 3.9612808227539062,
|
|
"learning_rate": 8.446767045592829e-06,
|
|
"loss": 0.7888,
|
|
"step": 5546
|
|
},
|
|
{
|
|
"epoch": 1.4752659574468086,
|
|
"grad_norm": 3.754507303237915,
|
|
"learning_rate": 8.446129856521917e-06,
|
|
"loss": 0.8611,
|
|
"step": 5547
|
|
},
|
|
{
|
|
"epoch": 1.475531914893617,
|
|
"grad_norm": 3.97927188873291,
|
|
"learning_rate": 8.445492560823686e-06,
|
|
"loss": 0.7937,
|
|
"step": 5548
|
|
},
|
|
{
|
|
"epoch": 1.4757978723404255,
|
|
"grad_norm": 3.8864712715148926,
|
|
"learning_rate": 8.44485515851786e-06,
|
|
"loss": 0.7687,
|
|
"step": 5549
|
|
},
|
|
{
|
|
"epoch": 1.476063829787234,
|
|
"grad_norm": 3.407346487045288,
|
|
"learning_rate": 8.44421764962416e-06,
|
|
"loss": 0.8368,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 1.4763297872340426,
|
|
"grad_norm": 4.162166118621826,
|
|
"learning_rate": 8.44358003416231e-06,
|
|
"loss": 0.7305,
|
|
"step": 5551
|
|
},
|
|
{
|
|
"epoch": 1.476595744680851,
|
|
"grad_norm": 4.198580741882324,
|
|
"learning_rate": 8.44294231215204e-06,
|
|
"loss": 0.9471,
|
|
"step": 5552
|
|
},
|
|
{
|
|
"epoch": 1.4768617021276595,
|
|
"grad_norm": 3.6172430515289307,
|
|
"learning_rate": 8.44230448361308e-06,
|
|
"loss": 0.84,
|
|
"step": 5553
|
|
},
|
|
{
|
|
"epoch": 1.477127659574468,
|
|
"grad_norm": 3.573073387145996,
|
|
"learning_rate": 8.441666548565169e-06,
|
|
"loss": 0.8333,
|
|
"step": 5554
|
|
},
|
|
{
|
|
"epoch": 1.4773936170212765,
|
|
"grad_norm": 3.864596128463745,
|
|
"learning_rate": 8.441028507028041e-06,
|
|
"loss": 0.7169,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 1.4776595744680852,
|
|
"grad_norm": 3.62256121635437,
|
|
"learning_rate": 8.44039035902144e-06,
|
|
"loss": 0.8163,
|
|
"step": 5556
|
|
},
|
|
{
|
|
"epoch": 1.4779255319148936,
|
|
"grad_norm": 3.8395614624023438,
|
|
"learning_rate": 8.43975210456511e-06,
|
|
"loss": 0.7796,
|
|
"step": 5557
|
|
},
|
|
{
|
|
"epoch": 1.478191489361702,
|
|
"grad_norm": 3.980595111846924,
|
|
"learning_rate": 8.439113743678801e-06,
|
|
"loss": 0.9652,
|
|
"step": 5558
|
|
},
|
|
{
|
|
"epoch": 1.4784574468085108,
|
|
"grad_norm": 3.7857303619384766,
|
|
"learning_rate": 8.438475276382264e-06,
|
|
"loss": 0.9076,
|
|
"step": 5559
|
|
},
|
|
{
|
|
"epoch": 1.4787234042553192,
|
|
"grad_norm": 3.4477193355560303,
|
|
"learning_rate": 8.437836702695253e-06,
|
|
"loss": 0.727,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 1.4789893617021277,
|
|
"grad_norm": 3.9439425468444824,
|
|
"learning_rate": 8.437198022637527e-06,
|
|
"loss": 0.7404,
|
|
"step": 5561
|
|
},
|
|
{
|
|
"epoch": 1.4792553191489362,
|
|
"grad_norm": 3.8489301204681396,
|
|
"learning_rate": 8.436559236228849e-06,
|
|
"loss": 0.7598,
|
|
"step": 5562
|
|
},
|
|
{
|
|
"epoch": 1.4795212765957446,
|
|
"grad_norm": 3.9537103176116943,
|
|
"learning_rate": 8.435920343488978e-06,
|
|
"loss": 0.81,
|
|
"step": 5563
|
|
},
|
|
{
|
|
"epoch": 1.4797872340425533,
|
|
"grad_norm": 4.361562252044678,
|
|
"learning_rate": 8.435281344437691e-06,
|
|
"loss": 0.9021,
|
|
"step": 5564
|
|
},
|
|
{
|
|
"epoch": 1.4800531914893618,
|
|
"grad_norm": 4.177056789398193,
|
|
"learning_rate": 8.434642239094752e-06,
|
|
"loss": 0.7916,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 1.4803191489361702,
|
|
"grad_norm": 4.249316215515137,
|
|
"learning_rate": 8.43400302747994e-06,
|
|
"loss": 0.8578,
|
|
"step": 5566
|
|
},
|
|
{
|
|
"epoch": 1.4805851063829787,
|
|
"grad_norm": 4.1586198806762695,
|
|
"learning_rate": 8.43336370961303e-06,
|
|
"loss": 0.7918,
|
|
"step": 5567
|
|
},
|
|
{
|
|
"epoch": 1.4808510638297872,
|
|
"grad_norm": 3.8984861373901367,
|
|
"learning_rate": 8.432724285513804e-06,
|
|
"loss": 0.8302,
|
|
"step": 5568
|
|
},
|
|
{
|
|
"epoch": 1.4811170212765958,
|
|
"grad_norm": 4.403296947479248,
|
|
"learning_rate": 8.43208475520205e-06,
|
|
"loss": 0.9246,
|
|
"step": 5569
|
|
},
|
|
{
|
|
"epoch": 1.4813829787234043,
|
|
"grad_norm": 4.00664758682251,
|
|
"learning_rate": 8.43144511869755e-06,
|
|
"loss": 0.7915,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 1.4816489361702128,
|
|
"grad_norm": 4.43447732925415,
|
|
"learning_rate": 8.4308053760201e-06,
|
|
"loss": 0.811,
|
|
"step": 5571
|
|
},
|
|
{
|
|
"epoch": 1.4819148936170212,
|
|
"grad_norm": 4.107089519500732,
|
|
"learning_rate": 8.43016552718949e-06,
|
|
"loss": 0.9385,
|
|
"step": 5572
|
|
},
|
|
{
|
|
"epoch": 1.4821808510638297,
|
|
"grad_norm": 4.0541229248046875,
|
|
"learning_rate": 8.429525572225521e-06,
|
|
"loss": 0.7683,
|
|
"step": 5573
|
|
},
|
|
{
|
|
"epoch": 1.4824468085106384,
|
|
"grad_norm": 3.8049004077911377,
|
|
"learning_rate": 8.428885511147994e-06,
|
|
"loss": 0.8483,
|
|
"step": 5574
|
|
},
|
|
{
|
|
"epoch": 1.4827127659574468,
|
|
"grad_norm": 4.220947265625,
|
|
"learning_rate": 8.42824534397671e-06,
|
|
"loss": 0.8209,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 1.4829787234042553,
|
|
"grad_norm": 3.299015998840332,
|
|
"learning_rate": 8.427605070731482e-06,
|
|
"loss": 0.6946,
|
|
"step": 5576
|
|
},
|
|
{
|
|
"epoch": 1.4832446808510638,
|
|
"grad_norm": 4.028343677520752,
|
|
"learning_rate": 8.426964691432116e-06,
|
|
"loss": 0.7912,
|
|
"step": 5577
|
|
},
|
|
{
|
|
"epoch": 1.4835106382978722,
|
|
"grad_norm": 3.6714823246002197,
|
|
"learning_rate": 8.426324206098429e-06,
|
|
"loss": 0.7487,
|
|
"step": 5578
|
|
},
|
|
{
|
|
"epoch": 1.483776595744681,
|
|
"grad_norm": 3.8498239517211914,
|
|
"learning_rate": 8.425683614750235e-06,
|
|
"loss": 0.7929,
|
|
"step": 5579
|
|
},
|
|
{
|
|
"epoch": 1.4840425531914894,
|
|
"grad_norm": 3.6556410789489746,
|
|
"learning_rate": 8.425042917407358e-06,
|
|
"loss": 0.7774,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 1.4843085106382978,
|
|
"grad_norm": 3.908780336380005,
|
|
"learning_rate": 8.424402114089618e-06,
|
|
"loss": 0.7533,
|
|
"step": 5581
|
|
},
|
|
{
|
|
"epoch": 1.4845744680851065,
|
|
"grad_norm": 4.054098129272461,
|
|
"learning_rate": 8.42376120481685e-06,
|
|
"loss": 0.8575,
|
|
"step": 5582
|
|
},
|
|
{
|
|
"epoch": 1.484840425531915,
|
|
"grad_norm": 4.667778968811035,
|
|
"learning_rate": 8.423120189608876e-06,
|
|
"loss": 0.8906,
|
|
"step": 5583
|
|
},
|
|
{
|
|
"epoch": 1.4851063829787234,
|
|
"grad_norm": 3.960300922393799,
|
|
"learning_rate": 8.422479068485531e-06,
|
|
"loss": 0.7737,
|
|
"step": 5584
|
|
},
|
|
{
|
|
"epoch": 1.485372340425532,
|
|
"grad_norm": 4.355529308319092,
|
|
"learning_rate": 8.421837841466657e-06,
|
|
"loss": 0.8904,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 1.4856382978723404,
|
|
"grad_norm": 4.450819969177246,
|
|
"learning_rate": 8.42119650857209e-06,
|
|
"loss": 0.8558,
|
|
"step": 5586
|
|
},
|
|
{
|
|
"epoch": 1.485904255319149,
|
|
"grad_norm": 3.8777942657470703,
|
|
"learning_rate": 8.420555069821679e-06,
|
|
"loss": 0.8021,
|
|
"step": 5587
|
|
},
|
|
{
|
|
"epoch": 1.4861702127659575,
|
|
"grad_norm": 3.9618871212005615,
|
|
"learning_rate": 8.419913525235264e-06,
|
|
"loss": 0.8717,
|
|
"step": 5588
|
|
},
|
|
{
|
|
"epoch": 1.486436170212766,
|
|
"grad_norm": 3.7627811431884766,
|
|
"learning_rate": 8.419271874832697e-06,
|
|
"loss": 0.7337,
|
|
"step": 5589
|
|
},
|
|
{
|
|
"epoch": 1.4867021276595744,
|
|
"grad_norm": 3.9509243965148926,
|
|
"learning_rate": 8.418630118633835e-06,
|
|
"loss": 0.8209,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 1.486968085106383,
|
|
"grad_norm": 3.8642148971557617,
|
|
"learning_rate": 8.417988256658532e-06,
|
|
"loss": 0.7907,
|
|
"step": 5591
|
|
},
|
|
{
|
|
"epoch": 1.4872340425531916,
|
|
"grad_norm": 3.917509078979492,
|
|
"learning_rate": 8.417346288926646e-06,
|
|
"loss": 0.8037,
|
|
"step": 5592
|
|
},
|
|
{
|
|
"epoch": 1.4875,
|
|
"grad_norm": 3.5143251419067383,
|
|
"learning_rate": 8.416704215458042e-06,
|
|
"loss": 0.8127,
|
|
"step": 5593
|
|
},
|
|
{
|
|
"epoch": 1.4877659574468085,
|
|
"grad_norm": 4.229488372802734,
|
|
"learning_rate": 8.41606203627259e-06,
|
|
"loss": 0.8681,
|
|
"step": 5594
|
|
},
|
|
{
|
|
"epoch": 1.488031914893617,
|
|
"grad_norm": 3.636591911315918,
|
|
"learning_rate": 8.415419751390155e-06,
|
|
"loss": 0.8858,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 1.4882978723404254,
|
|
"grad_norm": 3.9129700660705566,
|
|
"learning_rate": 8.414777360830611e-06,
|
|
"loss": 0.8607,
|
|
"step": 5596
|
|
},
|
|
{
|
|
"epoch": 1.4885638297872341,
|
|
"grad_norm": 4.00184965133667,
|
|
"learning_rate": 8.414134864613837e-06,
|
|
"loss": 0.7551,
|
|
"step": 5597
|
|
},
|
|
{
|
|
"epoch": 1.4888297872340426,
|
|
"grad_norm": 3.9038429260253906,
|
|
"learning_rate": 8.413492262759708e-06,
|
|
"loss": 0.7195,
|
|
"step": 5598
|
|
},
|
|
{
|
|
"epoch": 1.489095744680851,
|
|
"grad_norm": 3.802076816558838,
|
|
"learning_rate": 8.412849555288111e-06,
|
|
"loss": 0.8092,
|
|
"step": 5599
|
|
},
|
|
{
|
|
"epoch": 1.4893617021276595,
|
|
"grad_norm": 4.020835876464844,
|
|
"learning_rate": 8.41220674221893e-06,
|
|
"loss": 0.8439,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 1.489627659574468,
|
|
"grad_norm": 4.310454845428467,
|
|
"learning_rate": 8.411563823572057e-06,
|
|
"loss": 0.959,
|
|
"step": 5601
|
|
},
|
|
{
|
|
"epoch": 1.4898936170212767,
|
|
"grad_norm": 4.212212085723877,
|
|
"learning_rate": 8.410920799367382e-06,
|
|
"loss": 0.784,
|
|
"step": 5602
|
|
},
|
|
{
|
|
"epoch": 1.4901595744680851,
|
|
"grad_norm": 3.9010252952575684,
|
|
"learning_rate": 8.4102776696248e-06,
|
|
"loss": 0.7156,
|
|
"step": 5603
|
|
},
|
|
{
|
|
"epoch": 1.4904255319148936,
|
|
"grad_norm": 4.061422348022461,
|
|
"learning_rate": 8.409634434364214e-06,
|
|
"loss": 0.8524,
|
|
"step": 5604
|
|
},
|
|
{
|
|
"epoch": 1.4906914893617023,
|
|
"grad_norm": 4.281171798706055,
|
|
"learning_rate": 8.408991093605524e-06,
|
|
"loss": 0.8344,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 1.4909574468085105,
|
|
"grad_norm": 4.274752616882324,
|
|
"learning_rate": 8.408347647368634e-06,
|
|
"loss": 0.8106,
|
|
"step": 5606
|
|
},
|
|
{
|
|
"epoch": 1.4912234042553192,
|
|
"grad_norm": 3.9846606254577637,
|
|
"learning_rate": 8.407704095673454e-06,
|
|
"loss": 0.7059,
|
|
"step": 5607
|
|
},
|
|
{
|
|
"epoch": 1.4914893617021276,
|
|
"grad_norm": 4.1280436515808105,
|
|
"learning_rate": 8.4070604385399e-06,
|
|
"loss": 0.8267,
|
|
"step": 5608
|
|
},
|
|
{
|
|
"epoch": 1.491755319148936,
|
|
"grad_norm": 3.7875635623931885,
|
|
"learning_rate": 8.406416675987884e-06,
|
|
"loss": 0.8078,
|
|
"step": 5609
|
|
},
|
|
{
|
|
"epoch": 1.4920212765957448,
|
|
"grad_norm": 4.4207444190979,
|
|
"learning_rate": 8.405772808037326e-06,
|
|
"loss": 0.8452,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 1.4922872340425533,
|
|
"grad_norm": 3.9423201084136963,
|
|
"learning_rate": 8.405128834708147e-06,
|
|
"loss": 0.7491,
|
|
"step": 5611
|
|
},
|
|
{
|
|
"epoch": 1.4925531914893617,
|
|
"grad_norm": 3.669431686401367,
|
|
"learning_rate": 8.404484756020272e-06,
|
|
"loss": 0.7232,
|
|
"step": 5612
|
|
},
|
|
{
|
|
"epoch": 1.4928191489361702,
|
|
"grad_norm": 4.371226787567139,
|
|
"learning_rate": 8.403840571993631e-06,
|
|
"loss": 0.7899,
|
|
"step": 5613
|
|
},
|
|
{
|
|
"epoch": 1.4930851063829786,
|
|
"grad_norm": 4.185215950012207,
|
|
"learning_rate": 8.403196282648156e-06,
|
|
"loss": 0.9727,
|
|
"step": 5614
|
|
},
|
|
{
|
|
"epoch": 1.4933510638297873,
|
|
"grad_norm": 3.5517239570617676,
|
|
"learning_rate": 8.402551888003781e-06,
|
|
"loss": 0.805,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 1.4936170212765958,
|
|
"grad_norm": 3.4188995361328125,
|
|
"learning_rate": 8.401907388080443e-06,
|
|
"loss": 0.7345,
|
|
"step": 5616
|
|
},
|
|
{
|
|
"epoch": 1.4938829787234043,
|
|
"grad_norm": 3.7187201976776123,
|
|
"learning_rate": 8.401262782898087e-06,
|
|
"loss": 0.7147,
|
|
"step": 5617
|
|
},
|
|
{
|
|
"epoch": 1.4941489361702127,
|
|
"grad_norm": 4.5645976066589355,
|
|
"learning_rate": 8.400618072476655e-06,
|
|
"loss": 0.8707,
|
|
"step": 5618
|
|
},
|
|
{
|
|
"epoch": 1.4944148936170212,
|
|
"grad_norm": 3.7568912506103516,
|
|
"learning_rate": 8.399973256836097e-06,
|
|
"loss": 0.8637,
|
|
"step": 5619
|
|
},
|
|
{
|
|
"epoch": 1.4946808510638299,
|
|
"grad_norm": 4.120610237121582,
|
|
"learning_rate": 8.399328335996362e-06,
|
|
"loss": 0.8749,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 1.4949468085106383,
|
|
"grad_norm": 3.780111312866211,
|
|
"learning_rate": 8.398683309977407e-06,
|
|
"loss": 0.739,
|
|
"step": 5621
|
|
},
|
|
{
|
|
"epoch": 1.4952127659574468,
|
|
"grad_norm": 4.050705909729004,
|
|
"learning_rate": 8.39803817879919e-06,
|
|
"loss": 0.869,
|
|
"step": 5622
|
|
},
|
|
{
|
|
"epoch": 1.4954787234042553,
|
|
"grad_norm": 3.941727876663208,
|
|
"learning_rate": 8.39739294248167e-06,
|
|
"loss": 0.8147,
|
|
"step": 5623
|
|
},
|
|
{
|
|
"epoch": 1.4957446808510637,
|
|
"grad_norm": 4.117156505584717,
|
|
"learning_rate": 8.396747601044812e-06,
|
|
"loss": 0.843,
|
|
"step": 5624
|
|
},
|
|
{
|
|
"epoch": 1.4960106382978724,
|
|
"grad_norm": 3.813788890838623,
|
|
"learning_rate": 8.396102154508584e-06,
|
|
"loss": 0.7214,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 1.4962765957446809,
|
|
"grad_norm": 4.435267448425293,
|
|
"learning_rate": 8.395456602892957e-06,
|
|
"loss": 0.9548,
|
|
"step": 5626
|
|
},
|
|
{
|
|
"epoch": 1.4965425531914893,
|
|
"grad_norm": 4.178934097290039,
|
|
"learning_rate": 8.394810946217905e-06,
|
|
"loss": 0.797,
|
|
"step": 5627
|
|
},
|
|
{
|
|
"epoch": 1.496808510638298,
|
|
"grad_norm": 4.201347827911377,
|
|
"learning_rate": 8.394165184503406e-06,
|
|
"loss": 0.8086,
|
|
"step": 5628
|
|
},
|
|
{
|
|
"epoch": 1.4970744680851062,
|
|
"grad_norm": 4.090775489807129,
|
|
"learning_rate": 8.39351931776944e-06,
|
|
"loss": 0.8206,
|
|
"step": 5629
|
|
},
|
|
{
|
|
"epoch": 1.497340425531915,
|
|
"grad_norm": 3.81706166267395,
|
|
"learning_rate": 8.392873346035992e-06,
|
|
"loss": 0.7876,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 1.4976063829787234,
|
|
"grad_norm": 4.212119102478027,
|
|
"learning_rate": 8.392227269323046e-06,
|
|
"loss": 0.8634,
|
|
"step": 5631
|
|
},
|
|
{
|
|
"epoch": 1.4978723404255319,
|
|
"grad_norm": 4.333573818206787,
|
|
"learning_rate": 8.391581087650596e-06,
|
|
"loss": 0.8157,
|
|
"step": 5632
|
|
},
|
|
{
|
|
"epoch": 1.4981382978723405,
|
|
"grad_norm": 4.08198356628418,
|
|
"learning_rate": 8.390934801038632e-06,
|
|
"loss": 0.8804,
|
|
"step": 5633
|
|
},
|
|
{
|
|
"epoch": 1.498404255319149,
|
|
"grad_norm": 3.6360666751861572,
|
|
"learning_rate": 8.390288409507156e-06,
|
|
"loss": 0.6327,
|
|
"step": 5634
|
|
},
|
|
{
|
|
"epoch": 1.4986702127659575,
|
|
"grad_norm": 4.428205490112305,
|
|
"learning_rate": 8.389641913076163e-06,
|
|
"loss": 0.8857,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 1.498936170212766,
|
|
"grad_norm": 4.506261825561523,
|
|
"learning_rate": 8.388995311765657e-06,
|
|
"loss": 0.8376,
|
|
"step": 5636
|
|
},
|
|
{
|
|
"epoch": 1.4992021276595744,
|
|
"grad_norm": 3.7618744373321533,
|
|
"learning_rate": 8.388348605595649e-06,
|
|
"loss": 0.8656,
|
|
"step": 5637
|
|
},
|
|
{
|
|
"epoch": 1.499468085106383,
|
|
"grad_norm": 3.843425750732422,
|
|
"learning_rate": 8.387701794586145e-06,
|
|
"loss": 0.7474,
|
|
"step": 5638
|
|
},
|
|
{
|
|
"epoch": 1.4997340425531915,
|
|
"grad_norm": 3.933223009109497,
|
|
"learning_rate": 8.387054878757157e-06,
|
|
"loss": 0.9316,
|
|
"step": 5639
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 3.8141305446624756,
|
|
"learning_rate": 8.386407858128707e-06,
|
|
"loss": 0.7359,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 1.5002659574468085,
|
|
"grad_norm": 4.184633731842041,
|
|
"learning_rate": 8.385760732720809e-06,
|
|
"loss": 0.8206,
|
|
"step": 5641
|
|
},
|
|
{
|
|
"epoch": 1.500531914893617,
|
|
"grad_norm": 3.9276089668273926,
|
|
"learning_rate": 8.385113502553487e-06,
|
|
"loss": 0.8148,
|
|
"step": 5642
|
|
},
|
|
{
|
|
"epoch": 1.5007978723404256,
|
|
"grad_norm": 4.084725856781006,
|
|
"learning_rate": 8.384466167646768e-06,
|
|
"loss": 0.8435,
|
|
"step": 5643
|
|
},
|
|
{
|
|
"epoch": 1.501063829787234,
|
|
"grad_norm": 4.092894077301025,
|
|
"learning_rate": 8.383818728020681e-06,
|
|
"loss": 0.7876,
|
|
"step": 5644
|
|
},
|
|
{
|
|
"epoch": 1.5013297872340425,
|
|
"grad_norm": 3.6473567485809326,
|
|
"learning_rate": 8.383171183695258e-06,
|
|
"loss": 0.7427,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 1.5015957446808512,
|
|
"grad_norm": 4.224092483520508,
|
|
"learning_rate": 8.382523534690537e-06,
|
|
"loss": 0.8959,
|
|
"step": 5646
|
|
},
|
|
{
|
|
"epoch": 1.5018617021276595,
|
|
"grad_norm": 4.414750576019287,
|
|
"learning_rate": 8.381875781026553e-06,
|
|
"loss": 0.746,
|
|
"step": 5647
|
|
},
|
|
{
|
|
"epoch": 1.5021276595744681,
|
|
"grad_norm": 4.199521064758301,
|
|
"learning_rate": 8.381227922723353e-06,
|
|
"loss": 0.8083,
|
|
"step": 5648
|
|
},
|
|
{
|
|
"epoch": 1.5023936170212766,
|
|
"grad_norm": 3.8716115951538086,
|
|
"learning_rate": 8.380579959800981e-06,
|
|
"loss": 0.7007,
|
|
"step": 5649
|
|
},
|
|
{
|
|
"epoch": 1.502659574468085,
|
|
"grad_norm": 4.189701080322266,
|
|
"learning_rate": 8.379931892279483e-06,
|
|
"loss": 0.7694,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 1.5029255319148938,
|
|
"grad_norm": 3.577147960662842,
|
|
"learning_rate": 8.379283720178913e-06,
|
|
"loss": 0.7776,
|
|
"step": 5651
|
|
},
|
|
{
|
|
"epoch": 1.503191489361702,
|
|
"grad_norm": 4.009932994842529,
|
|
"learning_rate": 8.378635443519327e-06,
|
|
"loss": 0.7633,
|
|
"step": 5652
|
|
},
|
|
{
|
|
"epoch": 1.5034574468085107,
|
|
"grad_norm": 4.129024505615234,
|
|
"learning_rate": 8.377987062320782e-06,
|
|
"loss": 0.7067,
|
|
"step": 5653
|
|
},
|
|
{
|
|
"epoch": 1.5037234042553191,
|
|
"grad_norm": 3.6017751693725586,
|
|
"learning_rate": 8.37733857660334e-06,
|
|
"loss": 0.7983,
|
|
"step": 5654
|
|
},
|
|
{
|
|
"epoch": 1.5039893617021276,
|
|
"grad_norm": 3.799006223678589,
|
|
"learning_rate": 8.376689986387066e-06,
|
|
"loss": 0.8479,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 1.5042553191489363,
|
|
"grad_norm": 4.5062575340271,
|
|
"learning_rate": 8.376041291692028e-06,
|
|
"loss": 0.8298,
|
|
"step": 5656
|
|
},
|
|
{
|
|
"epoch": 1.5045212765957445,
|
|
"grad_norm": 3.729353666305542,
|
|
"learning_rate": 8.3753924925383e-06,
|
|
"loss": 0.7688,
|
|
"step": 5657
|
|
},
|
|
{
|
|
"epoch": 1.5047872340425532,
|
|
"grad_norm": 4.237773418426514,
|
|
"learning_rate": 8.374743588945951e-06,
|
|
"loss": 0.9623,
|
|
"step": 5658
|
|
},
|
|
{
|
|
"epoch": 1.5050531914893617,
|
|
"grad_norm": 3.5734505653381348,
|
|
"learning_rate": 8.374094580935064e-06,
|
|
"loss": 0.6333,
|
|
"step": 5659
|
|
},
|
|
{
|
|
"epoch": 1.5053191489361701,
|
|
"grad_norm": 3.711700677871704,
|
|
"learning_rate": 8.373445468525719e-06,
|
|
"loss": 0.8401,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 1.5055851063829788,
|
|
"grad_norm": 3.8051505088806152,
|
|
"learning_rate": 8.372796251737995e-06,
|
|
"loss": 0.7845,
|
|
"step": 5661
|
|
},
|
|
{
|
|
"epoch": 1.5058510638297873,
|
|
"grad_norm": 3.983067750930786,
|
|
"learning_rate": 8.372146930591988e-06,
|
|
"loss": 0.8886,
|
|
"step": 5662
|
|
},
|
|
{
|
|
"epoch": 1.5061170212765957,
|
|
"grad_norm": 3.872107744216919,
|
|
"learning_rate": 8.371497505107784e-06,
|
|
"loss": 0.8892,
|
|
"step": 5663
|
|
},
|
|
{
|
|
"epoch": 1.5063829787234042,
|
|
"grad_norm": 4.311370849609375,
|
|
"learning_rate": 8.370847975305479e-06,
|
|
"loss": 0.8369,
|
|
"step": 5664
|
|
},
|
|
{
|
|
"epoch": 1.5066489361702127,
|
|
"grad_norm": 3.470078706741333,
|
|
"learning_rate": 8.370198341205167e-06,
|
|
"loss": 0.7035,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 1.5069148936170214,
|
|
"grad_norm": 3.7826905250549316,
|
|
"learning_rate": 8.369548602826951e-06,
|
|
"loss": 0.8478,
|
|
"step": 5666
|
|
},
|
|
{
|
|
"epoch": 1.5071808510638298,
|
|
"grad_norm": 4.1136603355407715,
|
|
"learning_rate": 8.368898760190933e-06,
|
|
"loss": 0.7812,
|
|
"step": 5667
|
|
},
|
|
{
|
|
"epoch": 1.5074468085106383,
|
|
"grad_norm": 3.856652021408081,
|
|
"learning_rate": 8.368248813317221e-06,
|
|
"loss": 0.7926,
|
|
"step": 5668
|
|
},
|
|
{
|
|
"epoch": 1.507712765957447,
|
|
"grad_norm": 4.0616865158081055,
|
|
"learning_rate": 8.367598762225929e-06,
|
|
"loss": 0.7884,
|
|
"step": 5669
|
|
},
|
|
{
|
|
"epoch": 1.5079787234042552,
|
|
"grad_norm": 4.08623743057251,
|
|
"learning_rate": 8.366948606937161e-06,
|
|
"loss": 0.8499,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 1.508244680851064,
|
|
"grad_norm": 4.225100517272949,
|
|
"learning_rate": 8.366298347471043e-06,
|
|
"loss": 0.8145,
|
|
"step": 5671
|
|
},
|
|
{
|
|
"epoch": 1.5085106382978724,
|
|
"grad_norm": 4.046361923217773,
|
|
"learning_rate": 8.36564798384769e-06,
|
|
"loss": 0.6879,
|
|
"step": 5672
|
|
},
|
|
{
|
|
"epoch": 1.5087765957446808,
|
|
"grad_norm": 4.1829833984375,
|
|
"learning_rate": 8.364997516087224e-06,
|
|
"loss": 0.7828,
|
|
"step": 5673
|
|
},
|
|
{
|
|
"epoch": 1.5090425531914895,
|
|
"grad_norm": 3.750427484512329,
|
|
"learning_rate": 8.364346944209774e-06,
|
|
"loss": 0.7639,
|
|
"step": 5674
|
|
},
|
|
{
|
|
"epoch": 1.5093085106382977,
|
|
"grad_norm": 4.194416522979736,
|
|
"learning_rate": 8.36369626823547e-06,
|
|
"loss": 0.8308,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 1.5095744680851064,
|
|
"grad_norm": 4.148036003112793,
|
|
"learning_rate": 8.363045488184443e-06,
|
|
"loss": 0.7443,
|
|
"step": 5676
|
|
},
|
|
{
|
|
"epoch": 1.5098404255319149,
|
|
"grad_norm": 3.7398674488067627,
|
|
"learning_rate": 8.362394604076827e-06,
|
|
"loss": 0.8633,
|
|
"step": 5677
|
|
},
|
|
{
|
|
"epoch": 1.5101063829787233,
|
|
"grad_norm": 3.8514955043792725,
|
|
"learning_rate": 8.361743615932765e-06,
|
|
"loss": 0.797,
|
|
"step": 5678
|
|
},
|
|
{
|
|
"epoch": 1.510372340425532,
|
|
"grad_norm": 4.254388809204102,
|
|
"learning_rate": 8.361092523772396e-06,
|
|
"loss": 0.8425,
|
|
"step": 5679
|
|
},
|
|
{
|
|
"epoch": 1.5106382978723403,
|
|
"grad_norm": 4.257145881652832,
|
|
"learning_rate": 8.360441327615868e-06,
|
|
"loss": 0.7964,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 1.510904255319149,
|
|
"grad_norm": 3.9065487384796143,
|
|
"learning_rate": 8.35979002748333e-06,
|
|
"loss": 0.837,
|
|
"step": 5681
|
|
},
|
|
{
|
|
"epoch": 1.5111702127659574,
|
|
"grad_norm": 4.575162410736084,
|
|
"learning_rate": 8.359138623394931e-06,
|
|
"loss": 0.9485,
|
|
"step": 5682
|
|
},
|
|
{
|
|
"epoch": 1.5114361702127659,
|
|
"grad_norm": 4.180033206939697,
|
|
"learning_rate": 8.35848711537083e-06,
|
|
"loss": 0.8287,
|
|
"step": 5683
|
|
},
|
|
{
|
|
"epoch": 1.5117021276595746,
|
|
"grad_norm": 4.284930229187012,
|
|
"learning_rate": 8.357835503431182e-06,
|
|
"loss": 0.8548,
|
|
"step": 5684
|
|
},
|
|
{
|
|
"epoch": 1.511968085106383,
|
|
"grad_norm": 3.8655450344085693,
|
|
"learning_rate": 8.357183787596151e-06,
|
|
"loss": 0.7792,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 1.5122340425531915,
|
|
"grad_norm": 3.840792655944824,
|
|
"learning_rate": 8.356531967885899e-06,
|
|
"loss": 0.7953,
|
|
"step": 5686
|
|
},
|
|
{
|
|
"epoch": 1.5125,
|
|
"grad_norm": 3.675896406173706,
|
|
"learning_rate": 8.355880044320599e-06,
|
|
"loss": 0.7667,
|
|
"step": 5687
|
|
},
|
|
{
|
|
"epoch": 1.5127659574468084,
|
|
"grad_norm": 3.6345510482788086,
|
|
"learning_rate": 8.355228016920417e-06,
|
|
"loss": 0.8588,
|
|
"step": 5688
|
|
},
|
|
{
|
|
"epoch": 1.513031914893617,
|
|
"grad_norm": 3.8645408153533936,
|
|
"learning_rate": 8.354575885705532e-06,
|
|
"loss": 0.862,
|
|
"step": 5689
|
|
},
|
|
{
|
|
"epoch": 1.5132978723404256,
|
|
"grad_norm": 4.727093696594238,
|
|
"learning_rate": 8.353923650696119e-06,
|
|
"loss": 0.8419,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 1.513563829787234,
|
|
"grad_norm": 4.074021816253662,
|
|
"learning_rate": 8.353271311912357e-06,
|
|
"loss": 0.7486,
|
|
"step": 5691
|
|
},
|
|
{
|
|
"epoch": 1.5138297872340427,
|
|
"grad_norm": 3.9446327686309814,
|
|
"learning_rate": 8.352618869374435e-06,
|
|
"loss": 0.7721,
|
|
"step": 5692
|
|
},
|
|
{
|
|
"epoch": 1.514095744680851,
|
|
"grad_norm": 3.839276075363159,
|
|
"learning_rate": 8.351966323102538e-06,
|
|
"loss": 0.7744,
|
|
"step": 5693
|
|
},
|
|
{
|
|
"epoch": 1.5143617021276596,
|
|
"grad_norm": 4.190333366394043,
|
|
"learning_rate": 8.351313673116856e-06,
|
|
"loss": 0.8085,
|
|
"step": 5694
|
|
},
|
|
{
|
|
"epoch": 1.514627659574468,
|
|
"grad_norm": 3.8334741592407227,
|
|
"learning_rate": 8.350660919437585e-06,
|
|
"loss": 0.933,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 1.5148936170212766,
|
|
"grad_norm": 3.766174793243408,
|
|
"learning_rate": 8.350008062084918e-06,
|
|
"loss": 0.7537,
|
|
"step": 5696
|
|
},
|
|
{
|
|
"epoch": 1.5151595744680852,
|
|
"grad_norm": 4.281386852264404,
|
|
"learning_rate": 8.349355101079058e-06,
|
|
"loss": 0.8714,
|
|
"step": 5697
|
|
},
|
|
{
|
|
"epoch": 1.5154255319148935,
|
|
"grad_norm": 3.8533146381378174,
|
|
"learning_rate": 8.348702036440209e-06,
|
|
"loss": 0.8423,
|
|
"step": 5698
|
|
},
|
|
{
|
|
"epoch": 1.5156914893617022,
|
|
"grad_norm": 4.271562099456787,
|
|
"learning_rate": 8.348048868188574e-06,
|
|
"loss": 0.9832,
|
|
"step": 5699
|
|
},
|
|
{
|
|
"epoch": 1.5159574468085106,
|
|
"grad_norm": 4.475942611694336,
|
|
"learning_rate": 8.347395596344365e-06,
|
|
"loss": 0.9984,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 1.516223404255319,
|
|
"grad_norm": 4.308716773986816,
|
|
"learning_rate": 8.346742220927798e-06,
|
|
"loss": 0.8947,
|
|
"step": 5701
|
|
},
|
|
{
|
|
"epoch": 1.5164893617021278,
|
|
"grad_norm": 4.1707587242126465,
|
|
"learning_rate": 8.346088741959085e-06,
|
|
"loss": 0.9077,
|
|
"step": 5702
|
|
},
|
|
{
|
|
"epoch": 1.516755319148936,
|
|
"grad_norm": 4.016225337982178,
|
|
"learning_rate": 8.345435159458445e-06,
|
|
"loss": 0.9186,
|
|
"step": 5703
|
|
},
|
|
{
|
|
"epoch": 1.5170212765957447,
|
|
"grad_norm": 4.131173133850098,
|
|
"learning_rate": 8.344781473446106e-06,
|
|
"loss": 0.708,
|
|
"step": 5704
|
|
},
|
|
{
|
|
"epoch": 1.5172872340425532,
|
|
"grad_norm": 4.118223667144775,
|
|
"learning_rate": 8.344127683942289e-06,
|
|
"loss": 0.815,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 1.5175531914893616,
|
|
"grad_norm": 4.08048677444458,
|
|
"learning_rate": 8.343473790967223e-06,
|
|
"loss": 0.7402,
|
|
"step": 5706
|
|
},
|
|
{
|
|
"epoch": 1.5178191489361703,
|
|
"grad_norm": 4.256683826446533,
|
|
"learning_rate": 8.342819794541143e-06,
|
|
"loss": 0.9272,
|
|
"step": 5707
|
|
},
|
|
{
|
|
"epoch": 1.5180851063829788,
|
|
"grad_norm": 3.6859428882598877,
|
|
"learning_rate": 8.34216569468428e-06,
|
|
"loss": 0.8052,
|
|
"step": 5708
|
|
},
|
|
{
|
|
"epoch": 1.5183510638297872,
|
|
"grad_norm": 4.601988315582275,
|
|
"learning_rate": 8.341511491416877e-06,
|
|
"loss": 0.7638,
|
|
"step": 5709
|
|
},
|
|
{
|
|
"epoch": 1.5186170212765957,
|
|
"grad_norm": 3.8631575107574463,
|
|
"learning_rate": 8.340857184759178e-06,
|
|
"loss": 0.8282,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 1.5188829787234042,
|
|
"grad_norm": 4.184502124786377,
|
|
"learning_rate": 8.34020277473142e-06,
|
|
"loss": 0.8513,
|
|
"step": 5711
|
|
},
|
|
{
|
|
"epoch": 1.5191489361702128,
|
|
"grad_norm": 3.9446780681610107,
|
|
"learning_rate": 8.339548261353856e-06,
|
|
"loss": 0.6634,
|
|
"step": 5712
|
|
},
|
|
{
|
|
"epoch": 1.5194148936170213,
|
|
"grad_norm": 3.9360363483428955,
|
|
"learning_rate": 8.338893644646739e-06,
|
|
"loss": 0.7769,
|
|
"step": 5713
|
|
},
|
|
{
|
|
"epoch": 1.5196808510638298,
|
|
"grad_norm": 3.235274314880371,
|
|
"learning_rate": 8.33823892463032e-06,
|
|
"loss": 0.7531,
|
|
"step": 5714
|
|
},
|
|
{
|
|
"epoch": 1.5199468085106385,
|
|
"grad_norm": 3.941875696182251,
|
|
"learning_rate": 8.337584101324859e-06,
|
|
"loss": 0.7937,
|
|
"step": 5715
|
|
},
|
|
{
|
|
"epoch": 1.5202127659574467,
|
|
"grad_norm": 3.7710206508636475,
|
|
"learning_rate": 8.336929174750616e-06,
|
|
"loss": 0.8403,
|
|
"step": 5716
|
|
},
|
|
{
|
|
"epoch": 1.5204787234042554,
|
|
"grad_norm": 4.109030246734619,
|
|
"learning_rate": 8.336274144927855e-06,
|
|
"loss": 0.6704,
|
|
"step": 5717
|
|
},
|
|
{
|
|
"epoch": 1.5207446808510638,
|
|
"grad_norm": 3.7918636798858643,
|
|
"learning_rate": 8.335619011876846e-06,
|
|
"loss": 0.7756,
|
|
"step": 5718
|
|
},
|
|
{
|
|
"epoch": 1.5210106382978723,
|
|
"grad_norm": 3.633254051208496,
|
|
"learning_rate": 8.334963775617854e-06,
|
|
"loss": 0.7325,
|
|
"step": 5719
|
|
},
|
|
{
|
|
"epoch": 1.521276595744681,
|
|
"grad_norm": 3.994147539138794,
|
|
"learning_rate": 8.334308436171159e-06,
|
|
"loss": 0.8936,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 1.5215425531914892,
|
|
"grad_norm": 3.5977087020874023,
|
|
"learning_rate": 8.333652993557035e-06,
|
|
"loss": 0.8429,
|
|
"step": 5721
|
|
},
|
|
{
|
|
"epoch": 1.521808510638298,
|
|
"grad_norm": 3.7515316009521484,
|
|
"learning_rate": 8.332997447795763e-06,
|
|
"loss": 0.8329,
|
|
"step": 5722
|
|
},
|
|
{
|
|
"epoch": 1.5220744680851064,
|
|
"grad_norm": 3.969116449356079,
|
|
"learning_rate": 8.332341798907624e-06,
|
|
"loss": 0.804,
|
|
"step": 5723
|
|
},
|
|
{
|
|
"epoch": 1.5223404255319148,
|
|
"grad_norm": 3.915306329727173,
|
|
"learning_rate": 8.331686046912908e-06,
|
|
"loss": 0.9369,
|
|
"step": 5724
|
|
},
|
|
{
|
|
"epoch": 1.5226063829787235,
|
|
"grad_norm": 3.7423787117004395,
|
|
"learning_rate": 8.331030191831904e-06,
|
|
"loss": 0.8416,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 1.5228723404255318,
|
|
"grad_norm": 3.554068088531494,
|
|
"learning_rate": 8.3303742336849e-06,
|
|
"loss": 0.7121,
|
|
"step": 5726
|
|
},
|
|
{
|
|
"epoch": 1.5231382978723405,
|
|
"grad_norm": 4.019564628601074,
|
|
"learning_rate": 8.3297181724922e-06,
|
|
"loss": 0.7882,
|
|
"step": 5727
|
|
},
|
|
{
|
|
"epoch": 1.523404255319149,
|
|
"grad_norm": 4.351405143737793,
|
|
"learning_rate": 8.3290620082741e-06,
|
|
"loss": 0.8769,
|
|
"step": 5728
|
|
},
|
|
{
|
|
"epoch": 1.5236702127659574,
|
|
"grad_norm": 3.942936658859253,
|
|
"learning_rate": 8.328405741050901e-06,
|
|
"loss": 0.924,
|
|
"step": 5729
|
|
},
|
|
{
|
|
"epoch": 1.523936170212766,
|
|
"grad_norm": 4.362167835235596,
|
|
"learning_rate": 8.327749370842909e-06,
|
|
"loss": 0.8015,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 1.5242021276595743,
|
|
"grad_norm": 3.7932353019714355,
|
|
"learning_rate": 8.327092897670432e-06,
|
|
"loss": 0.7993,
|
|
"step": 5731
|
|
},
|
|
{
|
|
"epoch": 1.524468085106383,
|
|
"grad_norm": 3.8214194774627686,
|
|
"learning_rate": 8.326436321553785e-06,
|
|
"loss": 0.7971,
|
|
"step": 5732
|
|
},
|
|
{
|
|
"epoch": 1.5247340425531914,
|
|
"grad_norm": 4.244415760040283,
|
|
"learning_rate": 8.325779642513283e-06,
|
|
"loss": 0.7253,
|
|
"step": 5733
|
|
},
|
|
{
|
|
"epoch": 1.525,
|
|
"grad_norm": 4.184083938598633,
|
|
"learning_rate": 8.325122860569241e-06,
|
|
"loss": 0.7849,
|
|
"step": 5734
|
|
},
|
|
{
|
|
"epoch": 1.5252659574468086,
|
|
"grad_norm": 4.359492301940918,
|
|
"learning_rate": 8.324465975741986e-06,
|
|
"loss": 0.8228,
|
|
"step": 5735
|
|
},
|
|
{
|
|
"epoch": 1.525531914893617,
|
|
"grad_norm": 3.8751020431518555,
|
|
"learning_rate": 8.323808988051837e-06,
|
|
"loss": 0.7288,
|
|
"step": 5736
|
|
},
|
|
{
|
|
"epoch": 1.5257978723404255,
|
|
"grad_norm": 4.366562843322754,
|
|
"learning_rate": 8.323151897519126e-06,
|
|
"loss": 0.8452,
|
|
"step": 5737
|
|
},
|
|
{
|
|
"epoch": 1.5260638297872342,
|
|
"grad_norm": 4.116846561431885,
|
|
"learning_rate": 8.322494704164182e-06,
|
|
"loss": 0.9376,
|
|
"step": 5738
|
|
},
|
|
{
|
|
"epoch": 1.5263297872340424,
|
|
"grad_norm": 4.062334060668945,
|
|
"learning_rate": 8.321837408007341e-06,
|
|
"loss": 0.855,
|
|
"step": 5739
|
|
},
|
|
{
|
|
"epoch": 1.5265957446808511,
|
|
"grad_norm": 4.4059014320373535,
|
|
"learning_rate": 8.321180009068937e-06,
|
|
"loss": 0.8832,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 1.5268617021276596,
|
|
"grad_norm": 4.124050140380859,
|
|
"learning_rate": 8.320522507369315e-06,
|
|
"loss": 0.7446,
|
|
"step": 5741
|
|
},
|
|
{
|
|
"epoch": 1.527127659574468,
|
|
"grad_norm": 3.721942901611328,
|
|
"learning_rate": 8.319864902928819e-06,
|
|
"loss": 0.8547,
|
|
"step": 5742
|
|
},
|
|
{
|
|
"epoch": 1.5273936170212767,
|
|
"grad_norm": 3.816612720489502,
|
|
"learning_rate": 8.31920719576779e-06,
|
|
"loss": 0.8478,
|
|
"step": 5743
|
|
},
|
|
{
|
|
"epoch": 1.527659574468085,
|
|
"grad_norm": 4.217785835266113,
|
|
"learning_rate": 8.318549385906587e-06,
|
|
"loss": 0.8573,
|
|
"step": 5744
|
|
},
|
|
{
|
|
"epoch": 1.5279255319148937,
|
|
"grad_norm": 4.105627536773682,
|
|
"learning_rate": 8.317891473365558e-06,
|
|
"loss": 0.8891,
|
|
"step": 5745
|
|
},
|
|
{
|
|
"epoch": 1.5281914893617021,
|
|
"grad_norm": 4.537158966064453,
|
|
"learning_rate": 8.317233458165059e-06,
|
|
"loss": 0.9119,
|
|
"step": 5746
|
|
},
|
|
{
|
|
"epoch": 1.5284574468085106,
|
|
"grad_norm": 4.287096977233887,
|
|
"learning_rate": 8.31657534032545e-06,
|
|
"loss": 0.8465,
|
|
"step": 5747
|
|
},
|
|
{
|
|
"epoch": 1.5287234042553193,
|
|
"grad_norm": 4.125601291656494,
|
|
"learning_rate": 8.315917119867098e-06,
|
|
"loss": 0.7537,
|
|
"step": 5748
|
|
},
|
|
{
|
|
"epoch": 1.5289893617021275,
|
|
"grad_norm": 4.014163017272949,
|
|
"learning_rate": 8.315258796810366e-06,
|
|
"loss": 0.7572,
|
|
"step": 5749
|
|
},
|
|
{
|
|
"epoch": 1.5292553191489362,
|
|
"grad_norm": 3.912703514099121,
|
|
"learning_rate": 8.314600371175623e-06,
|
|
"loss": 0.7825,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 1.5295212765957447,
|
|
"grad_norm": 3.731410264968872,
|
|
"learning_rate": 8.313941842983243e-06,
|
|
"loss": 0.9015,
|
|
"step": 5751
|
|
},
|
|
{
|
|
"epoch": 1.5297872340425531,
|
|
"grad_norm": 4.122485160827637,
|
|
"learning_rate": 8.313283212253598e-06,
|
|
"loss": 0.8381,
|
|
"step": 5752
|
|
},
|
|
{
|
|
"epoch": 1.5300531914893618,
|
|
"grad_norm": 4.2268757820129395,
|
|
"learning_rate": 8.312624479007072e-06,
|
|
"loss": 0.788,
|
|
"step": 5753
|
|
},
|
|
{
|
|
"epoch": 1.53031914893617,
|
|
"grad_norm": 4.129693508148193,
|
|
"learning_rate": 8.311965643264042e-06,
|
|
"loss": 0.6951,
|
|
"step": 5754
|
|
},
|
|
{
|
|
"epoch": 1.5305851063829787,
|
|
"grad_norm": 4.038047790527344,
|
|
"learning_rate": 8.311306705044898e-06,
|
|
"loss": 0.834,
|
|
"step": 5755
|
|
},
|
|
{
|
|
"epoch": 1.5308510638297872,
|
|
"grad_norm": 3.85589599609375,
|
|
"learning_rate": 8.310647664370026e-06,
|
|
"loss": 0.8583,
|
|
"step": 5756
|
|
},
|
|
{
|
|
"epoch": 1.5311170212765957,
|
|
"grad_norm": 3.889176845550537,
|
|
"learning_rate": 8.309988521259816e-06,
|
|
"loss": 0.8361,
|
|
"step": 5757
|
|
},
|
|
{
|
|
"epoch": 1.5313829787234043,
|
|
"grad_norm": 4.0538458824157715,
|
|
"learning_rate": 8.309329275734664e-06,
|
|
"loss": 0.6951,
|
|
"step": 5758
|
|
},
|
|
{
|
|
"epoch": 1.5316489361702128,
|
|
"grad_norm": 4.010767936706543,
|
|
"learning_rate": 8.30866992781497e-06,
|
|
"loss": 0.8313,
|
|
"step": 5759
|
|
},
|
|
{
|
|
"epoch": 1.5319148936170213,
|
|
"grad_norm": 3.897259473800659,
|
|
"learning_rate": 8.30801047752113e-06,
|
|
"loss": 0.7736,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 1.53218085106383,
|
|
"grad_norm": 4.07016134262085,
|
|
"learning_rate": 8.307350924873553e-06,
|
|
"loss": 0.8231,
|
|
"step": 5761
|
|
},
|
|
{
|
|
"epoch": 1.5324468085106382,
|
|
"grad_norm": 3.886470317840576,
|
|
"learning_rate": 8.306691269892646e-06,
|
|
"loss": 0.8535,
|
|
"step": 5762
|
|
},
|
|
{
|
|
"epoch": 1.5327127659574469,
|
|
"grad_norm": 3.458498477935791,
|
|
"learning_rate": 8.306031512598815e-06,
|
|
"loss": 0.7291,
|
|
"step": 5763
|
|
},
|
|
{
|
|
"epoch": 1.5329787234042553,
|
|
"grad_norm": 3.6657865047454834,
|
|
"learning_rate": 8.305371653012479e-06,
|
|
"loss": 0.8239,
|
|
"step": 5764
|
|
},
|
|
{
|
|
"epoch": 1.5332446808510638,
|
|
"grad_norm": 4.054435729980469,
|
|
"learning_rate": 8.304711691154052e-06,
|
|
"loss": 0.7947,
|
|
"step": 5765
|
|
},
|
|
{
|
|
"epoch": 1.5335106382978725,
|
|
"grad_norm": 4.395258903503418,
|
|
"learning_rate": 8.304051627043952e-06,
|
|
"loss": 0.8615,
|
|
"step": 5766
|
|
},
|
|
{
|
|
"epoch": 1.5337765957446807,
|
|
"grad_norm": 4.212094306945801,
|
|
"learning_rate": 8.303391460702607e-06,
|
|
"loss": 0.7645,
|
|
"step": 5767
|
|
},
|
|
{
|
|
"epoch": 1.5340425531914894,
|
|
"grad_norm": 4.2090044021606445,
|
|
"learning_rate": 8.302731192150441e-06,
|
|
"loss": 0.8463,
|
|
"step": 5768
|
|
},
|
|
{
|
|
"epoch": 1.5343085106382979,
|
|
"grad_norm": 3.734283685684204,
|
|
"learning_rate": 8.302070821407882e-06,
|
|
"loss": 0.7986,
|
|
"step": 5769
|
|
},
|
|
{
|
|
"epoch": 1.5345744680851063,
|
|
"grad_norm": 4.0931291580200195,
|
|
"learning_rate": 8.301410348495366e-06,
|
|
"loss": 0.7541,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 1.534840425531915,
|
|
"grad_norm": 3.604841470718384,
|
|
"learning_rate": 8.300749773433325e-06,
|
|
"loss": 0.8511,
|
|
"step": 5771
|
|
},
|
|
{
|
|
"epoch": 1.5351063829787233,
|
|
"grad_norm": 3.881558895111084,
|
|
"learning_rate": 8.300089096242201e-06,
|
|
"loss": 0.7382,
|
|
"step": 5772
|
|
},
|
|
{
|
|
"epoch": 1.535372340425532,
|
|
"grad_norm": 3.472681760787964,
|
|
"learning_rate": 8.299428316942435e-06,
|
|
"loss": 0.7106,
|
|
"step": 5773
|
|
},
|
|
{
|
|
"epoch": 1.5356382978723404,
|
|
"grad_norm": 3.5763661861419678,
|
|
"learning_rate": 8.298767435554473e-06,
|
|
"loss": 0.6924,
|
|
"step": 5774
|
|
},
|
|
{
|
|
"epoch": 1.5359042553191489,
|
|
"grad_norm": 3.965982437133789,
|
|
"learning_rate": 8.298106452098761e-06,
|
|
"loss": 0.8163,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 1.5361702127659576,
|
|
"grad_norm": 3.9243502616882324,
|
|
"learning_rate": 8.297445366595754e-06,
|
|
"loss": 0.8372,
|
|
"step": 5776
|
|
},
|
|
{
|
|
"epoch": 1.5364361702127658,
|
|
"grad_norm": 3.8713953495025635,
|
|
"learning_rate": 8.296784179065904e-06,
|
|
"loss": 0.7919,
|
|
"step": 5777
|
|
},
|
|
{
|
|
"epoch": 1.5367021276595745,
|
|
"grad_norm": 3.7591898441314697,
|
|
"learning_rate": 8.29612288952967e-06,
|
|
"loss": 0.8597,
|
|
"step": 5778
|
|
},
|
|
{
|
|
"epoch": 1.536968085106383,
|
|
"grad_norm": 4.25253438949585,
|
|
"learning_rate": 8.295461498007513e-06,
|
|
"loss": 1.0482,
|
|
"step": 5779
|
|
},
|
|
{
|
|
"epoch": 1.5372340425531914,
|
|
"grad_norm": 3.846035957336426,
|
|
"learning_rate": 8.294800004519895e-06,
|
|
"loss": 0.8348,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 1.5375,
|
|
"grad_norm": 3.652987003326416,
|
|
"learning_rate": 8.29413840908729e-06,
|
|
"loss": 0.7409,
|
|
"step": 5781
|
|
},
|
|
{
|
|
"epoch": 1.5377659574468086,
|
|
"grad_norm": 4.131805419921875,
|
|
"learning_rate": 8.293476711730163e-06,
|
|
"loss": 0.8703,
|
|
"step": 5782
|
|
},
|
|
{
|
|
"epoch": 1.538031914893617,
|
|
"grad_norm": 4.142578125,
|
|
"learning_rate": 8.292814912468988e-06,
|
|
"loss": 0.881,
|
|
"step": 5783
|
|
},
|
|
{
|
|
"epoch": 1.5382978723404257,
|
|
"grad_norm": 3.5386013984680176,
|
|
"learning_rate": 8.292153011324242e-06,
|
|
"loss": 0.7984,
|
|
"step": 5784
|
|
},
|
|
{
|
|
"epoch": 1.538563829787234,
|
|
"grad_norm": 4.26931619644165,
|
|
"learning_rate": 8.291491008316409e-06,
|
|
"loss": 0.8968,
|
|
"step": 5785
|
|
},
|
|
{
|
|
"epoch": 1.5388297872340426,
|
|
"grad_norm": 4.214763164520264,
|
|
"learning_rate": 8.290828903465965e-06,
|
|
"loss": 0.7912,
|
|
"step": 5786
|
|
},
|
|
{
|
|
"epoch": 1.539095744680851,
|
|
"grad_norm": 4.008779525756836,
|
|
"learning_rate": 8.290166696793405e-06,
|
|
"loss": 0.8708,
|
|
"step": 5787
|
|
},
|
|
{
|
|
"epoch": 1.5393617021276595,
|
|
"grad_norm": 3.722784996032715,
|
|
"learning_rate": 8.28950438831921e-06,
|
|
"loss": 0.8047,
|
|
"step": 5788
|
|
},
|
|
{
|
|
"epoch": 1.5396276595744682,
|
|
"grad_norm": 3.9850144386291504,
|
|
"learning_rate": 8.288841978063877e-06,
|
|
"loss": 0.8583,
|
|
"step": 5789
|
|
},
|
|
{
|
|
"epoch": 1.5398936170212765,
|
|
"grad_norm": 3.7640953063964844,
|
|
"learning_rate": 8.288179466047903e-06,
|
|
"loss": 0.899,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 1.5401595744680852,
|
|
"grad_norm": 3.9535369873046875,
|
|
"learning_rate": 8.287516852291784e-06,
|
|
"loss": 0.671,
|
|
"step": 5791
|
|
},
|
|
{
|
|
"epoch": 1.5404255319148936,
|
|
"grad_norm": 3.784611940383911,
|
|
"learning_rate": 8.28685413681602e-06,
|
|
"loss": 0.955,
|
|
"step": 5792
|
|
},
|
|
{
|
|
"epoch": 1.540691489361702,
|
|
"grad_norm": 4.205324172973633,
|
|
"learning_rate": 8.286191319641123e-06,
|
|
"loss": 0.8411,
|
|
"step": 5793
|
|
},
|
|
{
|
|
"epoch": 1.5409574468085108,
|
|
"grad_norm": 4.253503322601318,
|
|
"learning_rate": 8.285528400787597e-06,
|
|
"loss": 0.7707,
|
|
"step": 5794
|
|
},
|
|
{
|
|
"epoch": 1.541223404255319,
|
|
"grad_norm": 3.7679977416992188,
|
|
"learning_rate": 8.284865380275953e-06,
|
|
"loss": 0.9103,
|
|
"step": 5795
|
|
},
|
|
{
|
|
"epoch": 1.5414893617021277,
|
|
"grad_norm": 4.094081878662109,
|
|
"learning_rate": 8.284202258126706e-06,
|
|
"loss": 0.9798,
|
|
"step": 5796
|
|
},
|
|
{
|
|
"epoch": 1.5417553191489362,
|
|
"grad_norm": 4.189050674438477,
|
|
"learning_rate": 8.283539034360376e-06,
|
|
"loss": 0.8641,
|
|
"step": 5797
|
|
},
|
|
{
|
|
"epoch": 1.5420212765957446,
|
|
"grad_norm": 4.017099857330322,
|
|
"learning_rate": 8.282875708997482e-06,
|
|
"loss": 0.8214,
|
|
"step": 5798
|
|
},
|
|
{
|
|
"epoch": 1.5422872340425533,
|
|
"grad_norm": 3.6189417839050293,
|
|
"learning_rate": 8.282212282058549e-06,
|
|
"loss": 0.7486,
|
|
"step": 5799
|
|
},
|
|
{
|
|
"epoch": 1.5425531914893615,
|
|
"grad_norm": 4.480672359466553,
|
|
"learning_rate": 8.281548753564101e-06,
|
|
"loss": 0.9041,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 1.5428191489361702,
|
|
"grad_norm": 4.047300338745117,
|
|
"learning_rate": 8.280885123534673e-06,
|
|
"loss": 0.9519,
|
|
"step": 5801
|
|
},
|
|
{
|
|
"epoch": 1.5430851063829787,
|
|
"grad_norm": 4.379581928253174,
|
|
"learning_rate": 8.280221391990797e-06,
|
|
"loss": 0.9203,
|
|
"step": 5802
|
|
},
|
|
{
|
|
"epoch": 1.5433510638297872,
|
|
"grad_norm": 4.053439140319824,
|
|
"learning_rate": 8.279557558953009e-06,
|
|
"loss": 0.7759,
|
|
"step": 5803
|
|
},
|
|
{
|
|
"epoch": 1.5436170212765958,
|
|
"grad_norm": 3.927568197250366,
|
|
"learning_rate": 8.278893624441849e-06,
|
|
"loss": 0.7132,
|
|
"step": 5804
|
|
},
|
|
{
|
|
"epoch": 1.5438829787234043,
|
|
"grad_norm": 4.322382926940918,
|
|
"learning_rate": 8.278229588477857e-06,
|
|
"loss": 0.8272,
|
|
"step": 5805
|
|
},
|
|
{
|
|
"epoch": 1.5441489361702128,
|
|
"grad_norm": 3.6044352054595947,
|
|
"learning_rate": 8.277565451081587e-06,
|
|
"loss": 0.7487,
|
|
"step": 5806
|
|
},
|
|
{
|
|
"epoch": 1.5444148936170212,
|
|
"grad_norm": 3.7423501014709473,
|
|
"learning_rate": 8.27690121227358e-06,
|
|
"loss": 0.7342,
|
|
"step": 5807
|
|
},
|
|
{
|
|
"epoch": 1.5446808510638297,
|
|
"grad_norm": 3.7679383754730225,
|
|
"learning_rate": 8.27623687207439e-06,
|
|
"loss": 0.7897,
|
|
"step": 5808
|
|
},
|
|
{
|
|
"epoch": 1.5449468085106384,
|
|
"grad_norm": 3.7263903617858887,
|
|
"learning_rate": 8.275572430504578e-06,
|
|
"loss": 0.8311,
|
|
"step": 5809
|
|
},
|
|
{
|
|
"epoch": 1.5452127659574468,
|
|
"grad_norm": 3.551025390625,
|
|
"learning_rate": 8.274907887584695e-06,
|
|
"loss": 0.6916,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 1.5454787234042553,
|
|
"grad_norm": 3.8874595165252686,
|
|
"learning_rate": 8.274243243335307e-06,
|
|
"loss": 0.8246,
|
|
"step": 5811
|
|
},
|
|
{
|
|
"epoch": 1.545744680851064,
|
|
"grad_norm": 3.7710976600646973,
|
|
"learning_rate": 8.27357849777698e-06,
|
|
"loss": 0.8668,
|
|
"step": 5812
|
|
},
|
|
{
|
|
"epoch": 1.5460106382978722,
|
|
"grad_norm": 4.312849044799805,
|
|
"learning_rate": 8.272913650930277e-06,
|
|
"loss": 0.9206,
|
|
"step": 5813
|
|
},
|
|
{
|
|
"epoch": 1.546276595744681,
|
|
"grad_norm": 4.059734344482422,
|
|
"learning_rate": 8.272248702815776e-06,
|
|
"loss": 0.77,
|
|
"step": 5814
|
|
},
|
|
{
|
|
"epoch": 1.5465425531914894,
|
|
"grad_norm": 3.781832456588745,
|
|
"learning_rate": 8.271583653454046e-06,
|
|
"loss": 0.7643,
|
|
"step": 5815
|
|
},
|
|
{
|
|
"epoch": 1.5468085106382978,
|
|
"grad_norm": 3.607161045074463,
|
|
"learning_rate": 8.270918502865663e-06,
|
|
"loss": 0.7721,
|
|
"step": 5816
|
|
},
|
|
{
|
|
"epoch": 1.5470744680851065,
|
|
"grad_norm": 3.986572504043579,
|
|
"learning_rate": 8.270253251071214e-06,
|
|
"loss": 0.6967,
|
|
"step": 5817
|
|
},
|
|
{
|
|
"epoch": 1.5473404255319148,
|
|
"grad_norm": 3.9674570560455322,
|
|
"learning_rate": 8.269587898091277e-06,
|
|
"loss": 0.7986,
|
|
"step": 5818
|
|
},
|
|
{
|
|
"epoch": 1.5476063829787234,
|
|
"grad_norm": 3.794405698776245,
|
|
"learning_rate": 8.268922443946444e-06,
|
|
"loss": 0.7897,
|
|
"step": 5819
|
|
},
|
|
{
|
|
"epoch": 1.547872340425532,
|
|
"grad_norm": 3.5226500034332275,
|
|
"learning_rate": 8.2682568886573e-06,
|
|
"loss": 0.7474,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 1.5481382978723404,
|
|
"grad_norm": 3.692884922027588,
|
|
"learning_rate": 8.267591232244439e-06,
|
|
"loss": 0.9286,
|
|
"step": 5821
|
|
},
|
|
{
|
|
"epoch": 1.548404255319149,
|
|
"grad_norm": 4.193415641784668,
|
|
"learning_rate": 8.266925474728459e-06,
|
|
"loss": 0.7917,
|
|
"step": 5822
|
|
},
|
|
{
|
|
"epoch": 1.5486702127659573,
|
|
"grad_norm": 3.877485752105713,
|
|
"learning_rate": 8.266259616129959e-06,
|
|
"loss": 0.8366,
|
|
"step": 5823
|
|
},
|
|
{
|
|
"epoch": 1.548936170212766,
|
|
"grad_norm": 3.8126795291900635,
|
|
"learning_rate": 8.26559365646954e-06,
|
|
"loss": 0.7591,
|
|
"step": 5824
|
|
},
|
|
{
|
|
"epoch": 1.5492021276595744,
|
|
"grad_norm": 4.233253479003906,
|
|
"learning_rate": 8.264927595767808e-06,
|
|
"loss": 0.8596,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 1.549468085106383,
|
|
"grad_norm": 4.092543601989746,
|
|
"learning_rate": 8.264261434045374e-06,
|
|
"loss": 0.7732,
|
|
"step": 5826
|
|
},
|
|
{
|
|
"epoch": 1.5497340425531916,
|
|
"grad_norm": 4.047788619995117,
|
|
"learning_rate": 8.263595171322847e-06,
|
|
"loss": 0.8763,
|
|
"step": 5827
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"grad_norm": 3.990832805633545,
|
|
"learning_rate": 8.262928807620843e-06,
|
|
"loss": 0.8313,
|
|
"step": 5828
|
|
},
|
|
{
|
|
"epoch": 1.5502659574468085,
|
|
"grad_norm": 3.948673725128174,
|
|
"learning_rate": 8.262262342959981e-06,
|
|
"loss": 0.8937,
|
|
"step": 5829
|
|
},
|
|
{
|
|
"epoch": 1.550531914893617,
|
|
"grad_norm": 4.302928924560547,
|
|
"learning_rate": 8.261595777360881e-06,
|
|
"loss": 0.7945,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 1.5507978723404254,
|
|
"grad_norm": 3.8130292892456055,
|
|
"learning_rate": 8.260929110844166e-06,
|
|
"loss": 0.7971,
|
|
"step": 5831
|
|
},
|
|
{
|
|
"epoch": 1.5510638297872341,
|
|
"grad_norm": 3.7944552898406982,
|
|
"learning_rate": 8.260262343430468e-06,
|
|
"loss": 0.7268,
|
|
"step": 5832
|
|
},
|
|
{
|
|
"epoch": 1.5513297872340426,
|
|
"grad_norm": 3.765657424926758,
|
|
"learning_rate": 8.259595475140412e-06,
|
|
"loss": 0.7289,
|
|
"step": 5833
|
|
},
|
|
{
|
|
"epoch": 1.551595744680851,
|
|
"grad_norm": 4.215806484222412,
|
|
"learning_rate": 8.258928505994635e-06,
|
|
"loss": 0.8254,
|
|
"step": 5834
|
|
},
|
|
{
|
|
"epoch": 1.5518617021276597,
|
|
"grad_norm": 3.7282323837280273,
|
|
"learning_rate": 8.258261436013774e-06,
|
|
"loss": 0.8426,
|
|
"step": 5835
|
|
},
|
|
{
|
|
"epoch": 1.552127659574468,
|
|
"grad_norm": 4.05489444732666,
|
|
"learning_rate": 8.257594265218468e-06,
|
|
"loss": 0.832,
|
|
"step": 5836
|
|
},
|
|
{
|
|
"epoch": 1.5523936170212767,
|
|
"grad_norm": 4.3416666984558105,
|
|
"learning_rate": 8.256926993629358e-06,
|
|
"loss": 0.844,
|
|
"step": 5837
|
|
},
|
|
{
|
|
"epoch": 1.5526595744680851,
|
|
"grad_norm": 4.158813953399658,
|
|
"learning_rate": 8.256259621267095e-06,
|
|
"loss": 0.7328,
|
|
"step": 5838
|
|
},
|
|
{
|
|
"epoch": 1.5529255319148936,
|
|
"grad_norm": 4.071340560913086,
|
|
"learning_rate": 8.255592148152325e-06,
|
|
"loss": 0.7983,
|
|
"step": 5839
|
|
},
|
|
{
|
|
"epoch": 1.5531914893617023,
|
|
"grad_norm": 3.988938093185425,
|
|
"learning_rate": 8.254924574305698e-06,
|
|
"loss": 0.7863,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 1.5534574468085105,
|
|
"grad_norm": 3.8350539207458496,
|
|
"learning_rate": 8.254256899747876e-06,
|
|
"loss": 0.8347,
|
|
"step": 5841
|
|
},
|
|
{
|
|
"epoch": 1.5537234042553192,
|
|
"grad_norm": 3.7759451866149902,
|
|
"learning_rate": 8.253589124499513e-06,
|
|
"loss": 0.7486,
|
|
"step": 5842
|
|
},
|
|
{
|
|
"epoch": 1.5539893617021276,
|
|
"grad_norm": 4.114711284637451,
|
|
"learning_rate": 8.252921248581272e-06,
|
|
"loss": 0.8939,
|
|
"step": 5843
|
|
},
|
|
{
|
|
"epoch": 1.554255319148936,
|
|
"grad_norm": 4.071899890899658,
|
|
"learning_rate": 8.252253272013816e-06,
|
|
"loss": 0.7912,
|
|
"step": 5844
|
|
},
|
|
{
|
|
"epoch": 1.5545212765957448,
|
|
"grad_norm": 3.5732295513153076,
|
|
"learning_rate": 8.251585194817816e-06,
|
|
"loss": 0.7897,
|
|
"step": 5845
|
|
},
|
|
{
|
|
"epoch": 1.554787234042553,
|
|
"grad_norm": 3.884356737136841,
|
|
"learning_rate": 8.250917017013943e-06,
|
|
"loss": 0.8328,
|
|
"step": 5846
|
|
},
|
|
{
|
|
"epoch": 1.5550531914893617,
|
|
"grad_norm": 4.147099018096924,
|
|
"learning_rate": 8.250248738622868e-06,
|
|
"loss": 0.8425,
|
|
"step": 5847
|
|
},
|
|
{
|
|
"epoch": 1.5553191489361702,
|
|
"grad_norm": 4.285495758056641,
|
|
"learning_rate": 8.249580359665272e-06,
|
|
"loss": 0.9088,
|
|
"step": 5848
|
|
},
|
|
{
|
|
"epoch": 1.5555851063829786,
|
|
"grad_norm": 3.903362512588501,
|
|
"learning_rate": 8.248911880161832e-06,
|
|
"loss": 0.8711,
|
|
"step": 5849
|
|
},
|
|
{
|
|
"epoch": 1.5558510638297873,
|
|
"grad_norm": 3.910297155380249,
|
|
"learning_rate": 8.248243300133236e-06,
|
|
"loss": 0.8571,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 1.5561170212765958,
|
|
"grad_norm": 3.7283291816711426,
|
|
"learning_rate": 8.247574619600165e-06,
|
|
"loss": 0.8114,
|
|
"step": 5851
|
|
},
|
|
{
|
|
"epoch": 1.5563829787234043,
|
|
"grad_norm": 4.2508864402771,
|
|
"learning_rate": 8.246905838583315e-06,
|
|
"loss": 0.8498,
|
|
"step": 5852
|
|
},
|
|
{
|
|
"epoch": 1.5566489361702127,
|
|
"grad_norm": 3.5398671627044678,
|
|
"learning_rate": 8.246236957103374e-06,
|
|
"loss": 0.7013,
|
|
"step": 5853
|
|
},
|
|
{
|
|
"epoch": 1.5569148936170212,
|
|
"grad_norm": 3.609945297241211,
|
|
"learning_rate": 8.245567975181037e-06,
|
|
"loss": 0.7113,
|
|
"step": 5854
|
|
},
|
|
{
|
|
"epoch": 1.5571808510638299,
|
|
"grad_norm": 3.550767660140991,
|
|
"learning_rate": 8.244898892837009e-06,
|
|
"loss": 0.753,
|
|
"step": 5855
|
|
},
|
|
{
|
|
"epoch": 1.5574468085106383,
|
|
"grad_norm": 4.197300434112549,
|
|
"learning_rate": 8.244229710091986e-06,
|
|
"loss": 0.7006,
|
|
"step": 5856
|
|
},
|
|
{
|
|
"epoch": 1.5577127659574468,
|
|
"grad_norm": 3.916386842727661,
|
|
"learning_rate": 8.243560426966678e-06,
|
|
"loss": 0.7071,
|
|
"step": 5857
|
|
},
|
|
{
|
|
"epoch": 1.5579787234042555,
|
|
"grad_norm": 4.1130218505859375,
|
|
"learning_rate": 8.242891043481793e-06,
|
|
"loss": 0.8622,
|
|
"step": 5858
|
|
},
|
|
{
|
|
"epoch": 1.5582446808510637,
|
|
"grad_norm": 3.9336955547332764,
|
|
"learning_rate": 8.242221559658039e-06,
|
|
"loss": 0.7626,
|
|
"step": 5859
|
|
},
|
|
{
|
|
"epoch": 1.5585106382978724,
|
|
"grad_norm": 4.237149715423584,
|
|
"learning_rate": 8.241551975516133e-06,
|
|
"loss": 0.8566,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 1.5587765957446809,
|
|
"grad_norm": 4.12725305557251,
|
|
"learning_rate": 8.240882291076794e-06,
|
|
"loss": 0.7879,
|
|
"step": 5861
|
|
},
|
|
{
|
|
"epoch": 1.5590425531914893,
|
|
"grad_norm": 4.043492794036865,
|
|
"learning_rate": 8.240212506360738e-06,
|
|
"loss": 0.8772,
|
|
"step": 5862
|
|
},
|
|
{
|
|
"epoch": 1.559308510638298,
|
|
"grad_norm": 3.8735363483428955,
|
|
"learning_rate": 8.239542621388696e-06,
|
|
"loss": 0.9265,
|
|
"step": 5863
|
|
},
|
|
{
|
|
"epoch": 1.5595744680851062,
|
|
"grad_norm": 4.195898056030273,
|
|
"learning_rate": 8.23887263618139e-06,
|
|
"loss": 0.9022,
|
|
"step": 5864
|
|
},
|
|
{
|
|
"epoch": 1.559840425531915,
|
|
"grad_norm": 3.4813778400421143,
|
|
"learning_rate": 8.23820255075955e-06,
|
|
"loss": 0.7605,
|
|
"step": 5865
|
|
},
|
|
{
|
|
"epoch": 1.5601063829787234,
|
|
"grad_norm": 3.5564541816711426,
|
|
"learning_rate": 8.237532365143909e-06,
|
|
"loss": 0.7148,
|
|
"step": 5866
|
|
},
|
|
{
|
|
"epoch": 1.5603723404255319,
|
|
"grad_norm": 4.291294097900391,
|
|
"learning_rate": 8.236862079355208e-06,
|
|
"loss": 1.022,
|
|
"step": 5867
|
|
},
|
|
{
|
|
"epoch": 1.5606382978723405,
|
|
"grad_norm": 3.761632204055786,
|
|
"learning_rate": 8.236191693414184e-06,
|
|
"loss": 0.8673,
|
|
"step": 5868
|
|
},
|
|
{
|
|
"epoch": 1.5609042553191488,
|
|
"grad_norm": 3.8336169719696045,
|
|
"learning_rate": 8.235521207341577e-06,
|
|
"loss": 0.7979,
|
|
"step": 5869
|
|
},
|
|
{
|
|
"epoch": 1.5611702127659575,
|
|
"grad_norm": 3.8964157104492188,
|
|
"learning_rate": 8.234850621158135e-06,
|
|
"loss": 0.7466,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 1.561436170212766,
|
|
"grad_norm": 3.8827109336853027,
|
|
"learning_rate": 8.234179934884605e-06,
|
|
"loss": 0.953,
|
|
"step": 5871
|
|
},
|
|
{
|
|
"epoch": 1.5617021276595744,
|
|
"grad_norm": 4.318760395050049,
|
|
"learning_rate": 8.23350914854174e-06,
|
|
"loss": 0.8975,
|
|
"step": 5872
|
|
},
|
|
{
|
|
"epoch": 1.561968085106383,
|
|
"grad_norm": 3.927676200866699,
|
|
"learning_rate": 8.232838262150298e-06,
|
|
"loss": 0.8148,
|
|
"step": 5873
|
|
},
|
|
{
|
|
"epoch": 1.5622340425531915,
|
|
"grad_norm": 4.160933017730713,
|
|
"learning_rate": 8.23216727573103e-06,
|
|
"loss": 0.7736,
|
|
"step": 5874
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 4.034573078155518,
|
|
"learning_rate": 8.231496189304704e-06,
|
|
"loss": 0.7754,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 1.5627659574468085,
|
|
"grad_norm": 4.033196926116943,
|
|
"learning_rate": 8.230825002892081e-06,
|
|
"loss": 0.8588,
|
|
"step": 5876
|
|
},
|
|
{
|
|
"epoch": 1.563031914893617,
|
|
"grad_norm": 3.949902057647705,
|
|
"learning_rate": 8.23015371651393e-06,
|
|
"loss": 0.8279,
|
|
"step": 5877
|
|
},
|
|
{
|
|
"epoch": 1.5632978723404256,
|
|
"grad_norm": 3.8417794704437256,
|
|
"learning_rate": 8.229482330191016e-06,
|
|
"loss": 0.7201,
|
|
"step": 5878
|
|
},
|
|
{
|
|
"epoch": 1.563563829787234,
|
|
"grad_norm": 3.836516857147217,
|
|
"learning_rate": 8.22881084394412e-06,
|
|
"loss": 0.9244,
|
|
"step": 5879
|
|
},
|
|
{
|
|
"epoch": 1.5638297872340425,
|
|
"grad_norm": 3.882302761077881,
|
|
"learning_rate": 8.228139257794012e-06,
|
|
"loss": 0.7944,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 1.5640957446808512,
|
|
"grad_norm": 4.163621425628662,
|
|
"learning_rate": 8.227467571761478e-06,
|
|
"loss": 0.7916,
|
|
"step": 5881
|
|
},
|
|
{
|
|
"epoch": 1.5643617021276595,
|
|
"grad_norm": 3.8937926292419434,
|
|
"learning_rate": 8.226795785867294e-06,
|
|
"loss": 0.7165,
|
|
"step": 5882
|
|
},
|
|
{
|
|
"epoch": 1.5646276595744681,
|
|
"grad_norm": 4.019950866699219,
|
|
"learning_rate": 8.226123900132252e-06,
|
|
"loss": 0.8444,
|
|
"step": 5883
|
|
},
|
|
{
|
|
"epoch": 1.5648936170212766,
|
|
"grad_norm": 3.9146535396575928,
|
|
"learning_rate": 8.225451914577137e-06,
|
|
"loss": 0.7472,
|
|
"step": 5884
|
|
},
|
|
{
|
|
"epoch": 1.565159574468085,
|
|
"grad_norm": 4.430140018463135,
|
|
"learning_rate": 8.224779829222742e-06,
|
|
"loss": 0.8139,
|
|
"step": 5885
|
|
},
|
|
{
|
|
"epoch": 1.5654255319148938,
|
|
"grad_norm": 3.8101890087127686,
|
|
"learning_rate": 8.224107644089863e-06,
|
|
"loss": 0.8198,
|
|
"step": 5886
|
|
},
|
|
{
|
|
"epoch": 1.565691489361702,
|
|
"grad_norm": 3.603240966796875,
|
|
"learning_rate": 8.223435359199297e-06,
|
|
"loss": 0.7507,
|
|
"step": 5887
|
|
},
|
|
{
|
|
"epoch": 1.5659574468085107,
|
|
"grad_norm": 3.993999719619751,
|
|
"learning_rate": 8.222762974571848e-06,
|
|
"loss": 0.6875,
|
|
"step": 5888
|
|
},
|
|
{
|
|
"epoch": 1.5662234042553191,
|
|
"grad_norm": 4.127441883087158,
|
|
"learning_rate": 8.222090490228316e-06,
|
|
"loss": 0.7653,
|
|
"step": 5889
|
|
},
|
|
{
|
|
"epoch": 1.5664893617021276,
|
|
"grad_norm": 4.082408428192139,
|
|
"learning_rate": 8.22141790618951e-06,
|
|
"loss": 0.8506,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 1.5667553191489363,
|
|
"grad_norm": 4.1307806968688965,
|
|
"learning_rate": 8.220745222476243e-06,
|
|
"loss": 0.7614,
|
|
"step": 5891
|
|
},
|
|
{
|
|
"epoch": 1.5670212765957445,
|
|
"grad_norm": 3.9022128582000732,
|
|
"learning_rate": 8.220072439109326e-06,
|
|
"loss": 0.8563,
|
|
"step": 5892
|
|
},
|
|
{
|
|
"epoch": 1.5672872340425532,
|
|
"grad_norm": 3.8020009994506836,
|
|
"learning_rate": 8.219399556109578e-06,
|
|
"loss": 0.8016,
|
|
"step": 5893
|
|
},
|
|
{
|
|
"epoch": 1.5675531914893617,
|
|
"grad_norm": 4.383156776428223,
|
|
"learning_rate": 8.218726573497817e-06,
|
|
"loss": 0.7956,
|
|
"step": 5894
|
|
},
|
|
{
|
|
"epoch": 1.5678191489361701,
|
|
"grad_norm": 4.414666175842285,
|
|
"learning_rate": 8.218053491294864e-06,
|
|
"loss": 0.8215,
|
|
"step": 5895
|
|
},
|
|
{
|
|
"epoch": 1.5680851063829788,
|
|
"grad_norm": 4.223287105560303,
|
|
"learning_rate": 8.21738030952155e-06,
|
|
"loss": 0.8466,
|
|
"step": 5896
|
|
},
|
|
{
|
|
"epoch": 1.5683510638297873,
|
|
"grad_norm": 4.012655735015869,
|
|
"learning_rate": 8.216707028198699e-06,
|
|
"loss": 0.7384,
|
|
"step": 5897
|
|
},
|
|
{
|
|
"epoch": 1.5686170212765957,
|
|
"grad_norm": 4.301409721374512,
|
|
"learning_rate": 8.216033647347145e-06,
|
|
"loss": 0.7748,
|
|
"step": 5898
|
|
},
|
|
{
|
|
"epoch": 1.5688829787234042,
|
|
"grad_norm": 4.148224353790283,
|
|
"learning_rate": 8.215360166987728e-06,
|
|
"loss": 0.8227,
|
|
"step": 5899
|
|
},
|
|
{
|
|
"epoch": 1.5691489361702127,
|
|
"grad_norm": 4.055191993713379,
|
|
"learning_rate": 8.214686587141277e-06,
|
|
"loss": 0.7811,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 1.5694148936170214,
|
|
"grad_norm": 3.9274792671203613,
|
|
"learning_rate": 8.21401290782864e-06,
|
|
"loss": 0.7934,
|
|
"step": 5901
|
|
},
|
|
{
|
|
"epoch": 1.5696808510638298,
|
|
"grad_norm": 3.762334108352661,
|
|
"learning_rate": 8.213339129070658e-06,
|
|
"loss": 0.7967,
|
|
"step": 5902
|
|
},
|
|
{
|
|
"epoch": 1.5699468085106383,
|
|
"grad_norm": 4.094070911407471,
|
|
"learning_rate": 8.212665250888184e-06,
|
|
"loss": 0.8637,
|
|
"step": 5903
|
|
},
|
|
{
|
|
"epoch": 1.570212765957447,
|
|
"grad_norm": 3.871859550476074,
|
|
"learning_rate": 8.21199127330206e-06,
|
|
"loss": 0.7181,
|
|
"step": 5904
|
|
},
|
|
{
|
|
"epoch": 1.5704787234042552,
|
|
"grad_norm": 4.029532432556152,
|
|
"learning_rate": 8.211317196333149e-06,
|
|
"loss": 0.756,
|
|
"step": 5905
|
|
},
|
|
{
|
|
"epoch": 1.570744680851064,
|
|
"grad_norm": 3.982078790664673,
|
|
"learning_rate": 8.2106430200023e-06,
|
|
"loss": 0.7437,
|
|
"step": 5906
|
|
},
|
|
{
|
|
"epoch": 1.5710106382978724,
|
|
"grad_norm": 4.319076061248779,
|
|
"learning_rate": 8.209968744330375e-06,
|
|
"loss": 0.8517,
|
|
"step": 5907
|
|
},
|
|
{
|
|
"epoch": 1.5712765957446808,
|
|
"grad_norm": 3.5704493522644043,
|
|
"learning_rate": 8.20929436933824e-06,
|
|
"loss": 0.7369,
|
|
"step": 5908
|
|
},
|
|
{
|
|
"epoch": 1.5715425531914895,
|
|
"grad_norm": 3.825941562652588,
|
|
"learning_rate": 8.208619895046759e-06,
|
|
"loss": 0.7644,
|
|
"step": 5909
|
|
},
|
|
{
|
|
"epoch": 1.5718085106382977,
|
|
"grad_norm": 3.535365581512451,
|
|
"learning_rate": 8.2079453214768e-06,
|
|
"loss": 0.8191,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 1.5720744680851064,
|
|
"grad_norm": 4.012056827545166,
|
|
"learning_rate": 8.207270648649235e-06,
|
|
"loss": 0.805,
|
|
"step": 5911
|
|
},
|
|
{
|
|
"epoch": 1.5723404255319149,
|
|
"grad_norm": 3.670342206954956,
|
|
"learning_rate": 8.20659587658494e-06,
|
|
"loss": 0.7253,
|
|
"step": 5912
|
|
},
|
|
{
|
|
"epoch": 1.5726063829787233,
|
|
"grad_norm": 3.5404562950134277,
|
|
"learning_rate": 8.205921005304796e-06,
|
|
"loss": 0.7078,
|
|
"step": 5913
|
|
},
|
|
{
|
|
"epoch": 1.572872340425532,
|
|
"grad_norm": 4.304678916931152,
|
|
"learning_rate": 8.20524603482968e-06,
|
|
"loss": 0.8129,
|
|
"step": 5914
|
|
},
|
|
{
|
|
"epoch": 1.5731382978723403,
|
|
"grad_norm": 3.6795125007629395,
|
|
"learning_rate": 8.204570965180476e-06,
|
|
"loss": 0.7669,
|
|
"step": 5915
|
|
},
|
|
{
|
|
"epoch": 1.573404255319149,
|
|
"grad_norm": 3.8298754692077637,
|
|
"learning_rate": 8.203895796378076e-06,
|
|
"loss": 0.7803,
|
|
"step": 5916
|
|
},
|
|
{
|
|
"epoch": 1.5736702127659574,
|
|
"grad_norm": 4.399144649505615,
|
|
"learning_rate": 8.203220528443367e-06,
|
|
"loss": 0.9503,
|
|
"step": 5917
|
|
},
|
|
{
|
|
"epoch": 1.5739361702127659,
|
|
"grad_norm": 4.104849815368652,
|
|
"learning_rate": 8.202545161397242e-06,
|
|
"loss": 0.8586,
|
|
"step": 5918
|
|
},
|
|
{
|
|
"epoch": 1.5742021276595746,
|
|
"grad_norm": 4.923317909240723,
|
|
"learning_rate": 8.201869695260603e-06,
|
|
"loss": 0.815,
|
|
"step": 5919
|
|
},
|
|
{
|
|
"epoch": 1.574468085106383,
|
|
"grad_norm": 3.845151424407959,
|
|
"learning_rate": 8.201194130054342e-06,
|
|
"loss": 0.8449,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 1.5747340425531915,
|
|
"grad_norm": 4.074094295501709,
|
|
"learning_rate": 8.200518465799367e-06,
|
|
"loss": 0.7569,
|
|
"step": 5921
|
|
},
|
|
{
|
|
"epoch": 1.575,
|
|
"grad_norm": 4.062026023864746,
|
|
"learning_rate": 8.199842702516584e-06,
|
|
"loss": 0.8712,
|
|
"step": 5922
|
|
},
|
|
{
|
|
"epoch": 1.5752659574468084,
|
|
"grad_norm": 4.046767711639404,
|
|
"learning_rate": 8.199166840226898e-06,
|
|
"loss": 0.8318,
|
|
"step": 5923
|
|
},
|
|
{
|
|
"epoch": 1.575531914893617,
|
|
"grad_norm": 3.813408851623535,
|
|
"learning_rate": 8.198490878951224e-06,
|
|
"loss": 0.7493,
|
|
"step": 5924
|
|
},
|
|
{
|
|
"epoch": 1.5757978723404256,
|
|
"grad_norm": 4.108468055725098,
|
|
"learning_rate": 8.19781481871048e-06,
|
|
"loss": 0.7867,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 1.576063829787234,
|
|
"grad_norm": 3.9004015922546387,
|
|
"learning_rate": 8.197138659525576e-06,
|
|
"loss": 0.7384,
|
|
"step": 5926
|
|
},
|
|
{
|
|
"epoch": 1.5763297872340427,
|
|
"grad_norm": 4.14080286026001,
|
|
"learning_rate": 8.19646240141744e-06,
|
|
"loss": 0.7755,
|
|
"step": 5927
|
|
},
|
|
{
|
|
"epoch": 1.576595744680851,
|
|
"grad_norm": 3.8850128650665283,
|
|
"learning_rate": 8.195786044406992e-06,
|
|
"loss": 0.7689,
|
|
"step": 5928
|
|
},
|
|
{
|
|
"epoch": 1.5768617021276596,
|
|
"grad_norm": 3.973543882369995,
|
|
"learning_rate": 8.195109588515163e-06,
|
|
"loss": 0.7336,
|
|
"step": 5929
|
|
},
|
|
{
|
|
"epoch": 1.577127659574468,
|
|
"grad_norm": 3.7367260456085205,
|
|
"learning_rate": 8.194433033762882e-06,
|
|
"loss": 0.8511,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 1.5773936170212766,
|
|
"grad_norm": 3.7051467895507812,
|
|
"learning_rate": 8.193756380171081e-06,
|
|
"loss": 0.7696,
|
|
"step": 5931
|
|
},
|
|
{
|
|
"epoch": 1.5776595744680852,
|
|
"grad_norm": 3.612755298614502,
|
|
"learning_rate": 8.193079627760697e-06,
|
|
"loss": 0.7733,
|
|
"step": 5932
|
|
},
|
|
{
|
|
"epoch": 1.5779255319148935,
|
|
"grad_norm": 4.524839401245117,
|
|
"learning_rate": 8.19240277655267e-06,
|
|
"loss": 0.8047,
|
|
"step": 5933
|
|
},
|
|
{
|
|
"epoch": 1.5781914893617022,
|
|
"grad_norm": 4.2709059715271,
|
|
"learning_rate": 8.191725826567943e-06,
|
|
"loss": 0.9173,
|
|
"step": 5934
|
|
},
|
|
{
|
|
"epoch": 1.5784574468085106,
|
|
"grad_norm": 4.062780857086182,
|
|
"learning_rate": 8.191048777827462e-06,
|
|
"loss": 0.755,
|
|
"step": 5935
|
|
},
|
|
{
|
|
"epoch": 1.578723404255319,
|
|
"grad_norm": 4.253462314605713,
|
|
"learning_rate": 8.190371630352174e-06,
|
|
"loss": 0.9102,
|
|
"step": 5936
|
|
},
|
|
{
|
|
"epoch": 1.5789893617021278,
|
|
"grad_norm": 3.578122854232788,
|
|
"learning_rate": 8.189694384163032e-06,
|
|
"loss": 0.6755,
|
|
"step": 5937
|
|
},
|
|
{
|
|
"epoch": 1.579255319148936,
|
|
"grad_norm": 3.9935173988342285,
|
|
"learning_rate": 8.189017039280989e-06,
|
|
"loss": 0.8196,
|
|
"step": 5938
|
|
},
|
|
{
|
|
"epoch": 1.5795212765957447,
|
|
"grad_norm": 3.9614062309265137,
|
|
"learning_rate": 8.188339595727004e-06,
|
|
"loss": 0.7896,
|
|
"step": 5939
|
|
},
|
|
{
|
|
"epoch": 1.5797872340425532,
|
|
"grad_norm": 3.7698519229888916,
|
|
"learning_rate": 8.187662053522039e-06,
|
|
"loss": 0.785,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 1.5800531914893616,
|
|
"grad_norm": 4.328986167907715,
|
|
"learning_rate": 8.186984412687058e-06,
|
|
"loss": 0.87,
|
|
"step": 5941
|
|
},
|
|
{
|
|
"epoch": 1.5803191489361703,
|
|
"grad_norm": 4.169852256774902,
|
|
"learning_rate": 8.186306673243025e-06,
|
|
"loss": 0.8594,
|
|
"step": 5942
|
|
},
|
|
{
|
|
"epoch": 1.5805851063829788,
|
|
"grad_norm": 4.010345458984375,
|
|
"learning_rate": 8.185628835210915e-06,
|
|
"loss": 0.913,
|
|
"step": 5943
|
|
},
|
|
{
|
|
"epoch": 1.5808510638297872,
|
|
"grad_norm": 3.9177587032318115,
|
|
"learning_rate": 8.184950898611696e-06,
|
|
"loss": 0.9157,
|
|
"step": 5944
|
|
},
|
|
{
|
|
"epoch": 1.5811170212765957,
|
|
"grad_norm": 4.508220672607422,
|
|
"learning_rate": 8.184272863466348e-06,
|
|
"loss": 0.8951,
|
|
"step": 5945
|
|
},
|
|
{
|
|
"epoch": 1.5813829787234042,
|
|
"grad_norm": 3.5971477031707764,
|
|
"learning_rate": 8.183594729795848e-06,
|
|
"loss": 0.7883,
|
|
"step": 5946
|
|
},
|
|
{
|
|
"epoch": 1.5816489361702128,
|
|
"grad_norm": 4.1539998054504395,
|
|
"learning_rate": 8.182916497621177e-06,
|
|
"loss": 0.8599,
|
|
"step": 5947
|
|
},
|
|
{
|
|
"epoch": 1.5819148936170213,
|
|
"grad_norm": 3.9577205181121826,
|
|
"learning_rate": 8.182238166963325e-06,
|
|
"loss": 0.8107,
|
|
"step": 5948
|
|
},
|
|
{
|
|
"epoch": 1.5821808510638298,
|
|
"grad_norm": 3.921849250793457,
|
|
"learning_rate": 8.181559737843274e-06,
|
|
"loss": 0.8452,
|
|
"step": 5949
|
|
},
|
|
{
|
|
"epoch": 1.5824468085106385,
|
|
"grad_norm": 3.6595895290374756,
|
|
"learning_rate": 8.18088121028202e-06,
|
|
"loss": 0.8332,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 1.5827127659574467,
|
|
"grad_norm": 4.248002052307129,
|
|
"learning_rate": 8.18020258430056e-06,
|
|
"loss": 0.928,
|
|
"step": 5951
|
|
},
|
|
{
|
|
"epoch": 1.5829787234042554,
|
|
"grad_norm": 3.584662437438965,
|
|
"learning_rate": 8.179523859919884e-06,
|
|
"loss": 0.7684,
|
|
"step": 5952
|
|
},
|
|
{
|
|
"epoch": 1.5832446808510638,
|
|
"grad_norm": 3.5269956588745117,
|
|
"learning_rate": 8.178845037160997e-06,
|
|
"loss": 0.7553,
|
|
"step": 5953
|
|
},
|
|
{
|
|
"epoch": 1.5835106382978723,
|
|
"grad_norm": 4.2691731452941895,
|
|
"learning_rate": 8.178166116044904e-06,
|
|
"loss": 0.8211,
|
|
"step": 5954
|
|
},
|
|
{
|
|
"epoch": 1.583776595744681,
|
|
"grad_norm": 4.050920009613037,
|
|
"learning_rate": 8.177487096592607e-06,
|
|
"loss": 0.9221,
|
|
"step": 5955
|
|
},
|
|
{
|
|
"epoch": 1.5840425531914892,
|
|
"grad_norm": 4.290426731109619,
|
|
"learning_rate": 8.17680797882512e-06,
|
|
"loss": 0.7909,
|
|
"step": 5956
|
|
},
|
|
{
|
|
"epoch": 1.584308510638298,
|
|
"grad_norm": 3.8692431449890137,
|
|
"learning_rate": 8.176128762763451e-06,
|
|
"loss": 0.7887,
|
|
"step": 5957
|
|
},
|
|
{
|
|
"epoch": 1.5845744680851064,
|
|
"grad_norm": 4.173573017120361,
|
|
"learning_rate": 8.175449448428621e-06,
|
|
"loss": 0.7535,
|
|
"step": 5958
|
|
},
|
|
{
|
|
"epoch": 1.5848404255319148,
|
|
"grad_norm": 4.186033248901367,
|
|
"learning_rate": 8.174770035841647e-06,
|
|
"loss": 0.8673,
|
|
"step": 5959
|
|
},
|
|
{
|
|
"epoch": 1.5851063829787235,
|
|
"grad_norm": 4.015555381774902,
|
|
"learning_rate": 8.17409052502355e-06,
|
|
"loss": 0.8815,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 1.5853723404255318,
|
|
"grad_norm": 3.864473342895508,
|
|
"learning_rate": 8.173410915995354e-06,
|
|
"loss": 0.8684,
|
|
"step": 5961
|
|
},
|
|
{
|
|
"epoch": 1.5856382978723405,
|
|
"grad_norm": 3.6198973655700684,
|
|
"learning_rate": 8.172731208778089e-06,
|
|
"loss": 0.7445,
|
|
"step": 5962
|
|
},
|
|
{
|
|
"epoch": 1.585904255319149,
|
|
"grad_norm": 3.7900218963623047,
|
|
"learning_rate": 8.172051403392784e-06,
|
|
"loss": 0.7331,
|
|
"step": 5963
|
|
},
|
|
{
|
|
"epoch": 1.5861702127659574,
|
|
"grad_norm": 4.163589954376221,
|
|
"learning_rate": 8.171371499860475e-06,
|
|
"loss": 0.8528,
|
|
"step": 5964
|
|
},
|
|
{
|
|
"epoch": 1.586436170212766,
|
|
"grad_norm": 4.275415420532227,
|
|
"learning_rate": 8.170691498202196e-06,
|
|
"loss": 0.8435,
|
|
"step": 5965
|
|
},
|
|
{
|
|
"epoch": 1.5867021276595743,
|
|
"grad_norm": 3.969174861907959,
|
|
"learning_rate": 8.170011398438992e-06,
|
|
"loss": 0.8812,
|
|
"step": 5966
|
|
},
|
|
{
|
|
"epoch": 1.586968085106383,
|
|
"grad_norm": 4.086930751800537,
|
|
"learning_rate": 8.169331200591901e-06,
|
|
"loss": 0.8988,
|
|
"step": 5967
|
|
},
|
|
{
|
|
"epoch": 1.5872340425531914,
|
|
"grad_norm": 4.444678783416748,
|
|
"learning_rate": 8.168650904681973e-06,
|
|
"loss": 0.9295,
|
|
"step": 5968
|
|
},
|
|
{
|
|
"epoch": 1.5875,
|
|
"grad_norm": 3.7711548805236816,
|
|
"learning_rate": 8.167970510730254e-06,
|
|
"loss": 0.7715,
|
|
"step": 5969
|
|
},
|
|
{
|
|
"epoch": 1.5877659574468086,
|
|
"grad_norm": 3.800588369369507,
|
|
"learning_rate": 8.167290018757797e-06,
|
|
"loss": 0.8273,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 1.588031914893617,
|
|
"grad_norm": 4.506065845489502,
|
|
"learning_rate": 8.16660942878566e-06,
|
|
"loss": 0.7786,
|
|
"step": 5971
|
|
},
|
|
{
|
|
"epoch": 1.5882978723404255,
|
|
"grad_norm": 3.8182950019836426,
|
|
"learning_rate": 8.165928740834896e-06,
|
|
"loss": 0.6682,
|
|
"step": 5972
|
|
},
|
|
{
|
|
"epoch": 1.5885638297872342,
|
|
"grad_norm": 4.040492534637451,
|
|
"learning_rate": 8.165247954926572e-06,
|
|
"loss": 0.7333,
|
|
"step": 5973
|
|
},
|
|
{
|
|
"epoch": 1.5888297872340424,
|
|
"grad_norm": 4.233337879180908,
|
|
"learning_rate": 8.164567071081747e-06,
|
|
"loss": 0.7931,
|
|
"step": 5974
|
|
},
|
|
{
|
|
"epoch": 1.5890957446808511,
|
|
"grad_norm": 4.0191969871521,
|
|
"learning_rate": 8.163886089321493e-06,
|
|
"loss": 0.8279,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 1.5893617021276596,
|
|
"grad_norm": 3.9428741931915283,
|
|
"learning_rate": 8.163205009666879e-06,
|
|
"loss": 0.7945,
|
|
"step": 5976
|
|
},
|
|
{
|
|
"epoch": 1.589627659574468,
|
|
"grad_norm": 4.383618354797363,
|
|
"learning_rate": 8.162523832138977e-06,
|
|
"loss": 0.8961,
|
|
"step": 5977
|
|
},
|
|
{
|
|
"epoch": 1.5898936170212767,
|
|
"grad_norm": 4.313653945922852,
|
|
"learning_rate": 8.161842556758863e-06,
|
|
"loss": 0.927,
|
|
"step": 5978
|
|
},
|
|
{
|
|
"epoch": 1.590159574468085,
|
|
"grad_norm": 4.137526988983154,
|
|
"learning_rate": 8.161161183547619e-06,
|
|
"loss": 0.833,
|
|
"step": 5979
|
|
},
|
|
{
|
|
"epoch": 1.5904255319148937,
|
|
"grad_norm": 3.9024994373321533,
|
|
"learning_rate": 8.160479712526326e-06,
|
|
"loss": 0.8324,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 1.5906914893617021,
|
|
"grad_norm": 3.745685577392578,
|
|
"learning_rate": 8.159798143716069e-06,
|
|
"loss": 0.7946,
|
|
"step": 5981
|
|
},
|
|
{
|
|
"epoch": 1.5909574468085106,
|
|
"grad_norm": 4.142686367034912,
|
|
"learning_rate": 8.159116477137938e-06,
|
|
"loss": 0.8469,
|
|
"step": 5982
|
|
},
|
|
{
|
|
"epoch": 1.5912234042553193,
|
|
"grad_norm": 4.332526683807373,
|
|
"learning_rate": 8.158434712813024e-06,
|
|
"loss": 0.8398,
|
|
"step": 5983
|
|
},
|
|
{
|
|
"epoch": 1.5914893617021275,
|
|
"grad_norm": 4.1822028160095215,
|
|
"learning_rate": 8.157752850762422e-06,
|
|
"loss": 0.8182,
|
|
"step": 5984
|
|
},
|
|
{
|
|
"epoch": 1.5917553191489362,
|
|
"grad_norm": 3.797029972076416,
|
|
"learning_rate": 8.157070891007227e-06,
|
|
"loss": 0.8219,
|
|
"step": 5985
|
|
},
|
|
{
|
|
"epoch": 1.5920212765957447,
|
|
"grad_norm": 3.6281862258911133,
|
|
"learning_rate": 8.156388833568543e-06,
|
|
"loss": 0.7788,
|
|
"step": 5986
|
|
},
|
|
{
|
|
"epoch": 1.5922872340425531,
|
|
"grad_norm": 3.963622570037842,
|
|
"learning_rate": 8.155706678467472e-06,
|
|
"loss": 0.8121,
|
|
"step": 5987
|
|
},
|
|
{
|
|
"epoch": 1.5925531914893618,
|
|
"grad_norm": 3.965254068374634,
|
|
"learning_rate": 8.15502442572512e-06,
|
|
"loss": 0.9758,
|
|
"step": 5988
|
|
},
|
|
{
|
|
"epoch": 1.59281914893617,
|
|
"grad_norm": 3.7290945053100586,
|
|
"learning_rate": 8.1543420753626e-06,
|
|
"loss": 0.7913,
|
|
"step": 5989
|
|
},
|
|
{
|
|
"epoch": 1.5930851063829787,
|
|
"grad_norm": 3.5423686504364014,
|
|
"learning_rate": 8.15365962740102e-06,
|
|
"loss": 0.6702,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 1.5933510638297872,
|
|
"grad_norm": 4.0960540771484375,
|
|
"learning_rate": 8.1529770818615e-06,
|
|
"loss": 0.976,
|
|
"step": 5991
|
|
},
|
|
{
|
|
"epoch": 1.5936170212765957,
|
|
"grad_norm": 3.9374215602874756,
|
|
"learning_rate": 8.152294438765157e-06,
|
|
"loss": 0.7726,
|
|
"step": 5992
|
|
},
|
|
{
|
|
"epoch": 1.5938829787234043,
|
|
"grad_norm": 4.123393535614014,
|
|
"learning_rate": 8.15161169813311e-06,
|
|
"loss": 0.7414,
|
|
"step": 5993
|
|
},
|
|
{
|
|
"epoch": 1.5941489361702128,
|
|
"grad_norm": 3.7125062942504883,
|
|
"learning_rate": 8.150928859986488e-06,
|
|
"loss": 0.8094,
|
|
"step": 5994
|
|
},
|
|
{
|
|
"epoch": 1.5944148936170213,
|
|
"grad_norm": 3.6186742782592773,
|
|
"learning_rate": 8.15024592434642e-06,
|
|
"loss": 0.8291,
|
|
"step": 5995
|
|
},
|
|
{
|
|
"epoch": 1.59468085106383,
|
|
"grad_norm": 3.9349913597106934,
|
|
"learning_rate": 8.14956289123403e-06,
|
|
"loss": 0.8469,
|
|
"step": 5996
|
|
},
|
|
{
|
|
"epoch": 1.5949468085106382,
|
|
"grad_norm": 4.224155426025391,
|
|
"learning_rate": 8.148879760670459e-06,
|
|
"loss": 0.8178,
|
|
"step": 5997
|
|
},
|
|
{
|
|
"epoch": 1.5952127659574469,
|
|
"grad_norm": 4.03489351272583,
|
|
"learning_rate": 8.14819653267684e-06,
|
|
"loss": 1.0682,
|
|
"step": 5998
|
|
},
|
|
{
|
|
"epoch": 1.5954787234042553,
|
|
"grad_norm": 3.757615566253662,
|
|
"learning_rate": 8.147513207274314e-06,
|
|
"loss": 0.9454,
|
|
"step": 5999
|
|
},
|
|
{
|
|
"epoch": 1.5957446808510638,
|
|
"grad_norm": 3.69804048538208,
|
|
"learning_rate": 8.146829784484024e-06,
|
|
"loss": 0.6988,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.5957446808510638,
|
|
"eval_loss": 1.2842473983764648,
|
|
"eval_runtime": 13.4375,
|
|
"eval_samples_per_second": 29.767,
|
|
"eval_steps_per_second": 3.721,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.5960106382978725,
|
|
"grad_norm": 3.8672168254852295,
|
|
"learning_rate": 8.146146264327113e-06,
|
|
"loss": 0.8893,
|
|
"step": 6001
|
|
},
|
|
{
|
|
"epoch": 1.5962765957446807,
|
|
"grad_norm": 3.7445380687713623,
|
|
"learning_rate": 8.145462646824734e-06,
|
|
"loss": 0.8237,
|
|
"step": 6002
|
|
},
|
|
{
|
|
"epoch": 1.5965425531914894,
|
|
"grad_norm": 3.7135863304138184,
|
|
"learning_rate": 8.144778931998038e-06,
|
|
"loss": 0.6954,
|
|
"step": 6003
|
|
},
|
|
{
|
|
"epoch": 1.5968085106382979,
|
|
"grad_norm": 3.946181058883667,
|
|
"learning_rate": 8.144095119868178e-06,
|
|
"loss": 0.8022,
|
|
"step": 6004
|
|
},
|
|
{
|
|
"epoch": 1.5970744680851063,
|
|
"grad_norm": 3.866457223892212,
|
|
"learning_rate": 8.143411210456314e-06,
|
|
"loss": 0.7848,
|
|
"step": 6005
|
|
},
|
|
{
|
|
"epoch": 1.597340425531915,
|
|
"grad_norm": 3.9514496326446533,
|
|
"learning_rate": 8.142727203783608e-06,
|
|
"loss": 0.8287,
|
|
"step": 6006
|
|
},
|
|
{
|
|
"epoch": 1.5976063829787233,
|
|
"grad_norm": 3.780092239379883,
|
|
"learning_rate": 8.142043099871219e-06,
|
|
"loss": 0.731,
|
|
"step": 6007
|
|
},
|
|
{
|
|
"epoch": 1.597872340425532,
|
|
"grad_norm": 3.832037925720215,
|
|
"learning_rate": 8.141358898740319e-06,
|
|
"loss": 0.8207,
|
|
"step": 6008
|
|
},
|
|
{
|
|
"epoch": 1.5981382978723404,
|
|
"grad_norm": 3.7208633422851562,
|
|
"learning_rate": 8.140674600412076e-06,
|
|
"loss": 0.7905,
|
|
"step": 6009
|
|
},
|
|
{
|
|
"epoch": 1.5984042553191489,
|
|
"grad_norm": 3.5873775482177734,
|
|
"learning_rate": 8.139990204907662e-06,
|
|
"loss": 0.7042,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 1.5986702127659576,
|
|
"grad_norm": 4.138782024383545,
|
|
"learning_rate": 8.139305712248256e-06,
|
|
"loss": 0.8231,
|
|
"step": 6011
|
|
},
|
|
{
|
|
"epoch": 1.5989361702127658,
|
|
"grad_norm": 4.014845371246338,
|
|
"learning_rate": 8.138621122455034e-06,
|
|
"loss": 0.7606,
|
|
"step": 6012
|
|
},
|
|
{
|
|
"epoch": 1.5992021276595745,
|
|
"grad_norm": 3.997772693634033,
|
|
"learning_rate": 8.13793643554918e-06,
|
|
"loss": 0.8122,
|
|
"step": 6013
|
|
},
|
|
{
|
|
"epoch": 1.599468085106383,
|
|
"grad_norm": 3.3885183334350586,
|
|
"learning_rate": 8.137251651551878e-06,
|
|
"loss": 0.7245,
|
|
"step": 6014
|
|
},
|
|
{
|
|
"epoch": 1.5997340425531914,
|
|
"grad_norm": 3.9096522331237793,
|
|
"learning_rate": 8.136566770484316e-06,
|
|
"loss": 0.7919,
|
|
"step": 6015
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"grad_norm": 4.008962154388428,
|
|
"learning_rate": 8.135881792367686e-06,
|
|
"loss": 0.8683,
|
|
"step": 6016
|
|
},
|
|
{
|
|
"epoch": 1.6002659574468086,
|
|
"grad_norm": 3.9772658348083496,
|
|
"learning_rate": 8.13519671722318e-06,
|
|
"loss": 0.7775,
|
|
"step": 6017
|
|
},
|
|
{
|
|
"epoch": 1.600531914893617,
|
|
"grad_norm": 4.593280792236328,
|
|
"learning_rate": 8.134511545071998e-06,
|
|
"loss": 0.8959,
|
|
"step": 6018
|
|
},
|
|
{
|
|
"epoch": 1.6007978723404257,
|
|
"grad_norm": 3.9730031490325928,
|
|
"learning_rate": 8.133826275935337e-06,
|
|
"loss": 0.8394,
|
|
"step": 6019
|
|
},
|
|
{
|
|
"epoch": 1.601063829787234,
|
|
"grad_norm": 4.224338531494141,
|
|
"learning_rate": 8.133140909834402e-06,
|
|
"loss": 0.7961,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 1.6013297872340426,
|
|
"grad_norm": 3.759888172149658,
|
|
"learning_rate": 8.132455446790399e-06,
|
|
"loss": 0.8531,
|
|
"step": 6021
|
|
},
|
|
{
|
|
"epoch": 1.601595744680851,
|
|
"grad_norm": 3.5629312992095947,
|
|
"learning_rate": 8.131769886824535e-06,
|
|
"loss": 0.8102,
|
|
"step": 6022
|
|
},
|
|
{
|
|
"epoch": 1.6018617021276595,
|
|
"grad_norm": 3.5515568256378174,
|
|
"learning_rate": 8.131084229958024e-06,
|
|
"loss": 0.7867,
|
|
"step": 6023
|
|
},
|
|
{
|
|
"epoch": 1.6021276595744682,
|
|
"grad_norm": 4.148061275482178,
|
|
"learning_rate": 8.130398476212081e-06,
|
|
"loss": 0.8708,
|
|
"step": 6024
|
|
},
|
|
{
|
|
"epoch": 1.6023936170212765,
|
|
"grad_norm": 4.018913745880127,
|
|
"learning_rate": 8.129712625607924e-06,
|
|
"loss": 0.771,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 1.6026595744680852,
|
|
"grad_norm": 4.379147052764893,
|
|
"learning_rate": 8.129026678166772e-06,
|
|
"loss": 0.8199,
|
|
"step": 6026
|
|
},
|
|
{
|
|
"epoch": 1.6029255319148936,
|
|
"grad_norm": 3.568890333175659,
|
|
"learning_rate": 8.128340633909852e-06,
|
|
"loss": 0.705,
|
|
"step": 6027
|
|
},
|
|
{
|
|
"epoch": 1.603191489361702,
|
|
"grad_norm": 3.6377384662628174,
|
|
"learning_rate": 8.127654492858388e-06,
|
|
"loss": 0.6958,
|
|
"step": 6028
|
|
},
|
|
{
|
|
"epoch": 1.6034574468085108,
|
|
"grad_norm": 4.233497142791748,
|
|
"learning_rate": 8.126968255033614e-06,
|
|
"loss": 0.8446,
|
|
"step": 6029
|
|
},
|
|
{
|
|
"epoch": 1.603723404255319,
|
|
"grad_norm": 4.239995956420898,
|
|
"learning_rate": 8.126281920456758e-06,
|
|
"loss": 0.813,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 1.6039893617021277,
|
|
"grad_norm": 3.8521575927734375,
|
|
"learning_rate": 8.12559548914906e-06,
|
|
"loss": 0.7906,
|
|
"step": 6031
|
|
},
|
|
{
|
|
"epoch": 1.6042553191489362,
|
|
"grad_norm": 3.567471742630005,
|
|
"learning_rate": 8.124908961131759e-06,
|
|
"loss": 0.6709,
|
|
"step": 6032
|
|
},
|
|
{
|
|
"epoch": 1.6045212765957446,
|
|
"grad_norm": 3.527024030685425,
|
|
"learning_rate": 8.124222336426094e-06,
|
|
"loss": 0.7508,
|
|
"step": 6033
|
|
},
|
|
{
|
|
"epoch": 1.6047872340425533,
|
|
"grad_norm": 4.134167671203613,
|
|
"learning_rate": 8.123535615053312e-06,
|
|
"loss": 0.8233,
|
|
"step": 6034
|
|
},
|
|
{
|
|
"epoch": 1.6050531914893615,
|
|
"grad_norm": 3.62556791305542,
|
|
"learning_rate": 8.12284879703466e-06,
|
|
"loss": 0.7347,
|
|
"step": 6035
|
|
},
|
|
{
|
|
"epoch": 1.6053191489361702,
|
|
"grad_norm": 4.534690856933594,
|
|
"learning_rate": 8.12216188239139e-06,
|
|
"loss": 0.9258,
|
|
"step": 6036
|
|
},
|
|
{
|
|
"epoch": 1.6055851063829787,
|
|
"grad_norm": 3.8855905532836914,
|
|
"learning_rate": 8.121474871144757e-06,
|
|
"loss": 0.7215,
|
|
"step": 6037
|
|
},
|
|
{
|
|
"epoch": 1.6058510638297872,
|
|
"grad_norm": 3.889317274093628,
|
|
"learning_rate": 8.120787763316014e-06,
|
|
"loss": 0.7557,
|
|
"step": 6038
|
|
},
|
|
{
|
|
"epoch": 1.6061170212765958,
|
|
"grad_norm": 4.091339588165283,
|
|
"learning_rate": 8.120100558926425e-06,
|
|
"loss": 0.8053,
|
|
"step": 6039
|
|
},
|
|
{
|
|
"epoch": 1.6063829787234043,
|
|
"grad_norm": 4.249019622802734,
|
|
"learning_rate": 8.11941325799725e-06,
|
|
"loss": 0.837,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 1.6066489361702128,
|
|
"grad_norm": 4.165124416351318,
|
|
"learning_rate": 8.118725860549756e-06,
|
|
"loss": 0.8762,
|
|
"step": 6041
|
|
},
|
|
{
|
|
"epoch": 1.6069148936170212,
|
|
"grad_norm": 4.028770923614502,
|
|
"learning_rate": 8.118038366605212e-06,
|
|
"loss": 0.8456,
|
|
"step": 6042
|
|
},
|
|
{
|
|
"epoch": 1.6071808510638297,
|
|
"grad_norm": 3.60648250579834,
|
|
"learning_rate": 8.117350776184892e-06,
|
|
"loss": 0.688,
|
|
"step": 6043
|
|
},
|
|
{
|
|
"epoch": 1.6074468085106384,
|
|
"grad_norm": 3.6444270610809326,
|
|
"learning_rate": 8.116663089310067e-06,
|
|
"loss": 0.8199,
|
|
"step": 6044
|
|
},
|
|
{
|
|
"epoch": 1.6077127659574468,
|
|
"grad_norm": 4.073156833648682,
|
|
"learning_rate": 8.115975306002018e-06,
|
|
"loss": 0.9758,
|
|
"step": 6045
|
|
},
|
|
{
|
|
"epoch": 1.6079787234042553,
|
|
"grad_norm": 4.100760459899902,
|
|
"learning_rate": 8.115287426282022e-06,
|
|
"loss": 0.9357,
|
|
"step": 6046
|
|
},
|
|
{
|
|
"epoch": 1.608244680851064,
|
|
"grad_norm": 4.134888648986816,
|
|
"learning_rate": 8.114599450171366e-06,
|
|
"loss": 0.7536,
|
|
"step": 6047
|
|
},
|
|
{
|
|
"epoch": 1.6085106382978722,
|
|
"grad_norm": 3.8742432594299316,
|
|
"learning_rate": 8.113911377691338e-06,
|
|
"loss": 0.7832,
|
|
"step": 6048
|
|
},
|
|
{
|
|
"epoch": 1.608776595744681,
|
|
"grad_norm": 4.110736846923828,
|
|
"learning_rate": 8.113223208863224e-06,
|
|
"loss": 0.7098,
|
|
"step": 6049
|
|
},
|
|
{
|
|
"epoch": 1.6090425531914894,
|
|
"grad_norm": 3.972907304763794,
|
|
"learning_rate": 8.11253494370832e-06,
|
|
"loss": 0.8414,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 1.6093085106382978,
|
|
"grad_norm": 3.984872817993164,
|
|
"learning_rate": 8.111846582247917e-06,
|
|
"loss": 0.9063,
|
|
"step": 6051
|
|
},
|
|
{
|
|
"epoch": 1.6095744680851065,
|
|
"grad_norm": 4.114076614379883,
|
|
"learning_rate": 8.11115812450332e-06,
|
|
"loss": 0.8774,
|
|
"step": 6052
|
|
},
|
|
{
|
|
"epoch": 1.6098404255319148,
|
|
"grad_norm": 3.8898861408233643,
|
|
"learning_rate": 8.110469570495828e-06,
|
|
"loss": 0.6855,
|
|
"step": 6053
|
|
},
|
|
{
|
|
"epoch": 1.6101063829787234,
|
|
"grad_norm": 3.620485544204712,
|
|
"learning_rate": 8.109780920246743e-06,
|
|
"loss": 0.8566,
|
|
"step": 6054
|
|
},
|
|
{
|
|
"epoch": 1.610372340425532,
|
|
"grad_norm": 4.412075519561768,
|
|
"learning_rate": 8.109092173777376e-06,
|
|
"loss": 0.8386,
|
|
"step": 6055
|
|
},
|
|
{
|
|
"epoch": 1.6106382978723404,
|
|
"grad_norm": 4.396791934967041,
|
|
"learning_rate": 8.108403331109038e-06,
|
|
"loss": 0.7074,
|
|
"step": 6056
|
|
},
|
|
{
|
|
"epoch": 1.610904255319149,
|
|
"grad_norm": 4.347930431365967,
|
|
"learning_rate": 8.10771439226304e-06,
|
|
"loss": 0.8188,
|
|
"step": 6057
|
|
},
|
|
{
|
|
"epoch": 1.6111702127659573,
|
|
"grad_norm": 3.751016855239868,
|
|
"learning_rate": 8.1070253572607e-06,
|
|
"loss": 0.7469,
|
|
"step": 6058
|
|
},
|
|
{
|
|
"epoch": 1.611436170212766,
|
|
"grad_norm": 4.112164497375488,
|
|
"learning_rate": 8.106336226123339e-06,
|
|
"loss": 0.8259,
|
|
"step": 6059
|
|
},
|
|
{
|
|
"epoch": 1.6117021276595744,
|
|
"grad_norm": 4.112537860870361,
|
|
"learning_rate": 8.105646998872275e-06,
|
|
"loss": 0.8493,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 1.611968085106383,
|
|
"grad_norm": 4.171288967132568,
|
|
"learning_rate": 8.104957675528837e-06,
|
|
"loss": 0.9249,
|
|
"step": 6061
|
|
},
|
|
{
|
|
"epoch": 1.6122340425531916,
|
|
"grad_norm": 4.331489086151123,
|
|
"learning_rate": 8.104268256114354e-06,
|
|
"loss": 0.9123,
|
|
"step": 6062
|
|
},
|
|
{
|
|
"epoch": 1.6125,
|
|
"grad_norm": 4.148106575012207,
|
|
"learning_rate": 8.103578740650157e-06,
|
|
"loss": 0.7654,
|
|
"step": 6063
|
|
},
|
|
{
|
|
"epoch": 1.6127659574468085,
|
|
"grad_norm": 3.72057843208313,
|
|
"learning_rate": 8.102889129157578e-06,
|
|
"loss": 0.8049,
|
|
"step": 6064
|
|
},
|
|
{
|
|
"epoch": 1.613031914893617,
|
|
"grad_norm": 3.9282565116882324,
|
|
"learning_rate": 8.102199421657957e-06,
|
|
"loss": 0.7639,
|
|
"step": 6065
|
|
},
|
|
{
|
|
"epoch": 1.6132978723404254,
|
|
"grad_norm": 3.8103582859039307,
|
|
"learning_rate": 8.101509618172634e-06,
|
|
"loss": 0.8689,
|
|
"step": 6066
|
|
},
|
|
{
|
|
"epoch": 1.6135638297872341,
|
|
"grad_norm": 4.2297539710998535,
|
|
"learning_rate": 8.10081971872295e-06,
|
|
"loss": 0.9582,
|
|
"step": 6067
|
|
},
|
|
{
|
|
"epoch": 1.6138297872340426,
|
|
"grad_norm": 4.653298854827881,
|
|
"learning_rate": 8.100129723330255e-06,
|
|
"loss": 0.9946,
|
|
"step": 6068
|
|
},
|
|
{
|
|
"epoch": 1.614095744680851,
|
|
"grad_norm": 3.7969958782196045,
|
|
"learning_rate": 8.099439632015896e-06,
|
|
"loss": 0.7852,
|
|
"step": 6069
|
|
},
|
|
{
|
|
"epoch": 1.6143617021276597,
|
|
"grad_norm": 4.072946071624756,
|
|
"learning_rate": 8.098749444801226e-06,
|
|
"loss": 0.79,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 1.614627659574468,
|
|
"grad_norm": 3.9592959880828857,
|
|
"learning_rate": 8.0980591617076e-06,
|
|
"loss": 0.7815,
|
|
"step": 6071
|
|
},
|
|
{
|
|
"epoch": 1.6148936170212767,
|
|
"grad_norm": 4.4633588790893555,
|
|
"learning_rate": 8.097368782756374e-06,
|
|
"loss": 0.7754,
|
|
"step": 6072
|
|
},
|
|
{
|
|
"epoch": 1.6151595744680851,
|
|
"grad_norm": 4.381833553314209,
|
|
"learning_rate": 8.096678307968913e-06,
|
|
"loss": 0.9649,
|
|
"step": 6073
|
|
},
|
|
{
|
|
"epoch": 1.6154255319148936,
|
|
"grad_norm": 4.433225154876709,
|
|
"learning_rate": 8.095987737366578e-06,
|
|
"loss": 0.9376,
|
|
"step": 6074
|
|
},
|
|
{
|
|
"epoch": 1.6156914893617023,
|
|
"grad_norm": 3.7621006965637207,
|
|
"learning_rate": 8.095297070970738e-06,
|
|
"loss": 0.7577,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 1.6159574468085105,
|
|
"grad_norm": 3.4518826007843018,
|
|
"learning_rate": 8.094606308802764e-06,
|
|
"loss": 0.816,
|
|
"step": 6076
|
|
},
|
|
{
|
|
"epoch": 1.6162234042553192,
|
|
"grad_norm": 4.059780120849609,
|
|
"learning_rate": 8.093915450884025e-06,
|
|
"loss": 0.8319,
|
|
"step": 6077
|
|
},
|
|
{
|
|
"epoch": 1.6164893617021276,
|
|
"grad_norm": 3.8527324199676514,
|
|
"learning_rate": 8.093224497235899e-06,
|
|
"loss": 0.8826,
|
|
"step": 6078
|
|
},
|
|
{
|
|
"epoch": 1.616755319148936,
|
|
"grad_norm": 3.3895418643951416,
|
|
"learning_rate": 8.092533447879766e-06,
|
|
"loss": 0.73,
|
|
"step": 6079
|
|
},
|
|
{
|
|
"epoch": 1.6170212765957448,
|
|
"grad_norm": 3.9259166717529297,
|
|
"learning_rate": 8.091842302837009e-06,
|
|
"loss": 0.8569,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 1.617287234042553,
|
|
"grad_norm": 3.5704541206359863,
|
|
"learning_rate": 8.091151062129008e-06,
|
|
"loss": 0.8113,
|
|
"step": 6081
|
|
},
|
|
{
|
|
"epoch": 1.6175531914893617,
|
|
"grad_norm": 3.8313138484954834,
|
|
"learning_rate": 8.090459725777156e-06,
|
|
"loss": 0.7352,
|
|
"step": 6082
|
|
},
|
|
{
|
|
"epoch": 1.6178191489361702,
|
|
"grad_norm": 4.403858184814453,
|
|
"learning_rate": 8.089768293802842e-06,
|
|
"loss": 0.7757,
|
|
"step": 6083
|
|
},
|
|
{
|
|
"epoch": 1.6180851063829786,
|
|
"grad_norm": 4.078790664672852,
|
|
"learning_rate": 8.089076766227457e-06,
|
|
"loss": 0.8444,
|
|
"step": 6084
|
|
},
|
|
{
|
|
"epoch": 1.6183510638297873,
|
|
"grad_norm": 4.103868007659912,
|
|
"learning_rate": 8.088385143072402e-06,
|
|
"loss": 0.7451,
|
|
"step": 6085
|
|
},
|
|
{
|
|
"epoch": 1.6186170212765958,
|
|
"grad_norm": 3.906527042388916,
|
|
"learning_rate": 8.087693424359073e-06,
|
|
"loss": 0.7095,
|
|
"step": 6086
|
|
},
|
|
{
|
|
"epoch": 1.6188829787234043,
|
|
"grad_norm": 4.909295082092285,
|
|
"learning_rate": 8.087001610108874e-06,
|
|
"loss": 0.8277,
|
|
"step": 6087
|
|
},
|
|
{
|
|
"epoch": 1.6191489361702127,
|
|
"grad_norm": 5.194472312927246,
|
|
"learning_rate": 8.086309700343211e-06,
|
|
"loss": 0.8959,
|
|
"step": 6088
|
|
},
|
|
{
|
|
"epoch": 1.6194148936170212,
|
|
"grad_norm": 3.6174070835113525,
|
|
"learning_rate": 8.085617695083493e-06,
|
|
"loss": 0.7838,
|
|
"step": 6089
|
|
},
|
|
{
|
|
"epoch": 1.6196808510638299,
|
|
"grad_norm": 3.5253570079803467,
|
|
"learning_rate": 8.08492559435113e-06,
|
|
"loss": 0.7633,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 1.6199468085106383,
|
|
"grad_norm": 4.330216884613037,
|
|
"learning_rate": 8.084233398167537e-06,
|
|
"loss": 0.8669,
|
|
"step": 6091
|
|
},
|
|
{
|
|
"epoch": 1.6202127659574468,
|
|
"grad_norm": 3.792811393737793,
|
|
"learning_rate": 8.083541106554131e-06,
|
|
"loss": 0.8782,
|
|
"step": 6092
|
|
},
|
|
{
|
|
"epoch": 1.6204787234042555,
|
|
"grad_norm": 3.888946533203125,
|
|
"learning_rate": 8.082848719532335e-06,
|
|
"loss": 0.8816,
|
|
"step": 6093
|
|
},
|
|
{
|
|
"epoch": 1.6207446808510637,
|
|
"grad_norm": 3.9346768856048584,
|
|
"learning_rate": 8.082156237123567e-06,
|
|
"loss": 0.6887,
|
|
"step": 6094
|
|
},
|
|
{
|
|
"epoch": 1.6210106382978724,
|
|
"grad_norm": 3.7470414638519287,
|
|
"learning_rate": 8.081463659349258e-06,
|
|
"loss": 0.7622,
|
|
"step": 6095
|
|
},
|
|
{
|
|
"epoch": 1.6212765957446809,
|
|
"grad_norm": 3.9194772243499756,
|
|
"learning_rate": 8.080770986230835e-06,
|
|
"loss": 0.768,
|
|
"step": 6096
|
|
},
|
|
{
|
|
"epoch": 1.6215425531914893,
|
|
"grad_norm": 3.7921671867370605,
|
|
"learning_rate": 8.08007821778973e-06,
|
|
"loss": 0.8936,
|
|
"step": 6097
|
|
},
|
|
{
|
|
"epoch": 1.621808510638298,
|
|
"grad_norm": 3.8893918991088867,
|
|
"learning_rate": 8.07938535404738e-06,
|
|
"loss": 0.835,
|
|
"step": 6098
|
|
},
|
|
{
|
|
"epoch": 1.6220744680851062,
|
|
"grad_norm": 3.7834744453430176,
|
|
"learning_rate": 8.07869239502522e-06,
|
|
"loss": 0.7374,
|
|
"step": 6099
|
|
},
|
|
{
|
|
"epoch": 1.622340425531915,
|
|
"grad_norm": 3.867154598236084,
|
|
"learning_rate": 8.077999340744694e-06,
|
|
"loss": 0.7935,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 1.6226063829787234,
|
|
"grad_norm": 4.853170394897461,
|
|
"learning_rate": 8.077306191227244e-06,
|
|
"loss": 0.7786,
|
|
"step": 6101
|
|
},
|
|
{
|
|
"epoch": 1.6228723404255319,
|
|
"grad_norm": 4.339568614959717,
|
|
"learning_rate": 8.076612946494317e-06,
|
|
"loss": 0.6722,
|
|
"step": 6102
|
|
},
|
|
{
|
|
"epoch": 1.6231382978723405,
|
|
"grad_norm": 3.6707983016967773,
|
|
"learning_rate": 8.075919606567363e-06,
|
|
"loss": 0.8792,
|
|
"step": 6103
|
|
},
|
|
{
|
|
"epoch": 1.6234042553191488,
|
|
"grad_norm": 3.867652177810669,
|
|
"learning_rate": 8.075226171467835e-06,
|
|
"loss": 0.7879,
|
|
"step": 6104
|
|
},
|
|
{
|
|
"epoch": 1.6236702127659575,
|
|
"grad_norm": 3.5733299255371094,
|
|
"learning_rate": 8.07453264121719e-06,
|
|
"loss": 0.7921,
|
|
"step": 6105
|
|
},
|
|
{
|
|
"epoch": 1.623936170212766,
|
|
"grad_norm": 3.7665045261383057,
|
|
"learning_rate": 8.073839015836884e-06,
|
|
"loss": 0.9738,
|
|
"step": 6106
|
|
},
|
|
{
|
|
"epoch": 1.6242021276595744,
|
|
"grad_norm": 4.237964153289795,
|
|
"learning_rate": 8.07314529534838e-06,
|
|
"loss": 0.869,
|
|
"step": 6107
|
|
},
|
|
{
|
|
"epoch": 1.624468085106383,
|
|
"grad_norm": 3.797464370727539,
|
|
"learning_rate": 8.072451479773143e-06,
|
|
"loss": 0.8445,
|
|
"step": 6108
|
|
},
|
|
{
|
|
"epoch": 1.6247340425531915,
|
|
"grad_norm": 3.9559130668640137,
|
|
"learning_rate": 8.071757569132639e-06,
|
|
"loss": 0.848,
|
|
"step": 6109
|
|
},
|
|
{
|
|
"epoch": 1.625,
|
|
"grad_norm": 3.7033722400665283,
|
|
"learning_rate": 8.071063563448341e-06,
|
|
"loss": 0.8571,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 1.6252659574468085,
|
|
"grad_norm": 3.696049451828003,
|
|
"learning_rate": 8.070369462741719e-06,
|
|
"loss": 0.8649,
|
|
"step": 6111
|
|
},
|
|
{
|
|
"epoch": 1.625531914893617,
|
|
"grad_norm": 3.495377540588379,
|
|
"learning_rate": 8.06967526703425e-06,
|
|
"loss": 0.7691,
|
|
"step": 6112
|
|
},
|
|
{
|
|
"epoch": 1.6257978723404256,
|
|
"grad_norm": 3.9298911094665527,
|
|
"learning_rate": 8.068980976347416e-06,
|
|
"loss": 0.7793,
|
|
"step": 6113
|
|
},
|
|
{
|
|
"epoch": 1.626063829787234,
|
|
"grad_norm": 3.756425380706787,
|
|
"learning_rate": 8.068286590702697e-06,
|
|
"loss": 0.8161,
|
|
"step": 6114
|
|
},
|
|
{
|
|
"epoch": 1.6263297872340425,
|
|
"grad_norm": 4.13591194152832,
|
|
"learning_rate": 8.067592110121576e-06,
|
|
"loss": 0.8543,
|
|
"step": 6115
|
|
},
|
|
{
|
|
"epoch": 1.6265957446808512,
|
|
"grad_norm": 4.203410625457764,
|
|
"learning_rate": 8.066897534625547e-06,
|
|
"loss": 0.7607,
|
|
"step": 6116
|
|
},
|
|
{
|
|
"epoch": 1.6268617021276595,
|
|
"grad_norm": 4.2013983726501465,
|
|
"learning_rate": 8.066202864236096e-06,
|
|
"loss": 0.8248,
|
|
"step": 6117
|
|
},
|
|
{
|
|
"epoch": 1.6271276595744681,
|
|
"grad_norm": 4.034732341766357,
|
|
"learning_rate": 8.065508098974719e-06,
|
|
"loss": 0.804,
|
|
"step": 6118
|
|
},
|
|
{
|
|
"epoch": 1.6273936170212766,
|
|
"grad_norm": 4.180783271789551,
|
|
"learning_rate": 8.06481323886291e-06,
|
|
"loss": 0.8354,
|
|
"step": 6119
|
|
},
|
|
{
|
|
"epoch": 1.627659574468085,
|
|
"grad_norm": 3.9474117755889893,
|
|
"learning_rate": 8.064118283922173e-06,
|
|
"loss": 0.8622,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 1.6279255319148938,
|
|
"grad_norm": 3.8866050243377686,
|
|
"learning_rate": 8.063423234174008e-06,
|
|
"loss": 0.7197,
|
|
"step": 6121
|
|
},
|
|
{
|
|
"epoch": 1.628191489361702,
|
|
"grad_norm": 4.463206768035889,
|
|
"learning_rate": 8.062728089639921e-06,
|
|
"loss": 0.9226,
|
|
"step": 6122
|
|
},
|
|
{
|
|
"epoch": 1.6284574468085107,
|
|
"grad_norm": 3.982656717300415,
|
|
"learning_rate": 8.062032850341423e-06,
|
|
"loss": 0.7225,
|
|
"step": 6123
|
|
},
|
|
{
|
|
"epoch": 1.6287234042553191,
|
|
"grad_norm": 3.9853739738464355,
|
|
"learning_rate": 8.061337516300024e-06,
|
|
"loss": 0.6711,
|
|
"step": 6124
|
|
},
|
|
{
|
|
"epoch": 1.6289893617021276,
|
|
"grad_norm": 3.823125123977661,
|
|
"learning_rate": 8.060642087537233e-06,
|
|
"loss": 0.8944,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 1.6292553191489363,
|
|
"grad_norm": 4.082576274871826,
|
|
"learning_rate": 8.059946564074577e-06,
|
|
"loss": 0.8235,
|
|
"step": 6126
|
|
},
|
|
{
|
|
"epoch": 1.6295212765957445,
|
|
"grad_norm": 4.3164472579956055,
|
|
"learning_rate": 8.05925094593357e-06,
|
|
"loss": 0.8086,
|
|
"step": 6127
|
|
},
|
|
{
|
|
"epoch": 1.6297872340425532,
|
|
"grad_norm": 3.8943753242492676,
|
|
"learning_rate": 8.058555233135737e-06,
|
|
"loss": 0.7088,
|
|
"step": 6128
|
|
},
|
|
{
|
|
"epoch": 1.6300531914893617,
|
|
"grad_norm": 4.248415470123291,
|
|
"learning_rate": 8.057859425702605e-06,
|
|
"loss": 0.8011,
|
|
"step": 6129
|
|
},
|
|
{
|
|
"epoch": 1.6303191489361701,
|
|
"grad_norm": 3.8152194023132324,
|
|
"learning_rate": 8.057163523655702e-06,
|
|
"loss": 0.7437,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 1.6305851063829788,
|
|
"grad_norm": 4.243065357208252,
|
|
"learning_rate": 8.056467527016559e-06,
|
|
"loss": 0.8156,
|
|
"step": 6131
|
|
},
|
|
{
|
|
"epoch": 1.6308510638297873,
|
|
"grad_norm": 4.148963928222656,
|
|
"learning_rate": 8.055771435806714e-06,
|
|
"loss": 0.8538,
|
|
"step": 6132
|
|
},
|
|
{
|
|
"epoch": 1.6311170212765957,
|
|
"grad_norm": 3.848583698272705,
|
|
"learning_rate": 8.0550752500477e-06,
|
|
"loss": 0.7818,
|
|
"step": 6133
|
|
},
|
|
{
|
|
"epoch": 1.6313829787234042,
|
|
"grad_norm": 4.185320854187012,
|
|
"learning_rate": 8.054378969761062e-06,
|
|
"loss": 0.85,
|
|
"step": 6134
|
|
},
|
|
{
|
|
"epoch": 1.6316489361702127,
|
|
"grad_norm": 4.244765758514404,
|
|
"learning_rate": 8.053682594968346e-06,
|
|
"loss": 0.8856,
|
|
"step": 6135
|
|
},
|
|
{
|
|
"epoch": 1.6319148936170214,
|
|
"grad_norm": 3.8420188426971436,
|
|
"learning_rate": 8.052986125691091e-06,
|
|
"loss": 0.7745,
|
|
"step": 6136
|
|
},
|
|
{
|
|
"epoch": 1.6321808510638298,
|
|
"grad_norm": 4.029837131500244,
|
|
"learning_rate": 8.052289561950852e-06,
|
|
"loss": 0.8724,
|
|
"step": 6137
|
|
},
|
|
{
|
|
"epoch": 1.6324468085106383,
|
|
"grad_norm": 3.9027750492095947,
|
|
"learning_rate": 8.051592903769182e-06,
|
|
"loss": 0.7405,
|
|
"step": 6138
|
|
},
|
|
{
|
|
"epoch": 1.632712765957447,
|
|
"grad_norm": 4.00022554397583,
|
|
"learning_rate": 8.050896151167632e-06,
|
|
"loss": 0.7677,
|
|
"step": 6139
|
|
},
|
|
{
|
|
"epoch": 1.6329787234042552,
|
|
"grad_norm": 4.150446891784668,
|
|
"learning_rate": 8.050199304167766e-06,
|
|
"loss": 0.7348,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 1.633244680851064,
|
|
"grad_norm": 4.308548927307129,
|
|
"learning_rate": 8.04950236279114e-06,
|
|
"loss": 0.8106,
|
|
"step": 6141
|
|
},
|
|
{
|
|
"epoch": 1.6335106382978724,
|
|
"grad_norm": 3.9967095851898193,
|
|
"learning_rate": 8.048805327059321e-06,
|
|
"loss": 0.7345,
|
|
"step": 6142
|
|
},
|
|
{
|
|
"epoch": 1.6337765957446808,
|
|
"grad_norm": 3.783818244934082,
|
|
"learning_rate": 8.048108196993879e-06,
|
|
"loss": 0.716,
|
|
"step": 6143
|
|
},
|
|
{
|
|
"epoch": 1.6340425531914895,
|
|
"grad_norm": 3.8823726177215576,
|
|
"learning_rate": 8.047410972616376e-06,
|
|
"loss": 0.778,
|
|
"step": 6144
|
|
},
|
|
{
|
|
"epoch": 1.6343085106382977,
|
|
"grad_norm": 4.007701873779297,
|
|
"learning_rate": 8.046713653948393e-06,
|
|
"loss": 0.9691,
|
|
"step": 6145
|
|
},
|
|
{
|
|
"epoch": 1.6345744680851064,
|
|
"grad_norm": 4.14747428894043,
|
|
"learning_rate": 8.0460162410115e-06,
|
|
"loss": 0.8201,
|
|
"step": 6146
|
|
},
|
|
{
|
|
"epoch": 1.6348404255319149,
|
|
"grad_norm": 4.101099967956543,
|
|
"learning_rate": 8.045318733827278e-06,
|
|
"loss": 0.8864,
|
|
"step": 6147
|
|
},
|
|
{
|
|
"epoch": 1.6351063829787233,
|
|
"grad_norm": 3.709555149078369,
|
|
"learning_rate": 8.044621132417311e-06,
|
|
"loss": 0.7185,
|
|
"step": 6148
|
|
},
|
|
{
|
|
"epoch": 1.635372340425532,
|
|
"grad_norm": 4.0000481605529785,
|
|
"learning_rate": 8.043923436803182e-06,
|
|
"loss": 0.8816,
|
|
"step": 6149
|
|
},
|
|
{
|
|
"epoch": 1.6356382978723403,
|
|
"grad_norm": 4.075678825378418,
|
|
"learning_rate": 8.043225647006475e-06,
|
|
"loss": 0.8192,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 1.635904255319149,
|
|
"grad_norm": 4.004273891448975,
|
|
"learning_rate": 8.042527763048787e-06,
|
|
"loss": 0.9374,
|
|
"step": 6151
|
|
},
|
|
{
|
|
"epoch": 1.6361702127659574,
|
|
"grad_norm": 3.904745101928711,
|
|
"learning_rate": 8.041829784951706e-06,
|
|
"loss": 0.7701,
|
|
"step": 6152
|
|
},
|
|
{
|
|
"epoch": 1.6364361702127659,
|
|
"grad_norm": 3.7361650466918945,
|
|
"learning_rate": 8.04113171273683e-06,
|
|
"loss": 0.6875,
|
|
"step": 6153
|
|
},
|
|
{
|
|
"epoch": 1.6367021276595746,
|
|
"grad_norm": 3.9355521202087402,
|
|
"learning_rate": 8.040433546425759e-06,
|
|
"loss": 0.828,
|
|
"step": 6154
|
|
},
|
|
{
|
|
"epoch": 1.636968085106383,
|
|
"grad_norm": 3.615612745285034,
|
|
"learning_rate": 8.039735286040095e-06,
|
|
"loss": 0.8136,
|
|
"step": 6155
|
|
},
|
|
{
|
|
"epoch": 1.6372340425531915,
|
|
"grad_norm": 3.900493621826172,
|
|
"learning_rate": 8.03903693160144e-06,
|
|
"loss": 0.7782,
|
|
"step": 6156
|
|
},
|
|
{
|
|
"epoch": 1.6375,
|
|
"grad_norm": 4.175507068634033,
|
|
"learning_rate": 8.038338483131408e-06,
|
|
"loss": 0.8486,
|
|
"step": 6157
|
|
},
|
|
{
|
|
"epoch": 1.6377659574468084,
|
|
"grad_norm": 4.02733039855957,
|
|
"learning_rate": 8.037639940651603e-06,
|
|
"loss": 0.7591,
|
|
"step": 6158
|
|
},
|
|
{
|
|
"epoch": 1.638031914893617,
|
|
"grad_norm": 4.006030559539795,
|
|
"learning_rate": 8.036941304183643e-06,
|
|
"loss": 0.8453,
|
|
"step": 6159
|
|
},
|
|
{
|
|
"epoch": 1.6382978723404256,
|
|
"grad_norm": 3.9777238368988037,
|
|
"learning_rate": 8.036242573749142e-06,
|
|
"loss": 0.7623,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 1.638563829787234,
|
|
"grad_norm": 3.7054030895233154,
|
|
"learning_rate": 8.035543749369724e-06,
|
|
"loss": 0.7552,
|
|
"step": 6161
|
|
},
|
|
{
|
|
"epoch": 1.6388297872340427,
|
|
"grad_norm": 4.149451732635498,
|
|
"learning_rate": 8.034844831067006e-06,
|
|
"loss": 0.6954,
|
|
"step": 6162
|
|
},
|
|
{
|
|
"epoch": 1.639095744680851,
|
|
"grad_norm": 4.144680500030518,
|
|
"learning_rate": 8.034145818862618e-06,
|
|
"loss": 0.8583,
|
|
"step": 6163
|
|
},
|
|
{
|
|
"epoch": 1.6393617021276596,
|
|
"grad_norm": 3.732167959213257,
|
|
"learning_rate": 8.033446712778184e-06,
|
|
"loss": 0.7437,
|
|
"step": 6164
|
|
},
|
|
{
|
|
"epoch": 1.639627659574468,
|
|
"grad_norm": 4.176260471343994,
|
|
"learning_rate": 8.032747512835338e-06,
|
|
"loss": 0.9089,
|
|
"step": 6165
|
|
},
|
|
{
|
|
"epoch": 1.6398936170212766,
|
|
"grad_norm": 3.9875879287719727,
|
|
"learning_rate": 8.032048219055712e-06,
|
|
"loss": 0.7776,
|
|
"step": 6166
|
|
},
|
|
{
|
|
"epoch": 1.6401595744680852,
|
|
"grad_norm": 3.942016839981079,
|
|
"learning_rate": 8.031348831460948e-06,
|
|
"loss": 0.752,
|
|
"step": 6167
|
|
},
|
|
{
|
|
"epoch": 1.6404255319148935,
|
|
"grad_norm": 4.088458061218262,
|
|
"learning_rate": 8.030649350072679e-06,
|
|
"loss": 0.8339,
|
|
"step": 6168
|
|
},
|
|
{
|
|
"epoch": 1.6406914893617022,
|
|
"grad_norm": 4.712299346923828,
|
|
"learning_rate": 8.029949774912552e-06,
|
|
"loss": 0.942,
|
|
"step": 6169
|
|
},
|
|
{
|
|
"epoch": 1.6409574468085106,
|
|
"grad_norm": 3.5929760932922363,
|
|
"learning_rate": 8.029250106002212e-06,
|
|
"loss": 0.7309,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 1.641223404255319,
|
|
"grad_norm": 4.059690475463867,
|
|
"learning_rate": 8.028550343363306e-06,
|
|
"loss": 0.8479,
|
|
"step": 6171
|
|
},
|
|
{
|
|
"epoch": 1.6414893617021278,
|
|
"grad_norm": 4.054781436920166,
|
|
"learning_rate": 8.027850487017488e-06,
|
|
"loss": 0.9293,
|
|
"step": 6172
|
|
},
|
|
{
|
|
"epoch": 1.641755319148936,
|
|
"grad_norm": 3.754241466522217,
|
|
"learning_rate": 8.027150536986411e-06,
|
|
"loss": 0.7714,
|
|
"step": 6173
|
|
},
|
|
{
|
|
"epoch": 1.6420212765957447,
|
|
"grad_norm": 3.6258599758148193,
|
|
"learning_rate": 8.026450493291731e-06,
|
|
"loss": 0.725,
|
|
"step": 6174
|
|
},
|
|
{
|
|
"epoch": 1.6422872340425532,
|
|
"grad_norm": 4.247791290283203,
|
|
"learning_rate": 8.025750355955112e-06,
|
|
"loss": 0.7394,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 1.6425531914893616,
|
|
"grad_norm": 3.7767536640167236,
|
|
"learning_rate": 8.025050124998213e-06,
|
|
"loss": 0.757,
|
|
"step": 6176
|
|
},
|
|
{
|
|
"epoch": 1.6428191489361703,
|
|
"grad_norm": 3.970726490020752,
|
|
"learning_rate": 8.0243498004427e-06,
|
|
"loss": 0.7449,
|
|
"step": 6177
|
|
},
|
|
{
|
|
"epoch": 1.6430851063829788,
|
|
"grad_norm": 4.161791801452637,
|
|
"learning_rate": 8.023649382310246e-06,
|
|
"loss": 0.8939,
|
|
"step": 6178
|
|
},
|
|
{
|
|
"epoch": 1.6433510638297872,
|
|
"grad_norm": 3.9791698455810547,
|
|
"learning_rate": 8.02294887062252e-06,
|
|
"loss": 0.7553,
|
|
"step": 6179
|
|
},
|
|
{
|
|
"epoch": 1.6436170212765957,
|
|
"grad_norm": 3.881882905960083,
|
|
"learning_rate": 8.022248265401196e-06,
|
|
"loss": 0.7806,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 1.6438829787234042,
|
|
"grad_norm": 4.165888786315918,
|
|
"learning_rate": 8.021547566667952e-06,
|
|
"loss": 0.7756,
|
|
"step": 6181
|
|
},
|
|
{
|
|
"epoch": 1.6441489361702128,
|
|
"grad_norm": 4.053508281707764,
|
|
"learning_rate": 8.02084677444447e-06,
|
|
"loss": 0.7472,
|
|
"step": 6182
|
|
},
|
|
{
|
|
"epoch": 1.6444148936170213,
|
|
"grad_norm": 4.370820045471191,
|
|
"learning_rate": 8.020145888752431e-06,
|
|
"loss": 0.858,
|
|
"step": 6183
|
|
},
|
|
{
|
|
"epoch": 1.6446808510638298,
|
|
"grad_norm": 4.108578205108643,
|
|
"learning_rate": 8.019444909613524e-06,
|
|
"loss": 0.8644,
|
|
"step": 6184
|
|
},
|
|
{
|
|
"epoch": 1.6449468085106385,
|
|
"grad_norm": 3.9922139644622803,
|
|
"learning_rate": 8.018743837049433e-06,
|
|
"loss": 0.7846,
|
|
"step": 6185
|
|
},
|
|
{
|
|
"epoch": 1.6452127659574467,
|
|
"grad_norm": 3.711470127105713,
|
|
"learning_rate": 8.018042671081858e-06,
|
|
"loss": 0.685,
|
|
"step": 6186
|
|
},
|
|
{
|
|
"epoch": 1.6454787234042554,
|
|
"grad_norm": 3.7997970581054688,
|
|
"learning_rate": 8.01734141173249e-06,
|
|
"loss": 0.7726,
|
|
"step": 6187
|
|
},
|
|
{
|
|
"epoch": 1.6457446808510638,
|
|
"grad_norm": 4.349726676940918,
|
|
"learning_rate": 8.016640059023023e-06,
|
|
"loss": 0.9296,
|
|
"step": 6188
|
|
},
|
|
{
|
|
"epoch": 1.6460106382978723,
|
|
"grad_norm": 3.8738739490509033,
|
|
"learning_rate": 8.01593861297516e-06,
|
|
"loss": 0.9472,
|
|
"step": 6189
|
|
},
|
|
{
|
|
"epoch": 1.646276595744681,
|
|
"grad_norm": 4.002452850341797,
|
|
"learning_rate": 8.015237073610607e-06,
|
|
"loss": 0.7488,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 1.6465425531914892,
|
|
"grad_norm": 4.017054557800293,
|
|
"learning_rate": 8.01453544095107e-06,
|
|
"loss": 0.9446,
|
|
"step": 6191
|
|
},
|
|
{
|
|
"epoch": 1.646808510638298,
|
|
"grad_norm": 4.123724460601807,
|
|
"learning_rate": 8.013833715018256e-06,
|
|
"loss": 0.9052,
|
|
"step": 6192
|
|
},
|
|
{
|
|
"epoch": 1.6470744680851064,
|
|
"grad_norm": 3.664494752883911,
|
|
"learning_rate": 8.013131895833879e-06,
|
|
"loss": 0.7421,
|
|
"step": 6193
|
|
},
|
|
{
|
|
"epoch": 1.6473404255319148,
|
|
"grad_norm": 3.7503373622894287,
|
|
"learning_rate": 8.012429983419654e-06,
|
|
"loss": 0.7293,
|
|
"step": 6194
|
|
},
|
|
{
|
|
"epoch": 1.6476063829787235,
|
|
"grad_norm": 4.248551845550537,
|
|
"learning_rate": 8.0117279777973e-06,
|
|
"loss": 0.664,
|
|
"step": 6195
|
|
},
|
|
{
|
|
"epoch": 1.6478723404255318,
|
|
"grad_norm": 4.146711349487305,
|
|
"learning_rate": 8.011025878988534e-06,
|
|
"loss": 0.8164,
|
|
"step": 6196
|
|
},
|
|
{
|
|
"epoch": 1.6481382978723405,
|
|
"grad_norm": 3.8372318744659424,
|
|
"learning_rate": 8.010323687015083e-06,
|
|
"loss": 0.7173,
|
|
"step": 6197
|
|
},
|
|
{
|
|
"epoch": 1.648404255319149,
|
|
"grad_norm": 4.206233501434326,
|
|
"learning_rate": 8.009621401898671e-06,
|
|
"loss": 0.8324,
|
|
"step": 6198
|
|
},
|
|
{
|
|
"epoch": 1.6486702127659574,
|
|
"grad_norm": 3.9302217960357666,
|
|
"learning_rate": 8.008919023661033e-06,
|
|
"loss": 0.8095,
|
|
"step": 6199
|
|
},
|
|
{
|
|
"epoch": 1.648936170212766,
|
|
"grad_norm": 3.8333635330200195,
|
|
"learning_rate": 8.008216552323896e-06,
|
|
"loss": 0.6761,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 1.6492021276595743,
|
|
"grad_norm": 4.308274269104004,
|
|
"learning_rate": 8.007513987908997e-06,
|
|
"loss": 0.9286,
|
|
"step": 6201
|
|
},
|
|
{
|
|
"epoch": 1.649468085106383,
|
|
"grad_norm": 3.9875328540802,
|
|
"learning_rate": 8.006811330438076e-06,
|
|
"loss": 0.8439,
|
|
"step": 6202
|
|
},
|
|
{
|
|
"epoch": 1.6497340425531914,
|
|
"grad_norm": 3.9723567962646484,
|
|
"learning_rate": 8.006108579932869e-06,
|
|
"loss": 0.743,
|
|
"step": 6203
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"grad_norm": 3.6594903469085693,
|
|
"learning_rate": 8.005405736415127e-06,
|
|
"loss": 0.8403,
|
|
"step": 6204
|
|
},
|
|
{
|
|
"epoch": 1.6502659574468086,
|
|
"grad_norm": 3.7459709644317627,
|
|
"learning_rate": 8.00470279990659e-06,
|
|
"loss": 0.7611,
|
|
"step": 6205
|
|
},
|
|
{
|
|
"epoch": 1.650531914893617,
|
|
"grad_norm": 4.077069282531738,
|
|
"learning_rate": 8.003999770429013e-06,
|
|
"loss": 0.8415,
|
|
"step": 6206
|
|
},
|
|
{
|
|
"epoch": 1.6507978723404255,
|
|
"grad_norm": 4.072371482849121,
|
|
"learning_rate": 8.003296648004146e-06,
|
|
"loss": 0.8709,
|
|
"step": 6207
|
|
},
|
|
{
|
|
"epoch": 1.6510638297872342,
|
|
"grad_norm": 4.159237861633301,
|
|
"learning_rate": 8.002593432653743e-06,
|
|
"loss": 0.802,
|
|
"step": 6208
|
|
},
|
|
{
|
|
"epoch": 1.6513297872340424,
|
|
"grad_norm": 4.047359943389893,
|
|
"learning_rate": 8.001890124399565e-06,
|
|
"loss": 0.7666,
|
|
"step": 6209
|
|
},
|
|
{
|
|
"epoch": 1.6515957446808511,
|
|
"grad_norm": 3.548340320587158,
|
|
"learning_rate": 8.001186723263374e-06,
|
|
"loss": 0.8141,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 1.6518617021276596,
|
|
"grad_norm": 4.3510050773620605,
|
|
"learning_rate": 8.00048322926693e-06,
|
|
"loss": 0.7908,
|
|
"step": 6211
|
|
},
|
|
{
|
|
"epoch": 1.652127659574468,
|
|
"grad_norm": 3.642498254776001,
|
|
"learning_rate": 7.999779642432003e-06,
|
|
"loss": 0.8594,
|
|
"step": 6212
|
|
},
|
|
{
|
|
"epoch": 1.6523936170212767,
|
|
"grad_norm": 3.804325819015503,
|
|
"learning_rate": 7.999075962780363e-06,
|
|
"loss": 0.7736,
|
|
"step": 6213
|
|
},
|
|
{
|
|
"epoch": 1.652659574468085,
|
|
"grad_norm": 4.080993175506592,
|
|
"learning_rate": 7.998372190333781e-06,
|
|
"loss": 0.8834,
|
|
"step": 6214
|
|
},
|
|
{
|
|
"epoch": 1.6529255319148937,
|
|
"grad_norm": 4.291904449462891,
|
|
"learning_rate": 7.997668325114033e-06,
|
|
"loss": 0.8433,
|
|
"step": 6215
|
|
},
|
|
{
|
|
"epoch": 1.6531914893617021,
|
|
"grad_norm": 3.4936020374298096,
|
|
"learning_rate": 7.996964367142899e-06,
|
|
"loss": 0.7045,
|
|
"step": 6216
|
|
},
|
|
{
|
|
"epoch": 1.6534574468085106,
|
|
"grad_norm": 4.251427173614502,
|
|
"learning_rate": 7.996260316442157e-06,
|
|
"loss": 0.8487,
|
|
"step": 6217
|
|
},
|
|
{
|
|
"epoch": 1.6537234042553193,
|
|
"grad_norm": 3.810161828994751,
|
|
"learning_rate": 7.995556173033594e-06,
|
|
"loss": 0.7715,
|
|
"step": 6218
|
|
},
|
|
{
|
|
"epoch": 1.6539893617021275,
|
|
"grad_norm": 3.8157644271850586,
|
|
"learning_rate": 7.994851936938996e-06,
|
|
"loss": 0.8408,
|
|
"step": 6219
|
|
},
|
|
{
|
|
"epoch": 1.6542553191489362,
|
|
"grad_norm": 3.614837646484375,
|
|
"learning_rate": 7.994147608180153e-06,
|
|
"loss": 0.7829,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 1.6545212765957447,
|
|
"grad_norm": 4.262511253356934,
|
|
"learning_rate": 7.99344318677886e-06,
|
|
"loss": 0.8728,
|
|
"step": 6221
|
|
},
|
|
{
|
|
"epoch": 1.6547872340425531,
|
|
"grad_norm": 4.14133358001709,
|
|
"learning_rate": 7.992738672756909e-06,
|
|
"loss": 0.8611,
|
|
"step": 6222
|
|
},
|
|
{
|
|
"epoch": 1.6550531914893618,
|
|
"grad_norm": 4.4198737144470215,
|
|
"learning_rate": 7.992034066136099e-06,
|
|
"loss": 0.8825,
|
|
"step": 6223
|
|
},
|
|
{
|
|
"epoch": 1.65531914893617,
|
|
"grad_norm": 4.433263778686523,
|
|
"learning_rate": 7.991329366938232e-06,
|
|
"loss": 0.9547,
|
|
"step": 6224
|
|
},
|
|
{
|
|
"epoch": 1.6555851063829787,
|
|
"grad_norm": 4.354765892028809,
|
|
"learning_rate": 7.990624575185116e-06,
|
|
"loss": 0.9415,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 1.6558510638297872,
|
|
"grad_norm": 4.149988174438477,
|
|
"learning_rate": 7.98991969089855e-06,
|
|
"loss": 0.7804,
|
|
"step": 6226
|
|
},
|
|
{
|
|
"epoch": 1.6561170212765957,
|
|
"grad_norm": 3.833970546722412,
|
|
"learning_rate": 7.98921471410035e-06,
|
|
"loss": 0.7944,
|
|
"step": 6227
|
|
},
|
|
{
|
|
"epoch": 1.6563829787234043,
|
|
"grad_norm": 3.816167116165161,
|
|
"learning_rate": 7.98850964481233e-06,
|
|
"loss": 0.8054,
|
|
"step": 6228
|
|
},
|
|
{
|
|
"epoch": 1.6566489361702128,
|
|
"grad_norm": 3.758295774459839,
|
|
"learning_rate": 7.987804483056301e-06,
|
|
"loss": 0.7724,
|
|
"step": 6229
|
|
},
|
|
{
|
|
"epoch": 1.6569148936170213,
|
|
"grad_norm": 4.2231669425964355,
|
|
"learning_rate": 7.987099228854083e-06,
|
|
"loss": 0.8713,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 1.65718085106383,
|
|
"grad_norm": 4.497824192047119,
|
|
"learning_rate": 7.9863938822275e-06,
|
|
"loss": 0.9629,
|
|
"step": 6231
|
|
},
|
|
{
|
|
"epoch": 1.6574468085106382,
|
|
"grad_norm": 3.9088895320892334,
|
|
"learning_rate": 7.985688443198371e-06,
|
|
"loss": 0.7597,
|
|
"step": 6232
|
|
},
|
|
{
|
|
"epoch": 1.6577127659574469,
|
|
"grad_norm": 3.699256658554077,
|
|
"learning_rate": 7.984982911788528e-06,
|
|
"loss": 0.8468,
|
|
"step": 6233
|
|
},
|
|
{
|
|
"epoch": 1.6579787234042553,
|
|
"grad_norm": 3.8971588611602783,
|
|
"learning_rate": 7.9842772880198e-06,
|
|
"loss": 0.8377,
|
|
"step": 6234
|
|
},
|
|
{
|
|
"epoch": 1.6582446808510638,
|
|
"grad_norm": 3.8062503337860107,
|
|
"learning_rate": 7.98357157191402e-06,
|
|
"loss": 0.6739,
|
|
"step": 6235
|
|
},
|
|
{
|
|
"epoch": 1.6585106382978725,
|
|
"grad_norm": 3.7170534133911133,
|
|
"learning_rate": 7.982865763493022e-06,
|
|
"loss": 0.7505,
|
|
"step": 6236
|
|
},
|
|
{
|
|
"epoch": 1.6587765957446807,
|
|
"grad_norm": 3.678074598312378,
|
|
"learning_rate": 7.982159862778645e-06,
|
|
"loss": 0.7589,
|
|
"step": 6237
|
|
},
|
|
{
|
|
"epoch": 1.6590425531914894,
|
|
"grad_norm": 3.895219326019287,
|
|
"learning_rate": 7.98145386979273e-06,
|
|
"loss": 0.6712,
|
|
"step": 6238
|
|
},
|
|
{
|
|
"epoch": 1.6593085106382979,
|
|
"grad_norm": 4.339925765991211,
|
|
"learning_rate": 7.980747784557123e-06,
|
|
"loss": 0.9584,
|
|
"step": 6239
|
|
},
|
|
{
|
|
"epoch": 1.6595744680851063,
|
|
"grad_norm": 3.8446319103240967,
|
|
"learning_rate": 7.98004160709367e-06,
|
|
"loss": 0.7287,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 1.659840425531915,
|
|
"grad_norm": 3.852252960205078,
|
|
"learning_rate": 7.979335337424222e-06,
|
|
"loss": 0.9698,
|
|
"step": 6241
|
|
},
|
|
{
|
|
"epoch": 1.6601063829787233,
|
|
"grad_norm": 3.7780802249908447,
|
|
"learning_rate": 7.97862897557063e-06,
|
|
"loss": 0.8085,
|
|
"step": 6242
|
|
},
|
|
{
|
|
"epoch": 1.660372340425532,
|
|
"grad_norm": 3.954035758972168,
|
|
"learning_rate": 7.97792252155475e-06,
|
|
"loss": 0.8768,
|
|
"step": 6243
|
|
},
|
|
{
|
|
"epoch": 1.6606382978723404,
|
|
"grad_norm": 3.267712116241455,
|
|
"learning_rate": 7.977215975398442e-06,
|
|
"loss": 0.6974,
|
|
"step": 6244
|
|
},
|
|
{
|
|
"epoch": 1.6609042553191489,
|
|
"grad_norm": 3.534168243408203,
|
|
"learning_rate": 7.976509337123567e-06,
|
|
"loss": 0.8029,
|
|
"step": 6245
|
|
},
|
|
{
|
|
"epoch": 1.6611702127659576,
|
|
"grad_norm": 3.9597525596618652,
|
|
"learning_rate": 7.975802606751989e-06,
|
|
"loss": 0.7754,
|
|
"step": 6246
|
|
},
|
|
{
|
|
"epoch": 1.6614361702127658,
|
|
"grad_norm": 4.123916149139404,
|
|
"learning_rate": 7.975095784305572e-06,
|
|
"loss": 0.8451,
|
|
"step": 6247
|
|
},
|
|
{
|
|
"epoch": 1.6617021276595745,
|
|
"grad_norm": 3.989689588546753,
|
|
"learning_rate": 7.97438886980619e-06,
|
|
"loss": 0.7707,
|
|
"step": 6248
|
|
},
|
|
{
|
|
"epoch": 1.661968085106383,
|
|
"grad_norm": 4.045599937438965,
|
|
"learning_rate": 7.973681863275715e-06,
|
|
"loss": 0.7474,
|
|
"step": 6249
|
|
},
|
|
{
|
|
"epoch": 1.6622340425531914,
|
|
"grad_norm": 4.4239420890808105,
|
|
"learning_rate": 7.972974764736023e-06,
|
|
"loss": 0.7858,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 1.6625,
|
|
"grad_norm": 3.499119520187378,
|
|
"learning_rate": 7.972267574208991e-06,
|
|
"loss": 0.7021,
|
|
"step": 6251
|
|
},
|
|
{
|
|
"epoch": 1.6627659574468086,
|
|
"grad_norm": 4.45729923248291,
|
|
"learning_rate": 7.971560291716501e-06,
|
|
"loss": 0.9094,
|
|
"step": 6252
|
|
},
|
|
{
|
|
"epoch": 1.663031914893617,
|
|
"grad_norm": 4.242092609405518,
|
|
"learning_rate": 7.970852917280434e-06,
|
|
"loss": 0.8807,
|
|
"step": 6253
|
|
},
|
|
{
|
|
"epoch": 1.6632978723404257,
|
|
"grad_norm": 3.947512149810791,
|
|
"learning_rate": 7.970145450922684e-06,
|
|
"loss": 0.8778,
|
|
"step": 6254
|
|
},
|
|
{
|
|
"epoch": 1.663563829787234,
|
|
"grad_norm": 5.4790167808532715,
|
|
"learning_rate": 7.969437892665134e-06,
|
|
"loss": 0.8196,
|
|
"step": 6255
|
|
},
|
|
{
|
|
"epoch": 1.6638297872340426,
|
|
"grad_norm": 3.856820583343506,
|
|
"learning_rate": 7.968730242529681e-06,
|
|
"loss": 0.7653,
|
|
"step": 6256
|
|
},
|
|
{
|
|
"epoch": 1.664095744680851,
|
|
"grad_norm": 4.446346759796143,
|
|
"learning_rate": 7.968022500538219e-06,
|
|
"loss": 0.9374,
|
|
"step": 6257
|
|
},
|
|
{
|
|
"epoch": 1.6643617021276595,
|
|
"grad_norm": 4.079642295837402,
|
|
"learning_rate": 7.967314666712647e-06,
|
|
"loss": 0.8123,
|
|
"step": 6258
|
|
},
|
|
{
|
|
"epoch": 1.6646276595744682,
|
|
"grad_norm": 4.338622570037842,
|
|
"learning_rate": 7.966606741074864e-06,
|
|
"loss": 0.7508,
|
|
"step": 6259
|
|
},
|
|
{
|
|
"epoch": 1.6648936170212765,
|
|
"grad_norm": 3.974862813949585,
|
|
"learning_rate": 7.965898723646777e-06,
|
|
"loss": 0.8222,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 1.6651595744680852,
|
|
"grad_norm": 4.263228416442871,
|
|
"learning_rate": 7.96519061445029e-06,
|
|
"loss": 0.9591,
|
|
"step": 6261
|
|
},
|
|
{
|
|
"epoch": 1.6654255319148936,
|
|
"grad_norm": 3.6377105712890625,
|
|
"learning_rate": 7.964482413507316e-06,
|
|
"loss": 0.7791,
|
|
"step": 6262
|
|
},
|
|
{
|
|
"epoch": 1.665691489361702,
|
|
"grad_norm": 3.3404452800750732,
|
|
"learning_rate": 7.963774120839767e-06,
|
|
"loss": 0.7668,
|
|
"step": 6263
|
|
},
|
|
{
|
|
"epoch": 1.6659574468085108,
|
|
"grad_norm": 3.6252615451812744,
|
|
"learning_rate": 7.963065736469555e-06,
|
|
"loss": 0.7628,
|
|
"step": 6264
|
|
},
|
|
{
|
|
"epoch": 1.666223404255319,
|
|
"grad_norm": 4.053292751312256,
|
|
"learning_rate": 7.9623572604186e-06,
|
|
"loss": 0.9255,
|
|
"step": 6265
|
|
},
|
|
{
|
|
"epoch": 1.6664893617021277,
|
|
"grad_norm": 3.612187385559082,
|
|
"learning_rate": 7.961648692708826e-06,
|
|
"loss": 0.7864,
|
|
"step": 6266
|
|
},
|
|
{
|
|
"epoch": 1.6667553191489362,
|
|
"grad_norm": 4.19817590713501,
|
|
"learning_rate": 7.960940033362152e-06,
|
|
"loss": 0.8414,
|
|
"step": 6267
|
|
},
|
|
{
|
|
"epoch": 1.6670212765957446,
|
|
"grad_norm": 3.919515371322632,
|
|
"learning_rate": 7.960231282400509e-06,
|
|
"loss": 0.7358,
|
|
"step": 6268
|
|
},
|
|
{
|
|
"epoch": 1.6672872340425533,
|
|
"grad_norm": 4.0831732749938965,
|
|
"learning_rate": 7.959522439845825e-06,
|
|
"loss": 0.7613,
|
|
"step": 6269
|
|
},
|
|
{
|
|
"epoch": 1.6675531914893615,
|
|
"grad_norm": 4.200259685516357,
|
|
"learning_rate": 7.958813505720031e-06,
|
|
"loss": 0.9464,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 1.6678191489361702,
|
|
"grad_norm": 4.281257152557373,
|
|
"learning_rate": 7.958104480045066e-06,
|
|
"loss": 0.8795,
|
|
"step": 6271
|
|
},
|
|
{
|
|
"epoch": 1.6680851063829787,
|
|
"grad_norm": 3.907784938812256,
|
|
"learning_rate": 7.957395362842864e-06,
|
|
"loss": 0.6676,
|
|
"step": 6272
|
|
},
|
|
{
|
|
"epoch": 1.6683510638297872,
|
|
"grad_norm": 4.122792720794678,
|
|
"learning_rate": 7.956686154135368e-06,
|
|
"loss": 0.7808,
|
|
"step": 6273
|
|
},
|
|
{
|
|
"epoch": 1.6686170212765958,
|
|
"grad_norm": 4.015087127685547,
|
|
"learning_rate": 7.95597685394452e-06,
|
|
"loss": 0.8536,
|
|
"step": 6274
|
|
},
|
|
{
|
|
"epoch": 1.6688829787234043,
|
|
"grad_norm": 3.8058676719665527,
|
|
"learning_rate": 7.95526746229227e-06,
|
|
"loss": 0.8526,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 1.6691489361702128,
|
|
"grad_norm": 4.022008895874023,
|
|
"learning_rate": 7.954557979200562e-06,
|
|
"loss": 0.7642,
|
|
"step": 6276
|
|
},
|
|
{
|
|
"epoch": 1.6694148936170212,
|
|
"grad_norm": 3.820610284805298,
|
|
"learning_rate": 7.953848404691354e-06,
|
|
"loss": 0.8786,
|
|
"step": 6277
|
|
},
|
|
{
|
|
"epoch": 1.6696808510638297,
|
|
"grad_norm": 3.6477434635162354,
|
|
"learning_rate": 7.9531387387866e-06,
|
|
"loss": 0.8277,
|
|
"step": 6278
|
|
},
|
|
{
|
|
"epoch": 1.6699468085106384,
|
|
"grad_norm": 4.075412273406982,
|
|
"learning_rate": 7.952428981508254e-06,
|
|
"loss": 0.8095,
|
|
"step": 6279
|
|
},
|
|
{
|
|
"epoch": 1.6702127659574468,
|
|
"grad_norm": 4.030799388885498,
|
|
"learning_rate": 7.951719132878279e-06,
|
|
"loss": 0.7007,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 1.6704787234042553,
|
|
"grad_norm": 4.039961338043213,
|
|
"learning_rate": 7.95100919291864e-06,
|
|
"loss": 0.8829,
|
|
"step": 6281
|
|
},
|
|
{
|
|
"epoch": 1.670744680851064,
|
|
"grad_norm": 3.8483259677886963,
|
|
"learning_rate": 7.950299161651303e-06,
|
|
"loss": 0.7494,
|
|
"step": 6282
|
|
},
|
|
{
|
|
"epoch": 1.6710106382978722,
|
|
"grad_norm": 3.8535609245300293,
|
|
"learning_rate": 7.949589039098235e-06,
|
|
"loss": 0.7572,
|
|
"step": 6283
|
|
},
|
|
{
|
|
"epoch": 1.671276595744681,
|
|
"grad_norm": 4.3112311363220215,
|
|
"learning_rate": 7.94887882528141e-06,
|
|
"loss": 0.9061,
|
|
"step": 6284
|
|
},
|
|
{
|
|
"epoch": 1.6715425531914894,
|
|
"grad_norm": 3.8851253986358643,
|
|
"learning_rate": 7.948168520222802e-06,
|
|
"loss": 0.9334,
|
|
"step": 6285
|
|
},
|
|
{
|
|
"epoch": 1.6718085106382978,
|
|
"grad_norm": 4.051077842712402,
|
|
"learning_rate": 7.94745812394439e-06,
|
|
"loss": 0.8568,
|
|
"step": 6286
|
|
},
|
|
{
|
|
"epoch": 1.6720744680851065,
|
|
"grad_norm": 3.8714540004730225,
|
|
"learning_rate": 7.946747636468153e-06,
|
|
"loss": 0.8496,
|
|
"step": 6287
|
|
},
|
|
{
|
|
"epoch": 1.6723404255319148,
|
|
"grad_norm": 3.9510905742645264,
|
|
"learning_rate": 7.946037057816075e-06,
|
|
"loss": 0.8367,
|
|
"step": 6288
|
|
},
|
|
{
|
|
"epoch": 1.6726063829787234,
|
|
"grad_norm": 4.504206657409668,
|
|
"learning_rate": 7.945326388010141e-06,
|
|
"loss": 0.8716,
|
|
"step": 6289
|
|
},
|
|
{
|
|
"epoch": 1.672872340425532,
|
|
"grad_norm": 4.116037845611572,
|
|
"learning_rate": 7.944615627072341e-06,
|
|
"loss": 0.8481,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 1.6731382978723404,
|
|
"grad_norm": 3.539327383041382,
|
|
"learning_rate": 7.943904775024667e-06,
|
|
"loss": 0.6687,
|
|
"step": 6291
|
|
},
|
|
{
|
|
"epoch": 1.673404255319149,
|
|
"grad_norm": 4.1150898933410645,
|
|
"learning_rate": 7.943193831889112e-06,
|
|
"loss": 0.9299,
|
|
"step": 6292
|
|
},
|
|
{
|
|
"epoch": 1.6736702127659573,
|
|
"grad_norm": 4.379646301269531,
|
|
"learning_rate": 7.942482797687675e-06,
|
|
"loss": 0.8867,
|
|
"step": 6293
|
|
},
|
|
{
|
|
"epoch": 1.673936170212766,
|
|
"grad_norm": 3.6255533695220947,
|
|
"learning_rate": 7.941771672442358e-06,
|
|
"loss": 0.6831,
|
|
"step": 6294
|
|
},
|
|
{
|
|
"epoch": 1.6742021276595744,
|
|
"grad_norm": 4.358723163604736,
|
|
"learning_rate": 7.94106045617516e-06,
|
|
"loss": 0.6923,
|
|
"step": 6295
|
|
},
|
|
{
|
|
"epoch": 1.674468085106383,
|
|
"grad_norm": 3.967379093170166,
|
|
"learning_rate": 7.94034914890809e-06,
|
|
"loss": 0.8413,
|
|
"step": 6296
|
|
},
|
|
{
|
|
"epoch": 1.6747340425531916,
|
|
"grad_norm": 4.233070373535156,
|
|
"learning_rate": 7.939637750663153e-06,
|
|
"loss": 0.9755,
|
|
"step": 6297
|
|
},
|
|
{
|
|
"epoch": 1.675,
|
|
"grad_norm": 3.4149739742279053,
|
|
"learning_rate": 7.938926261462366e-06,
|
|
"loss": 0.6741,
|
|
"step": 6298
|
|
},
|
|
{
|
|
"epoch": 1.6752659574468085,
|
|
"grad_norm": 4.045546054840088,
|
|
"learning_rate": 7.938214681327739e-06,
|
|
"loss": 0.8484,
|
|
"step": 6299
|
|
},
|
|
{
|
|
"epoch": 1.675531914893617,
|
|
"grad_norm": 4.123802185058594,
|
|
"learning_rate": 7.93750301028129e-06,
|
|
"loss": 0.8398,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 1.6757978723404254,
|
|
"grad_norm": 3.7821900844573975,
|
|
"learning_rate": 7.936791248345041e-06,
|
|
"loss": 0.7785,
|
|
"step": 6301
|
|
},
|
|
{
|
|
"epoch": 1.6760638297872341,
|
|
"grad_norm": 3.6713192462921143,
|
|
"learning_rate": 7.936079395541013e-06,
|
|
"loss": 0.7191,
|
|
"step": 6302
|
|
},
|
|
{
|
|
"epoch": 1.6763297872340426,
|
|
"grad_norm": 4.085387706756592,
|
|
"learning_rate": 7.935367451891232e-06,
|
|
"loss": 0.684,
|
|
"step": 6303
|
|
},
|
|
{
|
|
"epoch": 1.676595744680851,
|
|
"grad_norm": 3.6555123329162598,
|
|
"learning_rate": 7.934655417417724e-06,
|
|
"loss": 0.7526,
|
|
"step": 6304
|
|
},
|
|
{
|
|
"epoch": 1.6768617021276597,
|
|
"grad_norm": 3.9464025497436523,
|
|
"learning_rate": 7.933943292142524e-06,
|
|
"loss": 0.7544,
|
|
"step": 6305
|
|
},
|
|
{
|
|
"epoch": 1.677127659574468,
|
|
"grad_norm": 3.74369215965271,
|
|
"learning_rate": 7.933231076087662e-06,
|
|
"loss": 0.7524,
|
|
"step": 6306
|
|
},
|
|
{
|
|
"epoch": 1.6773936170212767,
|
|
"grad_norm": 4.703025817871094,
|
|
"learning_rate": 7.932518769275179e-06,
|
|
"loss": 0.8955,
|
|
"step": 6307
|
|
},
|
|
{
|
|
"epoch": 1.6776595744680851,
|
|
"grad_norm": 4.241019248962402,
|
|
"learning_rate": 7.931806371727111e-06,
|
|
"loss": 0.7727,
|
|
"step": 6308
|
|
},
|
|
{
|
|
"epoch": 1.6779255319148936,
|
|
"grad_norm": 4.029513359069824,
|
|
"learning_rate": 7.931093883465503e-06,
|
|
"loss": 0.7951,
|
|
"step": 6309
|
|
},
|
|
{
|
|
"epoch": 1.6781914893617023,
|
|
"grad_norm": 3.7332520484924316,
|
|
"learning_rate": 7.930381304512401e-06,
|
|
"loss": 0.7148,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 1.6784574468085105,
|
|
"grad_norm": 3.734999179840088,
|
|
"learning_rate": 7.92966863488985e-06,
|
|
"loss": 0.7856,
|
|
"step": 6311
|
|
},
|
|
{
|
|
"epoch": 1.6787234042553192,
|
|
"grad_norm": 4.164159774780273,
|
|
"learning_rate": 7.928955874619902e-06,
|
|
"loss": 0.8163,
|
|
"step": 6312
|
|
},
|
|
{
|
|
"epoch": 1.6789893617021276,
|
|
"grad_norm": 4.043959617614746,
|
|
"learning_rate": 7.928243023724611e-06,
|
|
"loss": 0.8262,
|
|
"step": 6313
|
|
},
|
|
{
|
|
"epoch": 1.679255319148936,
|
|
"grad_norm": 3.5217018127441406,
|
|
"learning_rate": 7.927530082226034e-06,
|
|
"loss": 0.7066,
|
|
"step": 6314
|
|
},
|
|
{
|
|
"epoch": 1.6795212765957448,
|
|
"grad_norm": 4.035088539123535,
|
|
"learning_rate": 7.926817050146227e-06,
|
|
"loss": 0.9041,
|
|
"step": 6315
|
|
},
|
|
{
|
|
"epoch": 1.679787234042553,
|
|
"grad_norm": 3.8981032371520996,
|
|
"learning_rate": 7.926103927507257e-06,
|
|
"loss": 0.8896,
|
|
"step": 6316
|
|
},
|
|
{
|
|
"epoch": 1.6800531914893617,
|
|
"grad_norm": 3.613386392593384,
|
|
"learning_rate": 7.925390714331185e-06,
|
|
"loss": 0.8692,
|
|
"step": 6317
|
|
},
|
|
{
|
|
"epoch": 1.6803191489361702,
|
|
"grad_norm": 4.042194843292236,
|
|
"learning_rate": 7.924677410640081e-06,
|
|
"loss": 0.8251,
|
|
"step": 6318
|
|
},
|
|
{
|
|
"epoch": 1.6805851063829786,
|
|
"grad_norm": 3.749028444290161,
|
|
"learning_rate": 7.923964016456014e-06,
|
|
"loss": 0.8519,
|
|
"step": 6319
|
|
},
|
|
{
|
|
"epoch": 1.6808510638297873,
|
|
"grad_norm": 3.482661008834839,
|
|
"learning_rate": 7.92325053180106e-06,
|
|
"loss": 0.6798,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 1.6811170212765958,
|
|
"grad_norm": 3.876594066619873,
|
|
"learning_rate": 7.92253695669729e-06,
|
|
"loss": 0.8437,
|
|
"step": 6321
|
|
},
|
|
{
|
|
"epoch": 1.6813829787234043,
|
|
"grad_norm": 3.941342830657959,
|
|
"learning_rate": 7.921823291166785e-06,
|
|
"loss": 0.7915,
|
|
"step": 6322
|
|
},
|
|
{
|
|
"epoch": 1.6816489361702127,
|
|
"grad_norm": 4.015593528747559,
|
|
"learning_rate": 7.92110953523163e-06,
|
|
"loss": 0.8184,
|
|
"step": 6323
|
|
},
|
|
{
|
|
"epoch": 1.6819148936170212,
|
|
"grad_norm": 4.370626449584961,
|
|
"learning_rate": 7.920395688913906e-06,
|
|
"loss": 0.962,
|
|
"step": 6324
|
|
},
|
|
{
|
|
"epoch": 1.6821808510638299,
|
|
"grad_norm": 3.7897567749023438,
|
|
"learning_rate": 7.919681752235701e-06,
|
|
"loss": 0.9113,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 1.6824468085106383,
|
|
"grad_norm": 3.8005380630493164,
|
|
"learning_rate": 7.918967725219104e-06,
|
|
"loss": 0.869,
|
|
"step": 6326
|
|
},
|
|
{
|
|
"epoch": 1.6827127659574468,
|
|
"grad_norm": 4.056982040405273,
|
|
"learning_rate": 7.918253607886212e-06,
|
|
"loss": 0.8451,
|
|
"step": 6327
|
|
},
|
|
{
|
|
"epoch": 1.6829787234042555,
|
|
"grad_norm": 3.5084946155548096,
|
|
"learning_rate": 7.917539400259116e-06,
|
|
"loss": 0.7714,
|
|
"step": 6328
|
|
},
|
|
{
|
|
"epoch": 1.6832446808510637,
|
|
"grad_norm": 3.9143457412719727,
|
|
"learning_rate": 7.916825102359914e-06,
|
|
"loss": 0.8663,
|
|
"step": 6329
|
|
},
|
|
{
|
|
"epoch": 1.6835106382978724,
|
|
"grad_norm": 3.867074966430664,
|
|
"learning_rate": 7.916110714210711e-06,
|
|
"loss": 0.8741,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 1.6837765957446809,
|
|
"grad_norm": 3.8426260948181152,
|
|
"learning_rate": 7.91539623583361e-06,
|
|
"loss": 0.8347,
|
|
"step": 6331
|
|
},
|
|
{
|
|
"epoch": 1.6840425531914893,
|
|
"grad_norm": 3.8092234134674072,
|
|
"learning_rate": 7.914681667250714e-06,
|
|
"loss": 0.8565,
|
|
"step": 6332
|
|
},
|
|
{
|
|
"epoch": 1.684308510638298,
|
|
"grad_norm": 3.754821538925171,
|
|
"learning_rate": 7.913967008484138e-06,
|
|
"loss": 0.6845,
|
|
"step": 6333
|
|
},
|
|
{
|
|
"epoch": 1.6845744680851062,
|
|
"grad_norm": 4.067741394042969,
|
|
"learning_rate": 7.913252259555992e-06,
|
|
"loss": 0.7716,
|
|
"step": 6334
|
|
},
|
|
{
|
|
"epoch": 1.684840425531915,
|
|
"grad_norm": 4.096173286437988,
|
|
"learning_rate": 7.91253742048839e-06,
|
|
"loss": 0.8299,
|
|
"step": 6335
|
|
},
|
|
{
|
|
"epoch": 1.6851063829787234,
|
|
"grad_norm": 4.119457721710205,
|
|
"learning_rate": 7.911822491303453e-06,
|
|
"loss": 0.8621,
|
|
"step": 6336
|
|
},
|
|
{
|
|
"epoch": 1.6853723404255319,
|
|
"grad_norm": 4.278772354125977,
|
|
"learning_rate": 7.911107472023298e-06,
|
|
"loss": 0.8446,
|
|
"step": 6337
|
|
},
|
|
{
|
|
"epoch": 1.6856382978723405,
|
|
"grad_norm": 3.7795321941375732,
|
|
"learning_rate": 7.910392362670051e-06,
|
|
"loss": 0.6943,
|
|
"step": 6338
|
|
},
|
|
{
|
|
"epoch": 1.6859042553191488,
|
|
"grad_norm": 3.9733240604400635,
|
|
"learning_rate": 7.909677163265838e-06,
|
|
"loss": 0.6562,
|
|
"step": 6339
|
|
},
|
|
{
|
|
"epoch": 1.6861702127659575,
|
|
"grad_norm": 4.160102844238281,
|
|
"learning_rate": 7.908961873832788e-06,
|
|
"loss": 0.7915,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 1.686436170212766,
|
|
"grad_norm": 4.3431525230407715,
|
|
"learning_rate": 7.908246494393032e-06,
|
|
"loss": 0.8474,
|
|
"step": 6341
|
|
},
|
|
{
|
|
"epoch": 1.6867021276595744,
|
|
"grad_norm": 4.230860233306885,
|
|
"learning_rate": 7.907531024968705e-06,
|
|
"loss": 0.7098,
|
|
"step": 6342
|
|
},
|
|
{
|
|
"epoch": 1.686968085106383,
|
|
"grad_norm": 4.223114967346191,
|
|
"learning_rate": 7.906815465581945e-06,
|
|
"loss": 0.7278,
|
|
"step": 6343
|
|
},
|
|
{
|
|
"epoch": 1.6872340425531915,
|
|
"grad_norm": 4.246336460113525,
|
|
"learning_rate": 7.906099816254895e-06,
|
|
"loss": 0.825,
|
|
"step": 6344
|
|
},
|
|
{
|
|
"epoch": 1.6875,
|
|
"grad_norm": 3.5722670555114746,
|
|
"learning_rate": 7.905384077009693e-06,
|
|
"loss": 0.8907,
|
|
"step": 6345
|
|
},
|
|
{
|
|
"epoch": 1.6877659574468085,
|
|
"grad_norm": 4.00727653503418,
|
|
"learning_rate": 7.904668247868486e-06,
|
|
"loss": 0.7821,
|
|
"step": 6346
|
|
},
|
|
{
|
|
"epoch": 1.688031914893617,
|
|
"grad_norm": 3.889538049697876,
|
|
"learning_rate": 7.903952328853426e-06,
|
|
"loss": 0.7967,
|
|
"step": 6347
|
|
},
|
|
{
|
|
"epoch": 1.6882978723404256,
|
|
"grad_norm": 3.923154830932617,
|
|
"learning_rate": 7.90323631998666e-06,
|
|
"loss": 0.8152,
|
|
"step": 6348
|
|
},
|
|
{
|
|
"epoch": 1.688563829787234,
|
|
"grad_norm": 4.059485912322998,
|
|
"learning_rate": 7.902520221290345e-06,
|
|
"loss": 0.7824,
|
|
"step": 6349
|
|
},
|
|
{
|
|
"epoch": 1.6888297872340425,
|
|
"grad_norm": 4.1757378578186035,
|
|
"learning_rate": 7.901804032786637e-06,
|
|
"loss": 0.8839,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 1.6890957446808512,
|
|
"grad_norm": 3.6736671924591064,
|
|
"learning_rate": 7.901087754497694e-06,
|
|
"loss": 0.684,
|
|
"step": 6351
|
|
},
|
|
{
|
|
"epoch": 1.6893617021276595,
|
|
"grad_norm": 4.116995811462402,
|
|
"learning_rate": 7.900371386445682e-06,
|
|
"loss": 0.9625,
|
|
"step": 6352
|
|
},
|
|
{
|
|
"epoch": 1.6896276595744681,
|
|
"grad_norm": 3.686619758605957,
|
|
"learning_rate": 7.899654928652765e-06,
|
|
"loss": 0.8667,
|
|
"step": 6353
|
|
},
|
|
{
|
|
"epoch": 1.6898936170212766,
|
|
"grad_norm": 4.151339054107666,
|
|
"learning_rate": 7.89893838114111e-06,
|
|
"loss": 0.8102,
|
|
"step": 6354
|
|
},
|
|
{
|
|
"epoch": 1.690159574468085,
|
|
"grad_norm": 3.7917020320892334,
|
|
"learning_rate": 7.898221743932887e-06,
|
|
"loss": 0.934,
|
|
"step": 6355
|
|
},
|
|
{
|
|
"epoch": 1.6904255319148938,
|
|
"grad_norm": 3.5394623279571533,
|
|
"learning_rate": 7.897505017050272e-06,
|
|
"loss": 0.7577,
|
|
"step": 6356
|
|
},
|
|
{
|
|
"epoch": 1.690691489361702,
|
|
"grad_norm": 4.058946132659912,
|
|
"learning_rate": 7.896788200515442e-06,
|
|
"loss": 0.7536,
|
|
"step": 6357
|
|
},
|
|
{
|
|
"epoch": 1.6909574468085107,
|
|
"grad_norm": 3.8410744667053223,
|
|
"learning_rate": 7.896071294350574e-06,
|
|
"loss": 0.8212,
|
|
"step": 6358
|
|
},
|
|
{
|
|
"epoch": 1.6912234042553191,
|
|
"grad_norm": 3.915674924850464,
|
|
"learning_rate": 7.89535429857785e-06,
|
|
"loss": 0.8288,
|
|
"step": 6359
|
|
},
|
|
{
|
|
"epoch": 1.6914893617021276,
|
|
"grad_norm": 3.954108715057373,
|
|
"learning_rate": 7.894637213219454e-06,
|
|
"loss": 0.7738,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 1.6917553191489363,
|
|
"grad_norm": 4.220264434814453,
|
|
"learning_rate": 7.893920038297575e-06,
|
|
"loss": 0.7686,
|
|
"step": 6361
|
|
},
|
|
{
|
|
"epoch": 1.6920212765957445,
|
|
"grad_norm": 4.50542688369751,
|
|
"learning_rate": 7.893202773834404e-06,
|
|
"loss": 0.825,
|
|
"step": 6362
|
|
},
|
|
{
|
|
"epoch": 1.6922872340425532,
|
|
"grad_norm": 4.274563312530518,
|
|
"learning_rate": 7.892485419852131e-06,
|
|
"loss": 0.8119,
|
|
"step": 6363
|
|
},
|
|
{
|
|
"epoch": 1.6925531914893617,
|
|
"grad_norm": 3.8938279151916504,
|
|
"learning_rate": 7.891767976372957e-06,
|
|
"loss": 0.9073,
|
|
"step": 6364
|
|
},
|
|
{
|
|
"epoch": 1.6928191489361701,
|
|
"grad_norm": 3.949944257736206,
|
|
"learning_rate": 7.891050443419074e-06,
|
|
"loss": 0.757,
|
|
"step": 6365
|
|
},
|
|
{
|
|
"epoch": 1.6930851063829788,
|
|
"grad_norm": 4.313665866851807,
|
|
"learning_rate": 7.890332821012687e-06,
|
|
"loss": 0.8997,
|
|
"step": 6366
|
|
},
|
|
{
|
|
"epoch": 1.6933510638297873,
|
|
"grad_norm": 4.165764331817627,
|
|
"learning_rate": 7.889615109176e-06,
|
|
"loss": 0.8262,
|
|
"step": 6367
|
|
},
|
|
{
|
|
"epoch": 1.6936170212765957,
|
|
"grad_norm": 3.462186336517334,
|
|
"learning_rate": 7.88889730793122e-06,
|
|
"loss": 0.6989,
|
|
"step": 6368
|
|
},
|
|
{
|
|
"epoch": 1.6938829787234042,
|
|
"grad_norm": 4.610195159912109,
|
|
"learning_rate": 7.888179417300556e-06,
|
|
"loss": 0.924,
|
|
"step": 6369
|
|
},
|
|
{
|
|
"epoch": 1.6941489361702127,
|
|
"grad_norm": 3.8986306190490723,
|
|
"learning_rate": 7.887461437306221e-06,
|
|
"loss": 0.8204,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 1.6944148936170214,
|
|
"grad_norm": 3.9623425006866455,
|
|
"learning_rate": 7.886743367970428e-06,
|
|
"loss": 0.8856,
|
|
"step": 6371
|
|
},
|
|
{
|
|
"epoch": 1.6946808510638298,
|
|
"grad_norm": 3.7937700748443604,
|
|
"learning_rate": 7.886025209315396e-06,
|
|
"loss": 0.905,
|
|
"step": 6372
|
|
},
|
|
{
|
|
"epoch": 1.6949468085106383,
|
|
"grad_norm": 3.6256890296936035,
|
|
"learning_rate": 7.885306961363347e-06,
|
|
"loss": 0.7097,
|
|
"step": 6373
|
|
},
|
|
{
|
|
"epoch": 1.695212765957447,
|
|
"grad_norm": 4.079528331756592,
|
|
"learning_rate": 7.884588624136505e-06,
|
|
"loss": 0.8255,
|
|
"step": 6374
|
|
},
|
|
{
|
|
"epoch": 1.6954787234042552,
|
|
"grad_norm": 3.7182741165161133,
|
|
"learning_rate": 7.883870197657094e-06,
|
|
"loss": 0.671,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 1.695744680851064,
|
|
"grad_norm": 3.2320377826690674,
|
|
"learning_rate": 7.883151681947343e-06,
|
|
"loss": 0.6876,
|
|
"step": 6376
|
|
},
|
|
{
|
|
"epoch": 1.6960106382978724,
|
|
"grad_norm": 3.610546588897705,
|
|
"learning_rate": 7.882433077029484e-06,
|
|
"loss": 0.7904,
|
|
"step": 6377
|
|
},
|
|
{
|
|
"epoch": 1.6962765957446808,
|
|
"grad_norm": 3.8851020336151123,
|
|
"learning_rate": 7.881714382925753e-06,
|
|
"loss": 0.7701,
|
|
"step": 6378
|
|
},
|
|
{
|
|
"epoch": 1.6965425531914895,
|
|
"grad_norm": 3.727907657623291,
|
|
"learning_rate": 7.880995599658387e-06,
|
|
"loss": 0.8374,
|
|
"step": 6379
|
|
},
|
|
{
|
|
"epoch": 1.6968085106382977,
|
|
"grad_norm": 3.564770221710205,
|
|
"learning_rate": 7.880276727249623e-06,
|
|
"loss": 0.6483,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 1.6970744680851064,
|
|
"grad_norm": 4.088687419891357,
|
|
"learning_rate": 7.879557765721707e-06,
|
|
"loss": 0.7902,
|
|
"step": 6381
|
|
},
|
|
{
|
|
"epoch": 1.6973404255319149,
|
|
"grad_norm": 4.087176322937012,
|
|
"learning_rate": 7.878838715096883e-06,
|
|
"loss": 0.8723,
|
|
"step": 6382
|
|
},
|
|
{
|
|
"epoch": 1.6976063829787233,
|
|
"grad_norm": 3.7613840103149414,
|
|
"learning_rate": 7.878119575397401e-06,
|
|
"loss": 0.7559,
|
|
"step": 6383
|
|
},
|
|
{
|
|
"epoch": 1.697872340425532,
|
|
"grad_norm": 4.426526069641113,
|
|
"learning_rate": 7.87740034664551e-06,
|
|
"loss": 1.1472,
|
|
"step": 6384
|
|
},
|
|
{
|
|
"epoch": 1.6981382978723403,
|
|
"grad_norm": 3.5922887325286865,
|
|
"learning_rate": 7.876681028863464e-06,
|
|
"loss": 0.8193,
|
|
"step": 6385
|
|
},
|
|
{
|
|
"epoch": 1.698404255319149,
|
|
"grad_norm": 4.141395092010498,
|
|
"learning_rate": 7.875961622073523e-06,
|
|
"loss": 0.8629,
|
|
"step": 6386
|
|
},
|
|
{
|
|
"epoch": 1.6986702127659574,
|
|
"grad_norm": 3.894594669342041,
|
|
"learning_rate": 7.875242126297939e-06,
|
|
"loss": 0.8301,
|
|
"step": 6387
|
|
},
|
|
{
|
|
"epoch": 1.6989361702127659,
|
|
"grad_norm": 3.929243564605713,
|
|
"learning_rate": 7.87452254155898e-06,
|
|
"loss": 0.8301,
|
|
"step": 6388
|
|
},
|
|
{
|
|
"epoch": 1.6992021276595746,
|
|
"grad_norm": 3.575058698654175,
|
|
"learning_rate": 7.87380286787891e-06,
|
|
"loss": 0.7595,
|
|
"step": 6389
|
|
},
|
|
{
|
|
"epoch": 1.699468085106383,
|
|
"grad_norm": 3.9643123149871826,
|
|
"learning_rate": 7.873083105279996e-06,
|
|
"loss": 0.8527,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 1.6997340425531915,
|
|
"grad_norm": 3.8817079067230225,
|
|
"learning_rate": 7.872363253784508e-06,
|
|
"loss": 0.6764,
|
|
"step": 6391
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"grad_norm": 4.209853649139404,
|
|
"learning_rate": 7.871643313414718e-06,
|
|
"loss": 0.8082,
|
|
"step": 6392
|
|
},
|
|
{
|
|
"epoch": 1.7002659574468084,
|
|
"grad_norm": 3.9260003566741943,
|
|
"learning_rate": 7.870923284192904e-06,
|
|
"loss": 0.7839,
|
|
"step": 6393
|
|
},
|
|
{
|
|
"epoch": 1.700531914893617,
|
|
"grad_norm": 3.726177453994751,
|
|
"learning_rate": 7.870203166141343e-06,
|
|
"loss": 0.721,
|
|
"step": 6394
|
|
},
|
|
{
|
|
"epoch": 1.7007978723404256,
|
|
"grad_norm": 4.2059326171875,
|
|
"learning_rate": 7.869482959282318e-06,
|
|
"loss": 0.7346,
|
|
"step": 6395
|
|
},
|
|
{
|
|
"epoch": 1.701063829787234,
|
|
"grad_norm": 4.017068862915039,
|
|
"learning_rate": 7.868762663638111e-06,
|
|
"loss": 0.6286,
|
|
"step": 6396
|
|
},
|
|
{
|
|
"epoch": 1.7013297872340427,
|
|
"grad_norm": 3.6799540519714355,
|
|
"learning_rate": 7.86804227923101e-06,
|
|
"loss": 0.7389,
|
|
"step": 6397
|
|
},
|
|
{
|
|
"epoch": 1.701595744680851,
|
|
"grad_norm": 3.797459602355957,
|
|
"learning_rate": 7.867321806083303e-06,
|
|
"loss": 0.7271,
|
|
"step": 6398
|
|
},
|
|
{
|
|
"epoch": 1.7018617021276596,
|
|
"grad_norm": 3.9897758960723877,
|
|
"learning_rate": 7.866601244217284e-06,
|
|
"loss": 0.8449,
|
|
"step": 6399
|
|
},
|
|
{
|
|
"epoch": 1.702127659574468,
|
|
"grad_norm": 4.305942058563232,
|
|
"learning_rate": 7.86588059365525e-06,
|
|
"loss": 0.8108,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 1.7023936170212766,
|
|
"grad_norm": 3.727057456970215,
|
|
"learning_rate": 7.865159854419493e-06,
|
|
"loss": 0.801,
|
|
"step": 6401
|
|
},
|
|
{
|
|
"epoch": 1.7026595744680852,
|
|
"grad_norm": 3.9825263023376465,
|
|
"learning_rate": 7.864439026532318e-06,
|
|
"loss": 0.8026,
|
|
"step": 6402
|
|
},
|
|
{
|
|
"epoch": 1.7029255319148935,
|
|
"grad_norm": 3.602372884750366,
|
|
"learning_rate": 7.863718110016025e-06,
|
|
"loss": 0.6829,
|
|
"step": 6403
|
|
},
|
|
{
|
|
"epoch": 1.7031914893617022,
|
|
"grad_norm": 4.175540447235107,
|
|
"learning_rate": 7.862997104892924e-06,
|
|
"loss": 0.7491,
|
|
"step": 6404
|
|
},
|
|
{
|
|
"epoch": 1.7034574468085106,
|
|
"grad_norm": 3.7469863891601562,
|
|
"learning_rate": 7.862276011185323e-06,
|
|
"loss": 0.6495,
|
|
"step": 6405
|
|
},
|
|
{
|
|
"epoch": 1.703723404255319,
|
|
"grad_norm": 3.860929012298584,
|
|
"learning_rate": 7.861554828915531e-06,
|
|
"loss": 0.8538,
|
|
"step": 6406
|
|
},
|
|
{
|
|
"epoch": 1.7039893617021278,
|
|
"grad_norm": 3.6298773288726807,
|
|
"learning_rate": 7.860833558105863e-06,
|
|
"loss": 0.7653,
|
|
"step": 6407
|
|
},
|
|
{
|
|
"epoch": 1.704255319148936,
|
|
"grad_norm": 3.6208910942077637,
|
|
"learning_rate": 7.860112198778638e-06,
|
|
"loss": 0.8272,
|
|
"step": 6408
|
|
},
|
|
{
|
|
"epoch": 1.7045212765957447,
|
|
"grad_norm": 3.9331130981445312,
|
|
"learning_rate": 7.859390750956172e-06,
|
|
"loss": 0.802,
|
|
"step": 6409
|
|
},
|
|
{
|
|
"epoch": 1.7047872340425532,
|
|
"grad_norm": 3.843306303024292,
|
|
"learning_rate": 7.858669214660792e-06,
|
|
"loss": 0.8426,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 1.7050531914893616,
|
|
"grad_norm": 3.844093084335327,
|
|
"learning_rate": 7.857947589914819e-06,
|
|
"loss": 0.7836,
|
|
"step": 6411
|
|
},
|
|
{
|
|
"epoch": 1.7053191489361703,
|
|
"grad_norm": 3.7956225872039795,
|
|
"learning_rate": 7.857225876740585e-06,
|
|
"loss": 0.7151,
|
|
"step": 6412
|
|
},
|
|
{
|
|
"epoch": 1.7055851063829788,
|
|
"grad_norm": 3.568847417831421,
|
|
"learning_rate": 7.856504075160416e-06,
|
|
"loss": 0.8406,
|
|
"step": 6413
|
|
},
|
|
{
|
|
"epoch": 1.7058510638297872,
|
|
"grad_norm": 5.6517462730407715,
|
|
"learning_rate": 7.855782185196648e-06,
|
|
"loss": 0.8804,
|
|
"step": 6414
|
|
},
|
|
{
|
|
"epoch": 1.7061170212765957,
|
|
"grad_norm": 3.6728999614715576,
|
|
"learning_rate": 7.855060206871618e-06,
|
|
"loss": 0.7445,
|
|
"step": 6415
|
|
},
|
|
{
|
|
"epoch": 1.7063829787234042,
|
|
"grad_norm": 4.358402729034424,
|
|
"learning_rate": 7.854338140207662e-06,
|
|
"loss": 0.7949,
|
|
"step": 6416
|
|
},
|
|
{
|
|
"epoch": 1.7066489361702128,
|
|
"grad_norm": 4.032132625579834,
|
|
"learning_rate": 7.853615985227126e-06,
|
|
"loss": 0.8492,
|
|
"step": 6417
|
|
},
|
|
{
|
|
"epoch": 1.7069148936170213,
|
|
"grad_norm": 4.185794353485107,
|
|
"learning_rate": 7.85289374195235e-06,
|
|
"loss": 0.9054,
|
|
"step": 6418
|
|
},
|
|
{
|
|
"epoch": 1.7071808510638298,
|
|
"grad_norm": 4.639225006103516,
|
|
"learning_rate": 7.852171410405684e-06,
|
|
"loss": 0.9118,
|
|
"step": 6419
|
|
},
|
|
{
|
|
"epoch": 1.7074468085106385,
|
|
"grad_norm": 3.67490816116333,
|
|
"learning_rate": 7.851448990609476e-06,
|
|
"loss": 0.8046,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 1.7077127659574467,
|
|
"grad_norm": 3.879056692123413,
|
|
"learning_rate": 7.850726482586078e-06,
|
|
"loss": 0.6831,
|
|
"step": 6421
|
|
},
|
|
{
|
|
"epoch": 1.7079787234042554,
|
|
"grad_norm": 3.963789463043213,
|
|
"learning_rate": 7.850003886357847e-06,
|
|
"loss": 0.7881,
|
|
"step": 6422
|
|
},
|
|
{
|
|
"epoch": 1.7082446808510638,
|
|
"grad_norm": 4.229506015777588,
|
|
"learning_rate": 7.849281201947142e-06,
|
|
"loss": 0.8157,
|
|
"step": 6423
|
|
},
|
|
{
|
|
"epoch": 1.7085106382978723,
|
|
"grad_norm": 4.29874849319458,
|
|
"learning_rate": 7.84855842937632e-06,
|
|
"loss": 0.9049,
|
|
"step": 6424
|
|
},
|
|
{
|
|
"epoch": 1.708776595744681,
|
|
"grad_norm": 3.8917417526245117,
|
|
"learning_rate": 7.847835568667746e-06,
|
|
"loss": 0.7922,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 1.7090425531914892,
|
|
"grad_norm": 3.8562116622924805,
|
|
"learning_rate": 7.847112619843789e-06,
|
|
"loss": 0.7363,
|
|
"step": 6426
|
|
},
|
|
{
|
|
"epoch": 1.709308510638298,
|
|
"grad_norm": 4.495066165924072,
|
|
"learning_rate": 7.846389582926814e-06,
|
|
"loss": 0.977,
|
|
"step": 6427
|
|
},
|
|
{
|
|
"epoch": 1.7095744680851064,
|
|
"grad_norm": 3.899489164352417,
|
|
"learning_rate": 7.845666457939193e-06,
|
|
"loss": 0.7289,
|
|
"step": 6428
|
|
},
|
|
{
|
|
"epoch": 1.7098404255319148,
|
|
"grad_norm": 3.9472427368164062,
|
|
"learning_rate": 7.844943244903303e-06,
|
|
"loss": 0.8273,
|
|
"step": 6429
|
|
},
|
|
{
|
|
"epoch": 1.7101063829787235,
|
|
"grad_norm": 4.187959671020508,
|
|
"learning_rate": 7.84421994384152e-06,
|
|
"loss": 0.8658,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 1.7103723404255318,
|
|
"grad_norm": 4.103062152862549,
|
|
"learning_rate": 7.843496554776222e-06,
|
|
"loss": 0.8097,
|
|
"step": 6431
|
|
},
|
|
{
|
|
"epoch": 1.7106382978723405,
|
|
"grad_norm": 3.977741241455078,
|
|
"learning_rate": 7.842773077729793e-06,
|
|
"loss": 0.799,
|
|
"step": 6432
|
|
},
|
|
{
|
|
"epoch": 1.710904255319149,
|
|
"grad_norm": 3.8812167644500732,
|
|
"learning_rate": 7.842049512724618e-06,
|
|
"loss": 0.6743,
|
|
"step": 6433
|
|
},
|
|
{
|
|
"epoch": 1.7111702127659574,
|
|
"grad_norm": 4.060866832733154,
|
|
"learning_rate": 7.841325859783086e-06,
|
|
"loss": 0.7479,
|
|
"step": 6434
|
|
},
|
|
{
|
|
"epoch": 1.711436170212766,
|
|
"grad_norm": 4.428943634033203,
|
|
"learning_rate": 7.840602118927584e-06,
|
|
"loss": 0.9101,
|
|
"step": 6435
|
|
},
|
|
{
|
|
"epoch": 1.7117021276595743,
|
|
"grad_norm": 3.989323139190674,
|
|
"learning_rate": 7.83987829018051e-06,
|
|
"loss": 0.8308,
|
|
"step": 6436
|
|
},
|
|
{
|
|
"epoch": 1.711968085106383,
|
|
"grad_norm": 4.173738479614258,
|
|
"learning_rate": 7.83915437356426e-06,
|
|
"loss": 0.8025,
|
|
"step": 6437
|
|
},
|
|
{
|
|
"epoch": 1.7122340425531914,
|
|
"grad_norm": 3.7683372497558594,
|
|
"learning_rate": 7.838430369101227e-06,
|
|
"loss": 0.8168,
|
|
"step": 6438
|
|
},
|
|
{
|
|
"epoch": 1.7125,
|
|
"grad_norm": 3.9382693767547607,
|
|
"learning_rate": 7.837706276813819e-06,
|
|
"loss": 0.8469,
|
|
"step": 6439
|
|
},
|
|
{
|
|
"epoch": 1.7127659574468086,
|
|
"grad_norm": 4.1283278465271,
|
|
"learning_rate": 7.836982096724438e-06,
|
|
"loss": 0.7938,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 1.713031914893617,
|
|
"grad_norm": 4.033618927001953,
|
|
"learning_rate": 7.836257828855489e-06,
|
|
"loss": 0.8479,
|
|
"step": 6441
|
|
},
|
|
{
|
|
"epoch": 1.7132978723404255,
|
|
"grad_norm": 4.25187349319458,
|
|
"learning_rate": 7.835533473229385e-06,
|
|
"loss": 0.8507,
|
|
"step": 6442
|
|
},
|
|
{
|
|
"epoch": 1.7135638297872342,
|
|
"grad_norm": 4.031279563903809,
|
|
"learning_rate": 7.834809029868538e-06,
|
|
"loss": 0.8444,
|
|
"step": 6443
|
|
},
|
|
{
|
|
"epoch": 1.7138297872340424,
|
|
"grad_norm": 3.5434410572052,
|
|
"learning_rate": 7.834084498795361e-06,
|
|
"loss": 0.6862,
|
|
"step": 6444
|
|
},
|
|
{
|
|
"epoch": 1.7140957446808511,
|
|
"grad_norm": 4.158623218536377,
|
|
"learning_rate": 7.833359880032272e-06,
|
|
"loss": 0.8362,
|
|
"step": 6445
|
|
},
|
|
{
|
|
"epoch": 1.7143617021276596,
|
|
"grad_norm": 4.039031982421875,
|
|
"learning_rate": 7.832635173601692e-06,
|
|
"loss": 0.8806,
|
|
"step": 6446
|
|
},
|
|
{
|
|
"epoch": 1.714627659574468,
|
|
"grad_norm": 4.09163236618042,
|
|
"learning_rate": 7.831910379526047e-06,
|
|
"loss": 0.9957,
|
|
"step": 6447
|
|
},
|
|
{
|
|
"epoch": 1.7148936170212767,
|
|
"grad_norm": 3.4675064086914062,
|
|
"learning_rate": 7.831185497827758e-06,
|
|
"loss": 0.7451,
|
|
"step": 6448
|
|
},
|
|
{
|
|
"epoch": 1.715159574468085,
|
|
"grad_norm": 3.6473426818847656,
|
|
"learning_rate": 7.830460528529258e-06,
|
|
"loss": 0.7436,
|
|
"step": 6449
|
|
},
|
|
{
|
|
"epoch": 1.7154255319148937,
|
|
"grad_norm": 3.779623508453369,
|
|
"learning_rate": 7.829735471652978e-06,
|
|
"loss": 0.7522,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 1.7156914893617021,
|
|
"grad_norm": 3.759127616882324,
|
|
"learning_rate": 7.829010327221348e-06,
|
|
"loss": 0.8186,
|
|
"step": 6451
|
|
},
|
|
{
|
|
"epoch": 1.7159574468085106,
|
|
"grad_norm": 3.606985330581665,
|
|
"learning_rate": 7.828285095256808e-06,
|
|
"loss": 0.8916,
|
|
"step": 6452
|
|
},
|
|
{
|
|
"epoch": 1.7162234042553193,
|
|
"grad_norm": 3.6981024742126465,
|
|
"learning_rate": 7.8275597757818e-06,
|
|
"loss": 0.7967,
|
|
"step": 6453
|
|
},
|
|
{
|
|
"epoch": 1.7164893617021275,
|
|
"grad_norm": 3.8665547370910645,
|
|
"learning_rate": 7.826834368818761e-06,
|
|
"loss": 0.731,
|
|
"step": 6454
|
|
},
|
|
{
|
|
"epoch": 1.7167553191489362,
|
|
"grad_norm": 3.547314167022705,
|
|
"learning_rate": 7.826108874390141e-06,
|
|
"loss": 0.7793,
|
|
"step": 6455
|
|
},
|
|
{
|
|
"epoch": 1.7170212765957447,
|
|
"grad_norm": 3.823787212371826,
|
|
"learning_rate": 7.825383292518383e-06,
|
|
"loss": 0.7854,
|
|
"step": 6456
|
|
},
|
|
{
|
|
"epoch": 1.7172872340425531,
|
|
"grad_norm": 4.252329349517822,
|
|
"learning_rate": 7.82465762322594e-06,
|
|
"loss": 0.9033,
|
|
"step": 6457
|
|
},
|
|
{
|
|
"epoch": 1.7175531914893618,
|
|
"grad_norm": 3.9819960594177246,
|
|
"learning_rate": 7.823931866535264e-06,
|
|
"loss": 0.9616,
|
|
"step": 6458
|
|
},
|
|
{
|
|
"epoch": 1.71781914893617,
|
|
"grad_norm": 4.099963665008545,
|
|
"learning_rate": 7.823206022468812e-06,
|
|
"loss": 0.8145,
|
|
"step": 6459
|
|
},
|
|
{
|
|
"epoch": 1.7180851063829787,
|
|
"grad_norm": 4.146093368530273,
|
|
"learning_rate": 7.82248009104904e-06,
|
|
"loss": 0.7693,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 1.7183510638297872,
|
|
"grad_norm": 3.9053497314453125,
|
|
"learning_rate": 7.821754072298414e-06,
|
|
"loss": 0.8287,
|
|
"step": 6461
|
|
},
|
|
{
|
|
"epoch": 1.7186170212765957,
|
|
"grad_norm": 4.186066150665283,
|
|
"learning_rate": 7.821027966239393e-06,
|
|
"loss": 0.7655,
|
|
"step": 6462
|
|
},
|
|
{
|
|
"epoch": 1.7188829787234043,
|
|
"grad_norm": 4.364232540130615,
|
|
"learning_rate": 7.820301772894445e-06,
|
|
"loss": 0.7746,
|
|
"step": 6463
|
|
},
|
|
{
|
|
"epoch": 1.7191489361702128,
|
|
"grad_norm": 3.838639736175537,
|
|
"learning_rate": 7.81957549228604e-06,
|
|
"loss": 0.8342,
|
|
"step": 6464
|
|
},
|
|
{
|
|
"epoch": 1.7194148936170213,
|
|
"grad_norm": 4.181699752807617,
|
|
"learning_rate": 7.818849124436651e-06,
|
|
"loss": 0.8181,
|
|
"step": 6465
|
|
},
|
|
{
|
|
"epoch": 1.71968085106383,
|
|
"grad_norm": 4.069806098937988,
|
|
"learning_rate": 7.818122669368751e-06,
|
|
"loss": 0.7486,
|
|
"step": 6466
|
|
},
|
|
{
|
|
"epoch": 1.7199468085106382,
|
|
"grad_norm": 3.9210989475250244,
|
|
"learning_rate": 7.817396127104815e-06,
|
|
"loss": 0.8064,
|
|
"step": 6467
|
|
},
|
|
{
|
|
"epoch": 1.7202127659574469,
|
|
"grad_norm": 3.3825418949127197,
|
|
"learning_rate": 7.816669497667328e-06,
|
|
"loss": 0.7276,
|
|
"step": 6468
|
|
},
|
|
{
|
|
"epoch": 1.7204787234042553,
|
|
"grad_norm": 4.07489013671875,
|
|
"learning_rate": 7.815942781078772e-06,
|
|
"loss": 0.7628,
|
|
"step": 6469
|
|
},
|
|
{
|
|
"epoch": 1.7207446808510638,
|
|
"grad_norm": 4.20849084854126,
|
|
"learning_rate": 7.815215977361628e-06,
|
|
"loss": 0.822,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 1.7210106382978725,
|
|
"grad_norm": 4.13023567199707,
|
|
"learning_rate": 7.814489086538388e-06,
|
|
"loss": 0.8117,
|
|
"step": 6471
|
|
},
|
|
{
|
|
"epoch": 1.7212765957446807,
|
|
"grad_norm": 4.143436431884766,
|
|
"learning_rate": 7.813762108631544e-06,
|
|
"loss": 0.8769,
|
|
"step": 6472
|
|
},
|
|
{
|
|
"epoch": 1.7215425531914894,
|
|
"grad_norm": 3.954219102859497,
|
|
"learning_rate": 7.813035043663585e-06,
|
|
"loss": 0.7836,
|
|
"step": 6473
|
|
},
|
|
{
|
|
"epoch": 1.7218085106382979,
|
|
"grad_norm": 3.688133478164673,
|
|
"learning_rate": 7.81230789165701e-06,
|
|
"loss": 0.8905,
|
|
"step": 6474
|
|
},
|
|
{
|
|
"epoch": 1.7220744680851063,
|
|
"grad_norm": 4.443986892700195,
|
|
"learning_rate": 7.811580652634319e-06,
|
|
"loss": 0.8933,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 1.722340425531915,
|
|
"grad_norm": 3.791365146636963,
|
|
"learning_rate": 7.810853326618012e-06,
|
|
"loss": 0.8278,
|
|
"step": 6476
|
|
},
|
|
{
|
|
"epoch": 1.7226063829787233,
|
|
"grad_norm": 4.167088031768799,
|
|
"learning_rate": 7.810125913630593e-06,
|
|
"loss": 0.7669,
|
|
"step": 6477
|
|
},
|
|
{
|
|
"epoch": 1.722872340425532,
|
|
"grad_norm": 3.4958133697509766,
|
|
"learning_rate": 7.80939841369457e-06,
|
|
"loss": 0.7095,
|
|
"step": 6478
|
|
},
|
|
{
|
|
"epoch": 1.7231382978723404,
|
|
"grad_norm": 4.2002339363098145,
|
|
"learning_rate": 7.808670826832455e-06,
|
|
"loss": 0.7463,
|
|
"step": 6479
|
|
},
|
|
{
|
|
"epoch": 1.7234042553191489,
|
|
"grad_norm": 3.795557737350464,
|
|
"learning_rate": 7.807943153066754e-06,
|
|
"loss": 0.6731,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 1.7236702127659576,
|
|
"grad_norm": 3.272183895111084,
|
|
"learning_rate": 7.807215392419988e-06,
|
|
"loss": 0.6116,
|
|
"step": 6481
|
|
},
|
|
{
|
|
"epoch": 1.7239361702127658,
|
|
"grad_norm": 4.027061462402344,
|
|
"learning_rate": 7.806487544914672e-06,
|
|
"loss": 0.8122,
|
|
"step": 6482
|
|
},
|
|
{
|
|
"epoch": 1.7242021276595745,
|
|
"grad_norm": 3.5909063816070557,
|
|
"learning_rate": 7.805759610573327e-06,
|
|
"loss": 0.7915,
|
|
"step": 6483
|
|
},
|
|
{
|
|
"epoch": 1.724468085106383,
|
|
"grad_norm": 4.0041961669921875,
|
|
"learning_rate": 7.805031589418477e-06,
|
|
"loss": 0.6859,
|
|
"step": 6484
|
|
},
|
|
{
|
|
"epoch": 1.7247340425531914,
|
|
"grad_norm": 3.9270341396331787,
|
|
"learning_rate": 7.804303481472645e-06,
|
|
"loss": 0.7585,
|
|
"step": 6485
|
|
},
|
|
{
|
|
"epoch": 1.725,
|
|
"grad_norm": 4.444969654083252,
|
|
"learning_rate": 7.803575286758365e-06,
|
|
"loss": 0.8409,
|
|
"step": 6486
|
|
},
|
|
{
|
|
"epoch": 1.7252659574468086,
|
|
"grad_norm": 4.4063262939453125,
|
|
"learning_rate": 7.802847005298162e-06,
|
|
"loss": 1.0173,
|
|
"step": 6487
|
|
},
|
|
{
|
|
"epoch": 1.725531914893617,
|
|
"grad_norm": 4.078791618347168,
|
|
"learning_rate": 7.802118637114575e-06,
|
|
"loss": 0.8106,
|
|
"step": 6488
|
|
},
|
|
{
|
|
"epoch": 1.7257978723404257,
|
|
"grad_norm": 3.8760604858398438,
|
|
"learning_rate": 7.801390182230137e-06,
|
|
"loss": 0.7751,
|
|
"step": 6489
|
|
},
|
|
{
|
|
"epoch": 1.726063829787234,
|
|
"grad_norm": 4.180771350860596,
|
|
"learning_rate": 7.800661640667388e-06,
|
|
"loss": 0.8671,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 1.7263297872340426,
|
|
"grad_norm": 3.921558380126953,
|
|
"learning_rate": 7.799933012448872e-06,
|
|
"loss": 0.8414,
|
|
"step": 6491
|
|
},
|
|
{
|
|
"epoch": 1.726595744680851,
|
|
"grad_norm": 3.8960835933685303,
|
|
"learning_rate": 7.799204297597129e-06,
|
|
"loss": 0.7135,
|
|
"step": 6492
|
|
},
|
|
{
|
|
"epoch": 1.7268617021276595,
|
|
"grad_norm": 3.834841251373291,
|
|
"learning_rate": 7.798475496134714e-06,
|
|
"loss": 0.7374,
|
|
"step": 6493
|
|
},
|
|
{
|
|
"epoch": 1.7271276595744682,
|
|
"grad_norm": 3.5948872566223145,
|
|
"learning_rate": 7.79774660808417e-06,
|
|
"loss": 0.7354,
|
|
"step": 6494
|
|
},
|
|
{
|
|
"epoch": 1.7273936170212765,
|
|
"grad_norm": 3.763976573944092,
|
|
"learning_rate": 7.797017633468052e-06,
|
|
"loss": 0.9162,
|
|
"step": 6495
|
|
},
|
|
{
|
|
"epoch": 1.7276595744680852,
|
|
"grad_norm": 3.8534562587738037,
|
|
"learning_rate": 7.796288572308914e-06,
|
|
"loss": 0.8713,
|
|
"step": 6496
|
|
},
|
|
{
|
|
"epoch": 1.7279255319148936,
|
|
"grad_norm": 4.049807071685791,
|
|
"learning_rate": 7.795559424629317e-06,
|
|
"loss": 0.8404,
|
|
"step": 6497
|
|
},
|
|
{
|
|
"epoch": 1.728191489361702,
|
|
"grad_norm": 3.8596930503845215,
|
|
"learning_rate": 7.79483019045182e-06,
|
|
"loss": 0.7868,
|
|
"step": 6498
|
|
},
|
|
{
|
|
"epoch": 1.7284574468085108,
|
|
"grad_norm": 4.452897071838379,
|
|
"learning_rate": 7.794100869798986e-06,
|
|
"loss": 0.9168,
|
|
"step": 6499
|
|
},
|
|
{
|
|
"epoch": 1.728723404255319,
|
|
"grad_norm": 3.7102370262145996,
|
|
"learning_rate": 7.79337146269338e-06,
|
|
"loss": 0.9201,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 1.728723404255319,
|
|
"eval_loss": 1.2800854444503784,
|
|
"eval_runtime": 13.8491,
|
|
"eval_samples_per_second": 28.883,
|
|
"eval_steps_per_second": 3.61,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 1.7289893617021277,
|
|
"grad_norm": 4.088536262512207,
|
|
"learning_rate": 7.792641969157574e-06,
|
|
"loss": 0.8304,
|
|
"step": 6501
|
|
},
|
|
{
|
|
"epoch": 1.7292553191489362,
|
|
"grad_norm": 3.8640379905700684,
|
|
"learning_rate": 7.791912389214138e-06,
|
|
"loss": 0.77,
|
|
"step": 6502
|
|
},
|
|
{
|
|
"epoch": 1.7295212765957446,
|
|
"grad_norm": 3.927625894546509,
|
|
"learning_rate": 7.791182722885644e-06,
|
|
"loss": 0.7303,
|
|
"step": 6503
|
|
},
|
|
{
|
|
"epoch": 1.7297872340425533,
|
|
"grad_norm": 3.960904598236084,
|
|
"learning_rate": 7.790452970194673e-06,
|
|
"loss": 0.8346,
|
|
"step": 6504
|
|
},
|
|
{
|
|
"epoch": 1.7300531914893615,
|
|
"grad_norm": 3.953512191772461,
|
|
"learning_rate": 7.7897231311638e-06,
|
|
"loss": 0.6958,
|
|
"step": 6505
|
|
},
|
|
{
|
|
"epoch": 1.7303191489361702,
|
|
"grad_norm": 3.7672922611236572,
|
|
"learning_rate": 7.788993205815606e-06,
|
|
"loss": 0.7887,
|
|
"step": 6506
|
|
},
|
|
{
|
|
"epoch": 1.7305851063829787,
|
|
"grad_norm": 4.269046783447266,
|
|
"learning_rate": 7.788263194172684e-06,
|
|
"loss": 0.9836,
|
|
"step": 6507
|
|
},
|
|
{
|
|
"epoch": 1.7308510638297872,
|
|
"grad_norm": 3.96058988571167,
|
|
"learning_rate": 7.787533096257613e-06,
|
|
"loss": 0.9103,
|
|
"step": 6508
|
|
},
|
|
{
|
|
"epoch": 1.7311170212765958,
|
|
"grad_norm": 3.9208950996398926,
|
|
"learning_rate": 7.786802912092986e-06,
|
|
"loss": 0.819,
|
|
"step": 6509
|
|
},
|
|
{
|
|
"epoch": 1.7313829787234043,
|
|
"grad_norm": 3.600135326385498,
|
|
"learning_rate": 7.786072641701397e-06,
|
|
"loss": 0.8122,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 1.7316489361702128,
|
|
"grad_norm": 3.9716193675994873,
|
|
"learning_rate": 7.78534228510544e-06,
|
|
"loss": 0.7281,
|
|
"step": 6511
|
|
},
|
|
{
|
|
"epoch": 1.7319148936170212,
|
|
"grad_norm": 4.222037315368652,
|
|
"learning_rate": 7.784611842327711e-06,
|
|
"loss": 0.8926,
|
|
"step": 6512
|
|
},
|
|
{
|
|
"epoch": 1.7321808510638297,
|
|
"grad_norm": 3.3642852306365967,
|
|
"learning_rate": 7.783881313390816e-06,
|
|
"loss": 0.7014,
|
|
"step": 6513
|
|
},
|
|
{
|
|
"epoch": 1.7324468085106384,
|
|
"grad_norm": 4.051825046539307,
|
|
"learning_rate": 7.783150698317354e-06,
|
|
"loss": 0.7602,
|
|
"step": 6514
|
|
},
|
|
{
|
|
"epoch": 1.7327127659574468,
|
|
"grad_norm": 4.036343574523926,
|
|
"learning_rate": 7.782419997129934e-06,
|
|
"loss": 0.8381,
|
|
"step": 6515
|
|
},
|
|
{
|
|
"epoch": 1.7329787234042553,
|
|
"grad_norm": 3.722576856613159,
|
|
"learning_rate": 7.781689209851163e-06,
|
|
"loss": 0.8737,
|
|
"step": 6516
|
|
},
|
|
{
|
|
"epoch": 1.733244680851064,
|
|
"grad_norm": 4.037721157073975,
|
|
"learning_rate": 7.780958336503653e-06,
|
|
"loss": 0.8382,
|
|
"step": 6517
|
|
},
|
|
{
|
|
"epoch": 1.7335106382978722,
|
|
"grad_norm": 4.075493812561035,
|
|
"learning_rate": 7.780227377110016e-06,
|
|
"loss": 0.8215,
|
|
"step": 6518
|
|
},
|
|
{
|
|
"epoch": 1.733776595744681,
|
|
"grad_norm": 3.9683899879455566,
|
|
"learning_rate": 7.779496331692872e-06,
|
|
"loss": 0.8797,
|
|
"step": 6519
|
|
},
|
|
{
|
|
"epoch": 1.7340425531914894,
|
|
"grad_norm": 3.871469259262085,
|
|
"learning_rate": 7.77876520027484e-06,
|
|
"loss": 0.7388,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 1.7343085106382978,
|
|
"grad_norm": 3.950624465942383,
|
|
"learning_rate": 7.778033982878539e-06,
|
|
"loss": 0.7502,
|
|
"step": 6521
|
|
},
|
|
{
|
|
"epoch": 1.7345744680851065,
|
|
"grad_norm": 4.015387058258057,
|
|
"learning_rate": 7.777302679526596e-06,
|
|
"loss": 0.9874,
|
|
"step": 6522
|
|
},
|
|
{
|
|
"epoch": 1.7348404255319148,
|
|
"grad_norm": 4.03596830368042,
|
|
"learning_rate": 7.776571290241642e-06,
|
|
"loss": 0.7633,
|
|
"step": 6523
|
|
},
|
|
{
|
|
"epoch": 1.7351063829787234,
|
|
"grad_norm": 4.029125213623047,
|
|
"learning_rate": 7.775839815046299e-06,
|
|
"loss": 0.7994,
|
|
"step": 6524
|
|
},
|
|
{
|
|
"epoch": 1.735372340425532,
|
|
"grad_norm": 4.058604717254639,
|
|
"learning_rate": 7.775108253963207e-06,
|
|
"loss": 0.7391,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 1.7356382978723404,
|
|
"grad_norm": 3.862391948699951,
|
|
"learning_rate": 7.774376607014995e-06,
|
|
"loss": 0.9032,
|
|
"step": 6526
|
|
},
|
|
{
|
|
"epoch": 1.735904255319149,
|
|
"grad_norm": 3.903395414352417,
|
|
"learning_rate": 7.773644874224306e-06,
|
|
"loss": 0.8429,
|
|
"step": 6527
|
|
},
|
|
{
|
|
"epoch": 1.7361702127659573,
|
|
"grad_norm": 3.8711469173431396,
|
|
"learning_rate": 7.77291305561378e-06,
|
|
"loss": 0.807,
|
|
"step": 6528
|
|
},
|
|
{
|
|
"epoch": 1.736436170212766,
|
|
"grad_norm": 3.977463483810425,
|
|
"learning_rate": 7.77218115120606e-06,
|
|
"loss": 0.7929,
|
|
"step": 6529
|
|
},
|
|
{
|
|
"epoch": 1.7367021276595744,
|
|
"grad_norm": 3.7397544384002686,
|
|
"learning_rate": 7.77144916102379e-06,
|
|
"loss": 0.8478,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 1.736968085106383,
|
|
"grad_norm": 3.6703922748565674,
|
|
"learning_rate": 7.770717085089618e-06,
|
|
"loss": 0.6432,
|
|
"step": 6531
|
|
},
|
|
{
|
|
"epoch": 1.7372340425531916,
|
|
"grad_norm": 4.170365333557129,
|
|
"learning_rate": 7.7699849234262e-06,
|
|
"loss": 0.7565,
|
|
"step": 6532
|
|
},
|
|
{
|
|
"epoch": 1.7375,
|
|
"grad_norm": 3.6264007091522217,
|
|
"learning_rate": 7.769252676056186e-06,
|
|
"loss": 0.7635,
|
|
"step": 6533
|
|
},
|
|
{
|
|
"epoch": 1.7377659574468085,
|
|
"grad_norm": 3.9042675495147705,
|
|
"learning_rate": 7.768520343002235e-06,
|
|
"loss": 0.9037,
|
|
"step": 6534
|
|
},
|
|
{
|
|
"epoch": 1.738031914893617,
|
|
"grad_norm": 4.19412899017334,
|
|
"learning_rate": 7.767787924287005e-06,
|
|
"loss": 0.8516,
|
|
"step": 6535
|
|
},
|
|
{
|
|
"epoch": 1.7382978723404254,
|
|
"grad_norm": 3.869814157485962,
|
|
"learning_rate": 7.767055419933157e-06,
|
|
"loss": 0.7815,
|
|
"step": 6536
|
|
},
|
|
{
|
|
"epoch": 1.7385638297872341,
|
|
"grad_norm": 3.712411642074585,
|
|
"learning_rate": 7.766322829963357e-06,
|
|
"loss": 0.6676,
|
|
"step": 6537
|
|
},
|
|
{
|
|
"epoch": 1.7388297872340426,
|
|
"grad_norm": 4.046865463256836,
|
|
"learning_rate": 7.76559015440027e-06,
|
|
"loss": 0.8799,
|
|
"step": 6538
|
|
},
|
|
{
|
|
"epoch": 1.739095744680851,
|
|
"grad_norm": 3.908235549926758,
|
|
"learning_rate": 7.76485739326657e-06,
|
|
"loss": 0.7999,
|
|
"step": 6539
|
|
},
|
|
{
|
|
"epoch": 1.7393617021276597,
|
|
"grad_norm": 4.396571159362793,
|
|
"learning_rate": 7.764124546584926e-06,
|
|
"loss": 0.8813,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 1.739627659574468,
|
|
"grad_norm": 3.7259883880615234,
|
|
"learning_rate": 7.763391614378014e-06,
|
|
"loss": 0.8519,
|
|
"step": 6541
|
|
},
|
|
{
|
|
"epoch": 1.7398936170212767,
|
|
"grad_norm": 3.7457261085510254,
|
|
"learning_rate": 7.762658596668514e-06,
|
|
"loss": 0.7913,
|
|
"step": 6542
|
|
},
|
|
{
|
|
"epoch": 1.7401595744680851,
|
|
"grad_norm": 3.66605544090271,
|
|
"learning_rate": 7.7619254934791e-06,
|
|
"loss": 0.8122,
|
|
"step": 6543
|
|
},
|
|
{
|
|
"epoch": 1.7404255319148936,
|
|
"grad_norm": 3.8894519805908203,
|
|
"learning_rate": 7.761192304832463e-06,
|
|
"loss": 0.6829,
|
|
"step": 6544
|
|
},
|
|
{
|
|
"epoch": 1.7406914893617023,
|
|
"grad_norm": 3.4376041889190674,
|
|
"learning_rate": 7.760459030751285e-06,
|
|
"loss": 0.6903,
|
|
"step": 6545
|
|
},
|
|
{
|
|
"epoch": 1.7409574468085105,
|
|
"grad_norm": 4.00453519821167,
|
|
"learning_rate": 7.759725671258254e-06,
|
|
"loss": 0.8714,
|
|
"step": 6546
|
|
},
|
|
{
|
|
"epoch": 1.7412234042553192,
|
|
"grad_norm": 3.9484405517578125,
|
|
"learning_rate": 7.758992226376062e-06,
|
|
"loss": 0.9567,
|
|
"step": 6547
|
|
},
|
|
{
|
|
"epoch": 1.7414893617021276,
|
|
"grad_norm": 3.885755777359009,
|
|
"learning_rate": 7.7582586961274e-06,
|
|
"loss": 0.7928,
|
|
"step": 6548
|
|
},
|
|
{
|
|
"epoch": 1.741755319148936,
|
|
"grad_norm": 3.8768088817596436,
|
|
"learning_rate": 7.757525080534968e-06,
|
|
"loss": 0.7554,
|
|
"step": 6549
|
|
},
|
|
{
|
|
"epoch": 1.7420212765957448,
|
|
"grad_norm": 3.7053639888763428,
|
|
"learning_rate": 7.756791379621461e-06,
|
|
"loss": 0.8122,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 1.742287234042553,
|
|
"grad_norm": 3.9800238609313965,
|
|
"learning_rate": 7.756057593409588e-06,
|
|
"loss": 0.8505,
|
|
"step": 6551
|
|
},
|
|
{
|
|
"epoch": 1.7425531914893617,
|
|
"grad_norm": 3.586451768875122,
|
|
"learning_rate": 7.755323721922045e-06,
|
|
"loss": 0.7435,
|
|
"step": 6552
|
|
},
|
|
{
|
|
"epoch": 1.7428191489361702,
|
|
"grad_norm": 4.315957069396973,
|
|
"learning_rate": 7.754589765181543e-06,
|
|
"loss": 0.8308,
|
|
"step": 6553
|
|
},
|
|
{
|
|
"epoch": 1.7430851063829786,
|
|
"grad_norm": 3.764915704727173,
|
|
"learning_rate": 7.75385572321079e-06,
|
|
"loss": 0.7939,
|
|
"step": 6554
|
|
},
|
|
{
|
|
"epoch": 1.7433510638297873,
|
|
"grad_norm": 3.9177279472351074,
|
|
"learning_rate": 7.7531215960325e-06,
|
|
"loss": 0.8557,
|
|
"step": 6555
|
|
},
|
|
{
|
|
"epoch": 1.7436170212765958,
|
|
"grad_norm": 3.802114248275757,
|
|
"learning_rate": 7.752387383669384e-06,
|
|
"loss": 0.7933,
|
|
"step": 6556
|
|
},
|
|
{
|
|
"epoch": 1.7438829787234043,
|
|
"grad_norm": 4.129657745361328,
|
|
"learning_rate": 7.751653086144164e-06,
|
|
"loss": 0.8744,
|
|
"step": 6557
|
|
},
|
|
{
|
|
"epoch": 1.7441489361702127,
|
|
"grad_norm": 4.201019763946533,
|
|
"learning_rate": 7.750918703479558e-06,
|
|
"loss": 0.7875,
|
|
"step": 6558
|
|
},
|
|
{
|
|
"epoch": 1.7444148936170212,
|
|
"grad_norm": 4.305670261383057,
|
|
"learning_rate": 7.750184235698285e-06,
|
|
"loss": 0.8137,
|
|
"step": 6559
|
|
},
|
|
{
|
|
"epoch": 1.7446808510638299,
|
|
"grad_norm": 3.571631908416748,
|
|
"learning_rate": 7.749449682823077e-06,
|
|
"loss": 0.7308,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 1.7449468085106383,
|
|
"grad_norm": 4.124020576477051,
|
|
"learning_rate": 7.74871504487666e-06,
|
|
"loss": 0.9546,
|
|
"step": 6561
|
|
},
|
|
{
|
|
"epoch": 1.7452127659574468,
|
|
"grad_norm": 4.1722588539123535,
|
|
"learning_rate": 7.74798032188176e-06,
|
|
"loss": 0.787,
|
|
"step": 6562
|
|
},
|
|
{
|
|
"epoch": 1.7454787234042555,
|
|
"grad_norm": 4.017617225646973,
|
|
"learning_rate": 7.747245513861115e-06,
|
|
"loss": 0.8655,
|
|
"step": 6563
|
|
},
|
|
{
|
|
"epoch": 1.7457446808510637,
|
|
"grad_norm": 4.122082233428955,
|
|
"learning_rate": 7.74651062083746e-06,
|
|
"loss": 0.9471,
|
|
"step": 6564
|
|
},
|
|
{
|
|
"epoch": 1.7460106382978724,
|
|
"grad_norm": 4.254493713378906,
|
|
"learning_rate": 7.745775642833532e-06,
|
|
"loss": 0.8313,
|
|
"step": 6565
|
|
},
|
|
{
|
|
"epoch": 1.7462765957446809,
|
|
"grad_norm": 3.856379985809326,
|
|
"learning_rate": 7.745040579872073e-06,
|
|
"loss": 0.9207,
|
|
"step": 6566
|
|
},
|
|
{
|
|
"epoch": 1.7465425531914893,
|
|
"grad_norm": 4.020528316497803,
|
|
"learning_rate": 7.744305431975827e-06,
|
|
"loss": 0.7029,
|
|
"step": 6567
|
|
},
|
|
{
|
|
"epoch": 1.746808510638298,
|
|
"grad_norm": 4.091069221496582,
|
|
"learning_rate": 7.743570199167539e-06,
|
|
"loss": 0.8682,
|
|
"step": 6568
|
|
},
|
|
{
|
|
"epoch": 1.7470744680851062,
|
|
"grad_norm": 3.8805131912231445,
|
|
"learning_rate": 7.742834881469959e-06,
|
|
"loss": 0.8366,
|
|
"step": 6569
|
|
},
|
|
{
|
|
"epoch": 1.747340425531915,
|
|
"grad_norm": 3.5972797870635986,
|
|
"learning_rate": 7.742099478905837e-06,
|
|
"loss": 0.784,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 1.7476063829787234,
|
|
"grad_norm": 3.655684232711792,
|
|
"learning_rate": 7.741363991497932e-06,
|
|
"loss": 0.7849,
|
|
"step": 6571
|
|
},
|
|
{
|
|
"epoch": 1.7478723404255319,
|
|
"grad_norm": 3.854562520980835,
|
|
"learning_rate": 7.740628419268996e-06,
|
|
"loss": 0.7961,
|
|
"step": 6572
|
|
},
|
|
{
|
|
"epoch": 1.7481382978723405,
|
|
"grad_norm": 3.5972256660461426,
|
|
"learning_rate": 7.73989276224179e-06,
|
|
"loss": 0.8045,
|
|
"step": 6573
|
|
},
|
|
{
|
|
"epoch": 1.7484042553191488,
|
|
"grad_norm": 4.087411880493164,
|
|
"learning_rate": 7.739157020439077e-06,
|
|
"loss": 0.8889,
|
|
"step": 6574
|
|
},
|
|
{
|
|
"epoch": 1.7486702127659575,
|
|
"grad_norm": 4.145167350769043,
|
|
"learning_rate": 7.738421193883618e-06,
|
|
"loss": 0.8542,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 1.748936170212766,
|
|
"grad_norm": 4.064332008361816,
|
|
"learning_rate": 7.737685282598187e-06,
|
|
"loss": 0.8523,
|
|
"step": 6576
|
|
},
|
|
{
|
|
"epoch": 1.7492021276595744,
|
|
"grad_norm": 4.075108051300049,
|
|
"learning_rate": 7.736949286605549e-06,
|
|
"loss": 0.8839,
|
|
"step": 6577
|
|
},
|
|
{
|
|
"epoch": 1.749468085106383,
|
|
"grad_norm": 4.157843112945557,
|
|
"learning_rate": 7.736213205928476e-06,
|
|
"loss": 0.9253,
|
|
"step": 6578
|
|
},
|
|
{
|
|
"epoch": 1.7497340425531915,
|
|
"grad_norm": 3.978928327560425,
|
|
"learning_rate": 7.735477040589745e-06,
|
|
"loss": 0.8454,
|
|
"step": 6579
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 3.7294394969940186,
|
|
"learning_rate": 7.734740790612137e-06,
|
|
"loss": 0.7877,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 1.7502659574468085,
|
|
"grad_norm": 4.367574214935303,
|
|
"learning_rate": 7.734004456018424e-06,
|
|
"loss": 0.7477,
|
|
"step": 6581
|
|
},
|
|
{
|
|
"epoch": 1.750531914893617,
|
|
"grad_norm": 3.952146291732788,
|
|
"learning_rate": 7.733268036831398e-06,
|
|
"loss": 0.7725,
|
|
"step": 6582
|
|
},
|
|
{
|
|
"epoch": 1.7507978723404256,
|
|
"grad_norm": 4.400146961212158,
|
|
"learning_rate": 7.73253153307384e-06,
|
|
"loss": 0.8059,
|
|
"step": 6583
|
|
},
|
|
{
|
|
"epoch": 1.751063829787234,
|
|
"grad_norm": 4.003587245941162,
|
|
"learning_rate": 7.73179494476854e-06,
|
|
"loss": 0.8549,
|
|
"step": 6584
|
|
},
|
|
{
|
|
"epoch": 1.7513297872340425,
|
|
"grad_norm": 3.898470640182495,
|
|
"learning_rate": 7.731058271938286e-06,
|
|
"loss": 0.7925,
|
|
"step": 6585
|
|
},
|
|
{
|
|
"epoch": 1.7515957446808512,
|
|
"grad_norm": 3.6899170875549316,
|
|
"learning_rate": 7.730321514605877e-06,
|
|
"loss": 0.7535,
|
|
"step": 6586
|
|
},
|
|
{
|
|
"epoch": 1.7518617021276595,
|
|
"grad_norm": 3.996615171432495,
|
|
"learning_rate": 7.729584672794102e-06,
|
|
"loss": 0.8278,
|
|
"step": 6587
|
|
},
|
|
{
|
|
"epoch": 1.7521276595744681,
|
|
"grad_norm": 4.020608901977539,
|
|
"learning_rate": 7.728847746525764e-06,
|
|
"loss": 0.7233,
|
|
"step": 6588
|
|
},
|
|
{
|
|
"epoch": 1.7523936170212766,
|
|
"grad_norm": 4.504430294036865,
|
|
"learning_rate": 7.728110735823666e-06,
|
|
"loss": 0.8254,
|
|
"step": 6589
|
|
},
|
|
{
|
|
"epoch": 1.752659574468085,
|
|
"grad_norm": 3.7418766021728516,
|
|
"learning_rate": 7.72737364071061e-06,
|
|
"loss": 0.8151,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 1.7529255319148938,
|
|
"grad_norm": 4.577789783477783,
|
|
"learning_rate": 7.7266364612094e-06,
|
|
"loss": 0.9276,
|
|
"step": 6591
|
|
},
|
|
{
|
|
"epoch": 1.753191489361702,
|
|
"grad_norm": 4.067131042480469,
|
|
"learning_rate": 7.72589919734285e-06,
|
|
"loss": 0.8282,
|
|
"step": 6592
|
|
},
|
|
{
|
|
"epoch": 1.7534574468085107,
|
|
"grad_norm": 4.11132287979126,
|
|
"learning_rate": 7.725161849133769e-06,
|
|
"loss": 0.8663,
|
|
"step": 6593
|
|
},
|
|
{
|
|
"epoch": 1.7537234042553191,
|
|
"grad_norm": 3.8996002674102783,
|
|
"learning_rate": 7.724424416604972e-06,
|
|
"loss": 0.9631,
|
|
"step": 6594
|
|
},
|
|
{
|
|
"epoch": 1.7539893617021276,
|
|
"grad_norm": 3.911623954772949,
|
|
"learning_rate": 7.723686899779277e-06,
|
|
"loss": 0.8082,
|
|
"step": 6595
|
|
},
|
|
{
|
|
"epoch": 1.7542553191489363,
|
|
"grad_norm": 4.957215785980225,
|
|
"learning_rate": 7.7229492986795e-06,
|
|
"loss": 0.8758,
|
|
"step": 6596
|
|
},
|
|
{
|
|
"epoch": 1.7545212765957445,
|
|
"grad_norm": 4.114643573760986,
|
|
"learning_rate": 7.722211613328467e-06,
|
|
"loss": 0.7665,
|
|
"step": 6597
|
|
},
|
|
{
|
|
"epoch": 1.7547872340425532,
|
|
"grad_norm": 3.4866108894348145,
|
|
"learning_rate": 7.721473843749e-06,
|
|
"loss": 0.7636,
|
|
"step": 6598
|
|
},
|
|
{
|
|
"epoch": 1.7550531914893617,
|
|
"grad_norm": 3.798917055130005,
|
|
"learning_rate": 7.72073598996393e-06,
|
|
"loss": 0.7645,
|
|
"step": 6599
|
|
},
|
|
{
|
|
"epoch": 1.7553191489361701,
|
|
"grad_norm": 4.327617168426514,
|
|
"learning_rate": 7.719998051996087e-06,
|
|
"loss": 0.8174,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 1.7555851063829788,
|
|
"grad_norm": 3.7455971240997314,
|
|
"learning_rate": 7.719260029868299e-06,
|
|
"loss": 0.7484,
|
|
"step": 6601
|
|
},
|
|
{
|
|
"epoch": 1.7558510638297873,
|
|
"grad_norm": 3.4463014602661133,
|
|
"learning_rate": 7.718521923603404e-06,
|
|
"loss": 0.692,
|
|
"step": 6602
|
|
},
|
|
{
|
|
"epoch": 1.7561170212765957,
|
|
"grad_norm": 3.920140027999878,
|
|
"learning_rate": 7.717783733224243e-06,
|
|
"loss": 0.9122,
|
|
"step": 6603
|
|
},
|
|
{
|
|
"epoch": 1.7563829787234042,
|
|
"grad_norm": 4.227574825286865,
|
|
"learning_rate": 7.717045458753651e-06,
|
|
"loss": 0.7812,
|
|
"step": 6604
|
|
},
|
|
{
|
|
"epoch": 1.7566489361702127,
|
|
"grad_norm": 4.23086404800415,
|
|
"learning_rate": 7.716307100214472e-06,
|
|
"loss": 0.829,
|
|
"step": 6605
|
|
},
|
|
{
|
|
"epoch": 1.7569148936170214,
|
|
"grad_norm": 3.5714340209960938,
|
|
"learning_rate": 7.715568657629557e-06,
|
|
"loss": 0.8676,
|
|
"step": 6606
|
|
},
|
|
{
|
|
"epoch": 1.7571808510638298,
|
|
"grad_norm": 4.220118045806885,
|
|
"learning_rate": 7.71483013102175e-06,
|
|
"loss": 0.7351,
|
|
"step": 6607
|
|
},
|
|
{
|
|
"epoch": 1.7574468085106383,
|
|
"grad_norm": 3.8862133026123047,
|
|
"learning_rate": 7.7140915204139e-06,
|
|
"loss": 0.7836,
|
|
"step": 6608
|
|
},
|
|
{
|
|
"epoch": 1.757712765957447,
|
|
"grad_norm": 3.9056966304779053,
|
|
"learning_rate": 7.713352825828865e-06,
|
|
"loss": 0.7439,
|
|
"step": 6609
|
|
},
|
|
{
|
|
"epoch": 1.7579787234042552,
|
|
"grad_norm": 4.519630432128906,
|
|
"learning_rate": 7.712614047289498e-06,
|
|
"loss": 0.9618,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 1.758244680851064,
|
|
"grad_norm": 3.756225109100342,
|
|
"learning_rate": 7.711875184818659e-06,
|
|
"loss": 0.7612,
|
|
"step": 6611
|
|
},
|
|
{
|
|
"epoch": 1.7585106382978724,
|
|
"grad_norm": 4.109426498413086,
|
|
"learning_rate": 7.71113623843921e-06,
|
|
"loss": 0.8828,
|
|
"step": 6612
|
|
},
|
|
{
|
|
"epoch": 1.7587765957446808,
|
|
"grad_norm": 4.274012565612793,
|
|
"learning_rate": 7.710397208174012e-06,
|
|
"loss": 0.8212,
|
|
"step": 6613
|
|
},
|
|
{
|
|
"epoch": 1.7590425531914895,
|
|
"grad_norm": 4.489198207855225,
|
|
"learning_rate": 7.709658094045933e-06,
|
|
"loss": 0.9358,
|
|
"step": 6614
|
|
},
|
|
{
|
|
"epoch": 1.7593085106382977,
|
|
"grad_norm": 3.796844005584717,
|
|
"learning_rate": 7.708918896077843e-06,
|
|
"loss": 0.8092,
|
|
"step": 6615
|
|
},
|
|
{
|
|
"epoch": 1.7595744680851064,
|
|
"grad_norm": 4.139426231384277,
|
|
"learning_rate": 7.708179614292614e-06,
|
|
"loss": 0.7859,
|
|
"step": 6616
|
|
},
|
|
{
|
|
"epoch": 1.7598404255319149,
|
|
"grad_norm": 4.109641075134277,
|
|
"learning_rate": 7.707440248713118e-06,
|
|
"loss": 0.7763,
|
|
"step": 6617
|
|
},
|
|
{
|
|
"epoch": 1.7601063829787233,
|
|
"grad_norm": 4.1055521965026855,
|
|
"learning_rate": 7.706700799362235e-06,
|
|
"loss": 0.7225,
|
|
"step": 6618
|
|
},
|
|
{
|
|
"epoch": 1.760372340425532,
|
|
"grad_norm": 4.071004390716553,
|
|
"learning_rate": 7.70596126626284e-06,
|
|
"loss": 0.7714,
|
|
"step": 6619
|
|
},
|
|
{
|
|
"epoch": 1.7606382978723403,
|
|
"grad_norm": 4.117389678955078,
|
|
"learning_rate": 7.705221649437819e-06,
|
|
"loss": 0.8,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 1.760904255319149,
|
|
"grad_norm": 3.617248058319092,
|
|
"learning_rate": 7.704481948910057e-06,
|
|
"loss": 0.8286,
|
|
"step": 6621
|
|
},
|
|
{
|
|
"epoch": 1.7611702127659574,
|
|
"grad_norm": 3.6249337196350098,
|
|
"learning_rate": 7.703742164702436e-06,
|
|
"loss": 0.732,
|
|
"step": 6622
|
|
},
|
|
{
|
|
"epoch": 1.7614361702127659,
|
|
"grad_norm": 3.584951400756836,
|
|
"learning_rate": 7.703002296837849e-06,
|
|
"loss": 0.859,
|
|
"step": 6623
|
|
},
|
|
{
|
|
"epoch": 1.7617021276595746,
|
|
"grad_norm": 3.908857822418213,
|
|
"learning_rate": 7.70226234533919e-06,
|
|
"loss": 0.8112,
|
|
"step": 6624
|
|
},
|
|
{
|
|
"epoch": 1.761968085106383,
|
|
"grad_norm": 4.350627422332764,
|
|
"learning_rate": 7.701522310229353e-06,
|
|
"loss": 0.9676,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 1.7622340425531915,
|
|
"grad_norm": 3.7733817100524902,
|
|
"learning_rate": 7.700782191531236e-06,
|
|
"loss": 0.7312,
|
|
"step": 6626
|
|
},
|
|
{
|
|
"epoch": 1.7625,
|
|
"grad_norm": 3.822552442550659,
|
|
"learning_rate": 7.700041989267738e-06,
|
|
"loss": 0.6901,
|
|
"step": 6627
|
|
},
|
|
{
|
|
"epoch": 1.7627659574468084,
|
|
"grad_norm": 3.9083547592163086,
|
|
"learning_rate": 7.69930170346176e-06,
|
|
"loss": 0.7498,
|
|
"step": 6628
|
|
},
|
|
{
|
|
"epoch": 1.763031914893617,
|
|
"grad_norm": 4.126950263977051,
|
|
"learning_rate": 7.69856133413621e-06,
|
|
"loss": 0.7975,
|
|
"step": 6629
|
|
},
|
|
{
|
|
"epoch": 1.7632978723404256,
|
|
"grad_norm": 4.27503776550293,
|
|
"learning_rate": 7.697820881313994e-06,
|
|
"loss": 0.7927,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 1.763563829787234,
|
|
"grad_norm": 4.2161407470703125,
|
|
"learning_rate": 7.697080345018024e-06,
|
|
"loss": 0.8779,
|
|
"step": 6631
|
|
},
|
|
{
|
|
"epoch": 1.7638297872340427,
|
|
"grad_norm": 4.142273426055908,
|
|
"learning_rate": 7.696339725271215e-06,
|
|
"loss": 0.8069,
|
|
"step": 6632
|
|
},
|
|
{
|
|
"epoch": 1.764095744680851,
|
|
"grad_norm": 4.17659330368042,
|
|
"learning_rate": 7.695599022096478e-06,
|
|
"loss": 0.7439,
|
|
"step": 6633
|
|
},
|
|
{
|
|
"epoch": 1.7643617021276596,
|
|
"grad_norm": 4.072018623352051,
|
|
"learning_rate": 7.694858235516735e-06,
|
|
"loss": 0.8364,
|
|
"step": 6634
|
|
},
|
|
{
|
|
"epoch": 1.764627659574468,
|
|
"grad_norm": 3.6811084747314453,
|
|
"learning_rate": 7.694117365554905e-06,
|
|
"loss": 0.8986,
|
|
"step": 6635
|
|
},
|
|
{
|
|
"epoch": 1.7648936170212766,
|
|
"grad_norm": 3.924104928970337,
|
|
"learning_rate": 7.693376412233913e-06,
|
|
"loss": 0.7906,
|
|
"step": 6636
|
|
},
|
|
{
|
|
"epoch": 1.7651595744680852,
|
|
"grad_norm": 4.180627822875977,
|
|
"learning_rate": 7.69263537557668e-06,
|
|
"loss": 0.814,
|
|
"step": 6637
|
|
},
|
|
{
|
|
"epoch": 1.7654255319148935,
|
|
"grad_norm": 3.74808931350708,
|
|
"learning_rate": 7.691894255606143e-06,
|
|
"loss": 0.8623,
|
|
"step": 6638
|
|
},
|
|
{
|
|
"epoch": 1.7656914893617022,
|
|
"grad_norm": 3.8845086097717285,
|
|
"learning_rate": 7.691153052345227e-06,
|
|
"loss": 0.8279,
|
|
"step": 6639
|
|
},
|
|
{
|
|
"epoch": 1.7659574468085106,
|
|
"grad_norm": 3.6786465644836426,
|
|
"learning_rate": 7.690411765816864e-06,
|
|
"loss": 0.8579,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 1.766223404255319,
|
|
"grad_norm": 4.260414123535156,
|
|
"learning_rate": 7.689670396043997e-06,
|
|
"loss": 0.8473,
|
|
"step": 6641
|
|
},
|
|
{
|
|
"epoch": 1.7664893617021278,
|
|
"grad_norm": 3.757199287414551,
|
|
"learning_rate": 7.688928943049558e-06,
|
|
"loss": 0.8065,
|
|
"step": 6642
|
|
},
|
|
{
|
|
"epoch": 1.766755319148936,
|
|
"grad_norm": 4.010439872741699,
|
|
"learning_rate": 7.688187406856494e-06,
|
|
"loss": 0.8412,
|
|
"step": 6643
|
|
},
|
|
{
|
|
"epoch": 1.7670212765957447,
|
|
"grad_norm": 4.193131923675537,
|
|
"learning_rate": 7.687445787487746e-06,
|
|
"loss": 0.7638,
|
|
"step": 6644
|
|
},
|
|
{
|
|
"epoch": 1.7672872340425532,
|
|
"grad_norm": 3.7920022010803223,
|
|
"learning_rate": 7.686704084966263e-06,
|
|
"loss": 0.7628,
|
|
"step": 6645
|
|
},
|
|
{
|
|
"epoch": 1.7675531914893616,
|
|
"grad_norm": 3.6464099884033203,
|
|
"learning_rate": 7.68596229931499e-06,
|
|
"loss": 0.7547,
|
|
"step": 6646
|
|
},
|
|
{
|
|
"epoch": 1.7678191489361703,
|
|
"grad_norm": 3.7222912311553955,
|
|
"learning_rate": 7.685220430556883e-06,
|
|
"loss": 0.6741,
|
|
"step": 6647
|
|
},
|
|
{
|
|
"epoch": 1.7680851063829788,
|
|
"grad_norm": 3.48502254486084,
|
|
"learning_rate": 7.684478478714892e-06,
|
|
"loss": 0.6893,
|
|
"step": 6648
|
|
},
|
|
{
|
|
"epoch": 1.7683510638297872,
|
|
"grad_norm": 4.072755813598633,
|
|
"learning_rate": 7.683736443811978e-06,
|
|
"loss": 0.8487,
|
|
"step": 6649
|
|
},
|
|
{
|
|
"epoch": 1.7686170212765957,
|
|
"grad_norm": 3.5753612518310547,
|
|
"learning_rate": 7.682994325871098e-06,
|
|
"loss": 0.8314,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 1.7688829787234042,
|
|
"grad_norm": 4.951267242431641,
|
|
"learning_rate": 7.682252124915216e-06,
|
|
"loss": 0.9956,
|
|
"step": 6651
|
|
},
|
|
{
|
|
"epoch": 1.7691489361702128,
|
|
"grad_norm": 4.200650691986084,
|
|
"learning_rate": 7.681509840967294e-06,
|
|
"loss": 0.7119,
|
|
"step": 6652
|
|
},
|
|
{
|
|
"epoch": 1.7694148936170213,
|
|
"grad_norm": 3.4650633335113525,
|
|
"learning_rate": 7.6807674740503e-06,
|
|
"loss": 0.843,
|
|
"step": 6653
|
|
},
|
|
{
|
|
"epoch": 1.7696808510638298,
|
|
"grad_norm": 4.049907207489014,
|
|
"learning_rate": 7.680025024187206e-06,
|
|
"loss": 0.7776,
|
|
"step": 6654
|
|
},
|
|
{
|
|
"epoch": 1.7699468085106385,
|
|
"grad_norm": 3.934799909591675,
|
|
"learning_rate": 7.67928249140098e-06,
|
|
"loss": 0.7957,
|
|
"step": 6655
|
|
},
|
|
{
|
|
"epoch": 1.7702127659574467,
|
|
"grad_norm": 4.14153528213501,
|
|
"learning_rate": 7.678539875714604e-06,
|
|
"loss": 0.7445,
|
|
"step": 6656
|
|
},
|
|
{
|
|
"epoch": 1.7704787234042554,
|
|
"grad_norm": 3.816898822784424,
|
|
"learning_rate": 7.677797177151047e-06,
|
|
"loss": 0.8869,
|
|
"step": 6657
|
|
},
|
|
{
|
|
"epoch": 1.7707446808510638,
|
|
"grad_norm": 4.405877113342285,
|
|
"learning_rate": 7.677054395733292e-06,
|
|
"loss": 0.9004,
|
|
"step": 6658
|
|
},
|
|
{
|
|
"epoch": 1.7710106382978723,
|
|
"grad_norm": 4.069585800170898,
|
|
"learning_rate": 7.676311531484324e-06,
|
|
"loss": 0.7907,
|
|
"step": 6659
|
|
},
|
|
{
|
|
"epoch": 1.771276595744681,
|
|
"grad_norm": 3.9655072689056396,
|
|
"learning_rate": 7.675568584427125e-06,
|
|
"loss": 0.8069,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 1.7715425531914892,
|
|
"grad_norm": 3.8515357971191406,
|
|
"learning_rate": 7.674825554584686e-06,
|
|
"loss": 0.8013,
|
|
"step": 6661
|
|
},
|
|
{
|
|
"epoch": 1.771808510638298,
|
|
"grad_norm": 4.2742438316345215,
|
|
"learning_rate": 7.674082441979993e-06,
|
|
"loss": 0.9655,
|
|
"step": 6662
|
|
},
|
|
{
|
|
"epoch": 1.7720744680851064,
|
|
"grad_norm": 4.425269603729248,
|
|
"learning_rate": 7.67333924663604e-06,
|
|
"loss": 0.872,
|
|
"step": 6663
|
|
},
|
|
{
|
|
"epoch": 1.7723404255319148,
|
|
"grad_norm": 4.043865203857422,
|
|
"learning_rate": 7.672595968575827e-06,
|
|
"loss": 0.8425,
|
|
"step": 6664
|
|
},
|
|
{
|
|
"epoch": 1.7726063829787235,
|
|
"grad_norm": 3.77255916595459,
|
|
"learning_rate": 7.671852607822346e-06,
|
|
"loss": 0.6711,
|
|
"step": 6665
|
|
},
|
|
{
|
|
"epoch": 1.7728723404255318,
|
|
"grad_norm": 3.8917951583862305,
|
|
"learning_rate": 7.671109164398598e-06,
|
|
"loss": 0.7429,
|
|
"step": 6666
|
|
},
|
|
{
|
|
"epoch": 1.7731382978723405,
|
|
"grad_norm": 4.034469127655029,
|
|
"learning_rate": 7.67036563832759e-06,
|
|
"loss": 0.884,
|
|
"step": 6667
|
|
},
|
|
{
|
|
"epoch": 1.773404255319149,
|
|
"grad_norm": 4.177572727203369,
|
|
"learning_rate": 7.669622029632323e-06,
|
|
"loss": 0.7823,
|
|
"step": 6668
|
|
},
|
|
{
|
|
"epoch": 1.7736702127659574,
|
|
"grad_norm": 3.816012382507324,
|
|
"learning_rate": 7.668878338335808e-06,
|
|
"loss": 0.8012,
|
|
"step": 6669
|
|
},
|
|
{
|
|
"epoch": 1.773936170212766,
|
|
"grad_norm": 3.6478235721588135,
|
|
"learning_rate": 7.668134564461057e-06,
|
|
"loss": 0.8071,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 1.7742021276595743,
|
|
"grad_norm": 4.1651177406311035,
|
|
"learning_rate": 7.66739070803108e-06,
|
|
"loss": 0.882,
|
|
"step": 6671
|
|
},
|
|
{
|
|
"epoch": 1.774468085106383,
|
|
"grad_norm": 4.032572269439697,
|
|
"learning_rate": 7.666646769068894e-06,
|
|
"loss": 0.7804,
|
|
"step": 6672
|
|
},
|
|
{
|
|
"epoch": 1.7747340425531914,
|
|
"grad_norm": 4.481500148773193,
|
|
"learning_rate": 7.665902747597516e-06,
|
|
"loss": 0.8824,
|
|
"step": 6673
|
|
},
|
|
{
|
|
"epoch": 1.775,
|
|
"grad_norm": 3.6887848377227783,
|
|
"learning_rate": 7.66515864363997e-06,
|
|
"loss": 0.8179,
|
|
"step": 6674
|
|
},
|
|
{
|
|
"epoch": 1.7752659574468086,
|
|
"grad_norm": 3.5154476165771484,
|
|
"learning_rate": 7.664414457219277e-06,
|
|
"loss": 0.8015,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 1.775531914893617,
|
|
"grad_norm": 3.9713804721832275,
|
|
"learning_rate": 7.663670188358464e-06,
|
|
"loss": 0.8426,
|
|
"step": 6676
|
|
},
|
|
{
|
|
"epoch": 1.7757978723404255,
|
|
"grad_norm": 4.082159996032715,
|
|
"learning_rate": 7.66292583708056e-06,
|
|
"loss": 0.81,
|
|
"step": 6677
|
|
},
|
|
{
|
|
"epoch": 1.7760638297872342,
|
|
"grad_norm": 3.8582613468170166,
|
|
"learning_rate": 7.662181403408593e-06,
|
|
"loss": 0.7965,
|
|
"step": 6678
|
|
},
|
|
{
|
|
"epoch": 1.7763297872340424,
|
|
"grad_norm": 4.068000793457031,
|
|
"learning_rate": 7.661436887365603e-06,
|
|
"loss": 0.8332,
|
|
"step": 6679
|
|
},
|
|
{
|
|
"epoch": 1.7765957446808511,
|
|
"grad_norm": 4.067226409912109,
|
|
"learning_rate": 7.660692288974618e-06,
|
|
"loss": 0.8399,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 1.7768617021276596,
|
|
"grad_norm": 3.885331392288208,
|
|
"learning_rate": 7.659947608258684e-06,
|
|
"loss": 0.8701,
|
|
"step": 6681
|
|
},
|
|
{
|
|
"epoch": 1.777127659574468,
|
|
"grad_norm": 3.792872905731201,
|
|
"learning_rate": 7.659202845240839e-06,
|
|
"loss": 0.8379,
|
|
"step": 6682
|
|
},
|
|
{
|
|
"epoch": 1.7773936170212767,
|
|
"grad_norm": 3.553959369659424,
|
|
"learning_rate": 7.658457999944124e-06,
|
|
"loss": 0.6874,
|
|
"step": 6683
|
|
},
|
|
{
|
|
"epoch": 1.777659574468085,
|
|
"grad_norm": 4.169983386993408,
|
|
"learning_rate": 7.657713072391591e-06,
|
|
"loss": 0.7569,
|
|
"step": 6684
|
|
},
|
|
{
|
|
"epoch": 1.7779255319148937,
|
|
"grad_norm": 4.05847692489624,
|
|
"learning_rate": 7.656968062606288e-06,
|
|
"loss": 0.8497,
|
|
"step": 6685
|
|
},
|
|
{
|
|
"epoch": 1.7781914893617021,
|
|
"grad_norm": 4.117887496948242,
|
|
"learning_rate": 7.656222970611263e-06,
|
|
"loss": 0.708,
|
|
"step": 6686
|
|
},
|
|
{
|
|
"epoch": 1.7784574468085106,
|
|
"grad_norm": 3.683126211166382,
|
|
"learning_rate": 7.655477796429571e-06,
|
|
"loss": 0.7568,
|
|
"step": 6687
|
|
},
|
|
{
|
|
"epoch": 1.7787234042553193,
|
|
"grad_norm": 3.6990060806274414,
|
|
"learning_rate": 7.654732540084273e-06,
|
|
"loss": 0.7721,
|
|
"step": 6688
|
|
},
|
|
{
|
|
"epoch": 1.7789893617021275,
|
|
"grad_norm": 3.917276620864868,
|
|
"learning_rate": 7.653987201598422e-06,
|
|
"loss": 0.8214,
|
|
"step": 6689
|
|
},
|
|
{
|
|
"epoch": 1.7792553191489362,
|
|
"grad_norm": 4.091401100158691,
|
|
"learning_rate": 7.653241780995083e-06,
|
|
"loss": 0.7312,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 1.7795212765957447,
|
|
"grad_norm": 4.167940139770508,
|
|
"learning_rate": 7.652496278297319e-06,
|
|
"loss": 0.9115,
|
|
"step": 6691
|
|
},
|
|
{
|
|
"epoch": 1.7797872340425531,
|
|
"grad_norm": 3.9726510047912598,
|
|
"learning_rate": 7.651750693528197e-06,
|
|
"loss": 0.7857,
|
|
"step": 6692
|
|
},
|
|
{
|
|
"epoch": 1.7800531914893618,
|
|
"grad_norm": 3.7973427772521973,
|
|
"learning_rate": 7.651005026710786e-06,
|
|
"loss": 0.8594,
|
|
"step": 6693
|
|
},
|
|
{
|
|
"epoch": 1.78031914893617,
|
|
"grad_norm": 3.932386875152588,
|
|
"learning_rate": 7.65025927786816e-06,
|
|
"loss": 0.7873,
|
|
"step": 6694
|
|
},
|
|
{
|
|
"epoch": 1.7805851063829787,
|
|
"grad_norm": 3.6921486854553223,
|
|
"learning_rate": 7.64951344702339e-06,
|
|
"loss": 0.7569,
|
|
"step": 6695
|
|
},
|
|
{
|
|
"epoch": 1.7808510638297872,
|
|
"grad_norm": 4.060511589050293,
|
|
"learning_rate": 7.648767534199556e-06,
|
|
"loss": 0.7533,
|
|
"step": 6696
|
|
},
|
|
{
|
|
"epoch": 1.7811170212765957,
|
|
"grad_norm": 4.142321586608887,
|
|
"learning_rate": 7.648021539419737e-06,
|
|
"loss": 0.7836,
|
|
"step": 6697
|
|
},
|
|
{
|
|
"epoch": 1.7813829787234043,
|
|
"grad_norm": 4.071194648742676,
|
|
"learning_rate": 7.647275462707011e-06,
|
|
"loss": 0.7489,
|
|
"step": 6698
|
|
},
|
|
{
|
|
"epoch": 1.7816489361702128,
|
|
"grad_norm": 4.006459712982178,
|
|
"learning_rate": 7.646529304084469e-06,
|
|
"loss": 0.812,
|
|
"step": 6699
|
|
},
|
|
{
|
|
"epoch": 1.7819148936170213,
|
|
"grad_norm": 3.6437671184539795,
|
|
"learning_rate": 7.64578306357519e-06,
|
|
"loss": 0.7105,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 1.78218085106383,
|
|
"grad_norm": 4.094074249267578,
|
|
"learning_rate": 7.645036741202271e-06,
|
|
"loss": 0.9633,
|
|
"step": 6701
|
|
},
|
|
{
|
|
"epoch": 1.7824468085106382,
|
|
"grad_norm": 4.029351711273193,
|
|
"learning_rate": 7.6442903369888e-06,
|
|
"loss": 0.8999,
|
|
"step": 6702
|
|
},
|
|
{
|
|
"epoch": 1.7827127659574469,
|
|
"grad_norm": 3.8068792819976807,
|
|
"learning_rate": 7.643543850957872e-06,
|
|
"loss": 0.7305,
|
|
"step": 6703
|
|
},
|
|
{
|
|
"epoch": 1.7829787234042553,
|
|
"grad_norm": 4.074723243713379,
|
|
"learning_rate": 7.642797283132586e-06,
|
|
"loss": 0.8502,
|
|
"step": 6704
|
|
},
|
|
{
|
|
"epoch": 1.7832446808510638,
|
|
"grad_norm": 3.3582799434661865,
|
|
"learning_rate": 7.642050633536042e-06,
|
|
"loss": 0.7219,
|
|
"step": 6705
|
|
},
|
|
{
|
|
"epoch": 1.7835106382978725,
|
|
"grad_norm": 3.6337673664093018,
|
|
"learning_rate": 7.641303902191339e-06,
|
|
"loss": 0.7843,
|
|
"step": 6706
|
|
},
|
|
{
|
|
"epoch": 1.7837765957446807,
|
|
"grad_norm": 4.376511573791504,
|
|
"learning_rate": 7.640557089121583e-06,
|
|
"loss": 0.9737,
|
|
"step": 6707
|
|
},
|
|
{
|
|
"epoch": 1.7840425531914894,
|
|
"grad_norm": 3.6106109619140625,
|
|
"learning_rate": 7.639810194349884e-06,
|
|
"loss": 0.7549,
|
|
"step": 6708
|
|
},
|
|
{
|
|
"epoch": 1.7843085106382979,
|
|
"grad_norm": 3.9676499366760254,
|
|
"learning_rate": 7.639063217899348e-06,
|
|
"loss": 0.8951,
|
|
"step": 6709
|
|
},
|
|
{
|
|
"epoch": 1.7845744680851063,
|
|
"grad_norm": 3.7763378620147705,
|
|
"learning_rate": 7.638316159793089e-06,
|
|
"loss": 0.8431,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 1.784840425531915,
|
|
"grad_norm": 3.744365930557251,
|
|
"learning_rate": 7.637569020054221e-06,
|
|
"loss": 0.8697,
|
|
"step": 6711
|
|
},
|
|
{
|
|
"epoch": 1.7851063829787233,
|
|
"grad_norm": 3.4194390773773193,
|
|
"learning_rate": 7.636821798705864e-06,
|
|
"loss": 0.8979,
|
|
"step": 6712
|
|
},
|
|
{
|
|
"epoch": 1.785372340425532,
|
|
"grad_norm": 3.804483413696289,
|
|
"learning_rate": 7.636074495771134e-06,
|
|
"loss": 0.8484,
|
|
"step": 6713
|
|
},
|
|
{
|
|
"epoch": 1.7856382978723404,
|
|
"grad_norm": 4.089145660400391,
|
|
"learning_rate": 7.635327111273158e-06,
|
|
"loss": 0.892,
|
|
"step": 6714
|
|
},
|
|
{
|
|
"epoch": 1.7859042553191489,
|
|
"grad_norm": 4.051761150360107,
|
|
"learning_rate": 7.634579645235056e-06,
|
|
"loss": 0.8972,
|
|
"step": 6715
|
|
},
|
|
{
|
|
"epoch": 1.7861702127659576,
|
|
"grad_norm": 4.0280961990356445,
|
|
"learning_rate": 7.633832097679959e-06,
|
|
"loss": 0.8125,
|
|
"step": 6716
|
|
},
|
|
{
|
|
"epoch": 1.7864361702127658,
|
|
"grad_norm": 4.206244468688965,
|
|
"learning_rate": 7.633084468630996e-06,
|
|
"loss": 0.7675,
|
|
"step": 6717
|
|
},
|
|
{
|
|
"epoch": 1.7867021276595745,
|
|
"grad_norm": 3.4746177196502686,
|
|
"learning_rate": 7.6323367581113e-06,
|
|
"loss": 0.7079,
|
|
"step": 6718
|
|
},
|
|
{
|
|
"epoch": 1.786968085106383,
|
|
"grad_norm": 3.8518667221069336,
|
|
"learning_rate": 7.631588966144003e-06,
|
|
"loss": 0.965,
|
|
"step": 6719
|
|
},
|
|
{
|
|
"epoch": 1.7872340425531914,
|
|
"grad_norm": 3.605275869369507,
|
|
"learning_rate": 7.630841092752248e-06,
|
|
"loss": 0.7733,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 1.7875,
|
|
"grad_norm": 4.255527019500732,
|
|
"learning_rate": 7.63009313795917e-06,
|
|
"loss": 0.8645,
|
|
"step": 6721
|
|
},
|
|
{
|
|
"epoch": 1.7877659574468086,
|
|
"grad_norm": 3.93906307220459,
|
|
"learning_rate": 7.629345101787917e-06,
|
|
"loss": 0.8449,
|
|
"step": 6722
|
|
},
|
|
{
|
|
"epoch": 1.788031914893617,
|
|
"grad_norm": 4.351909160614014,
|
|
"learning_rate": 7.628596984261629e-06,
|
|
"loss": 0.8644,
|
|
"step": 6723
|
|
},
|
|
{
|
|
"epoch": 1.7882978723404257,
|
|
"grad_norm": 3.7165818214416504,
|
|
"learning_rate": 7.627848785403456e-06,
|
|
"loss": 0.7284,
|
|
"step": 6724
|
|
},
|
|
{
|
|
"epoch": 1.788563829787234,
|
|
"grad_norm": 3.9665300846099854,
|
|
"learning_rate": 7.6271005052365465e-06,
|
|
"loss": 0.8396,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 1.7888297872340426,
|
|
"grad_norm": 3.951260566711426,
|
|
"learning_rate": 7.6263521437840544e-06,
|
|
"loss": 0.9464,
|
|
"step": 6726
|
|
},
|
|
{
|
|
"epoch": 1.789095744680851,
|
|
"grad_norm": 4.499269008636475,
|
|
"learning_rate": 7.625603701069135e-06,
|
|
"loss": 0.9031,
|
|
"step": 6727
|
|
},
|
|
{
|
|
"epoch": 1.7893617021276595,
|
|
"grad_norm": 3.931673526763916,
|
|
"learning_rate": 7.6248551771149474e-06,
|
|
"loss": 0.823,
|
|
"step": 6728
|
|
},
|
|
{
|
|
"epoch": 1.7896276595744682,
|
|
"grad_norm": 4.128811836242676,
|
|
"learning_rate": 7.624106571944648e-06,
|
|
"loss": 0.7497,
|
|
"step": 6729
|
|
},
|
|
{
|
|
"epoch": 1.7898936170212765,
|
|
"grad_norm": 3.873683452606201,
|
|
"learning_rate": 7.623357885581403e-06,
|
|
"loss": 0.8247,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 1.7901595744680852,
|
|
"grad_norm": 3.7852728366851807,
|
|
"learning_rate": 7.6226091180483765e-06,
|
|
"loss": 0.8774,
|
|
"step": 6731
|
|
},
|
|
{
|
|
"epoch": 1.7904255319148936,
|
|
"grad_norm": 3.885965585708618,
|
|
"learning_rate": 7.621860269368735e-06,
|
|
"loss": 0.7561,
|
|
"step": 6732
|
|
},
|
|
{
|
|
"epoch": 1.790691489361702,
|
|
"grad_norm": 4.435214519500732,
|
|
"learning_rate": 7.6211113395656515e-06,
|
|
"loss": 0.9338,
|
|
"step": 6733
|
|
},
|
|
{
|
|
"epoch": 1.7909574468085108,
|
|
"grad_norm": 4.548224449157715,
|
|
"learning_rate": 7.6203623286622955e-06,
|
|
"loss": 0.8323,
|
|
"step": 6734
|
|
},
|
|
{
|
|
"epoch": 1.791223404255319,
|
|
"grad_norm": 3.8655712604522705,
|
|
"learning_rate": 7.619613236681845e-06,
|
|
"loss": 0.8654,
|
|
"step": 6735
|
|
},
|
|
{
|
|
"epoch": 1.7914893617021277,
|
|
"grad_norm": 3.7102363109588623,
|
|
"learning_rate": 7.618864063647477e-06,
|
|
"loss": 0.8015,
|
|
"step": 6736
|
|
},
|
|
{
|
|
"epoch": 1.7917553191489362,
|
|
"grad_norm": 4.260025978088379,
|
|
"learning_rate": 7.6181148095823705e-06,
|
|
"loss": 0.7977,
|
|
"step": 6737
|
|
},
|
|
{
|
|
"epoch": 1.7920212765957446,
|
|
"grad_norm": 4.112497806549072,
|
|
"learning_rate": 7.6173654745097106e-06,
|
|
"loss": 0.7763,
|
|
"step": 6738
|
|
},
|
|
{
|
|
"epoch": 1.7922872340425533,
|
|
"grad_norm": 3.998528003692627,
|
|
"learning_rate": 7.6166160584526795e-06,
|
|
"loss": 0.8215,
|
|
"step": 6739
|
|
},
|
|
{
|
|
"epoch": 1.7925531914893615,
|
|
"grad_norm": 3.6492180824279785,
|
|
"learning_rate": 7.615866561434468e-06,
|
|
"loss": 0.7239,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 1.7928191489361702,
|
|
"grad_norm": 3.8486714363098145,
|
|
"learning_rate": 7.615116983478266e-06,
|
|
"loss": 0.8435,
|
|
"step": 6741
|
|
},
|
|
{
|
|
"epoch": 1.7930851063829787,
|
|
"grad_norm": 3.863814353942871,
|
|
"learning_rate": 7.614367324607263e-06,
|
|
"loss": 0.8033,
|
|
"step": 6742
|
|
},
|
|
{
|
|
"epoch": 1.7933510638297872,
|
|
"grad_norm": 3.88749098777771,
|
|
"learning_rate": 7.613617584844662e-06,
|
|
"loss": 0.8072,
|
|
"step": 6743
|
|
},
|
|
{
|
|
"epoch": 1.7936170212765958,
|
|
"grad_norm": 3.9917871952056885,
|
|
"learning_rate": 7.612867764213651e-06,
|
|
"loss": 0.8138,
|
|
"step": 6744
|
|
},
|
|
{
|
|
"epoch": 1.7938829787234043,
|
|
"grad_norm": 4.009222507476807,
|
|
"learning_rate": 7.612117862737437e-06,
|
|
"loss": 0.7131,
|
|
"step": 6745
|
|
},
|
|
{
|
|
"epoch": 1.7941489361702128,
|
|
"grad_norm": 4.001763343811035,
|
|
"learning_rate": 7.611367880439221e-06,
|
|
"loss": 0.9487,
|
|
"step": 6746
|
|
},
|
|
{
|
|
"epoch": 1.7944148936170212,
|
|
"grad_norm": 4.2233805656433105,
|
|
"learning_rate": 7.610617817342207e-06,
|
|
"loss": 0.7244,
|
|
"step": 6747
|
|
},
|
|
{
|
|
"epoch": 1.7946808510638297,
|
|
"grad_norm": 3.7131550312042236,
|
|
"learning_rate": 7.609867673469607e-06,
|
|
"loss": 0.8303,
|
|
"step": 6748
|
|
},
|
|
{
|
|
"epoch": 1.7949468085106384,
|
|
"grad_norm": 4.046380519866943,
|
|
"learning_rate": 7.609117448844626e-06,
|
|
"loss": 0.8372,
|
|
"step": 6749
|
|
},
|
|
{
|
|
"epoch": 1.7952127659574468,
|
|
"grad_norm": 4.070696830749512,
|
|
"learning_rate": 7.60836714349048e-06,
|
|
"loss": 0.8259,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 1.7954787234042553,
|
|
"grad_norm": 3.893247604370117,
|
|
"learning_rate": 7.607616757430383e-06,
|
|
"loss": 0.8598,
|
|
"step": 6751
|
|
},
|
|
{
|
|
"epoch": 1.795744680851064,
|
|
"grad_norm": 3.7077648639678955,
|
|
"learning_rate": 7.606866290687555e-06,
|
|
"loss": 0.8036,
|
|
"step": 6752
|
|
},
|
|
{
|
|
"epoch": 1.7960106382978722,
|
|
"grad_norm": 4.3204450607299805,
|
|
"learning_rate": 7.606115743285213e-06,
|
|
"loss": 0.8424,
|
|
"step": 6753
|
|
},
|
|
{
|
|
"epoch": 1.796276595744681,
|
|
"grad_norm": 3.3555731773376465,
|
|
"learning_rate": 7.605365115246581e-06,
|
|
"loss": 0.8369,
|
|
"step": 6754
|
|
},
|
|
{
|
|
"epoch": 1.7965425531914894,
|
|
"grad_norm": 3.561962842941284,
|
|
"learning_rate": 7.604614406594888e-06,
|
|
"loss": 0.7841,
|
|
"step": 6755
|
|
},
|
|
{
|
|
"epoch": 1.7968085106382978,
|
|
"grad_norm": 4.0263166427612305,
|
|
"learning_rate": 7.6038636173533565e-06,
|
|
"loss": 0.7135,
|
|
"step": 6756
|
|
},
|
|
{
|
|
"epoch": 1.7970744680851065,
|
|
"grad_norm": 3.8524928092956543,
|
|
"learning_rate": 7.603112747545218e-06,
|
|
"loss": 0.8327,
|
|
"step": 6757
|
|
},
|
|
{
|
|
"epoch": 1.7973404255319148,
|
|
"grad_norm": 3.5046606063842773,
|
|
"learning_rate": 7.602361797193709e-06,
|
|
"loss": 0.8162,
|
|
"step": 6758
|
|
},
|
|
{
|
|
"epoch": 1.7976063829787234,
|
|
"grad_norm": 4.547070503234863,
|
|
"learning_rate": 7.60161076632206e-06,
|
|
"loss": 0.8014,
|
|
"step": 6759
|
|
},
|
|
{
|
|
"epoch": 1.797872340425532,
|
|
"grad_norm": 4.453802585601807,
|
|
"learning_rate": 7.600859654953513e-06,
|
|
"loss": 0.9287,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 1.7981382978723404,
|
|
"grad_norm": 4.324093818664551,
|
|
"learning_rate": 7.6001084631113046e-06,
|
|
"loss": 0.848,
|
|
"step": 6761
|
|
},
|
|
{
|
|
"epoch": 1.798404255319149,
|
|
"grad_norm": 4.146725177764893,
|
|
"learning_rate": 7.599357190818679e-06,
|
|
"loss": 0.8875,
|
|
"step": 6762
|
|
},
|
|
{
|
|
"epoch": 1.7986702127659573,
|
|
"grad_norm": 4.132041931152344,
|
|
"learning_rate": 7.598605838098882e-06,
|
|
"loss": 0.8413,
|
|
"step": 6763
|
|
},
|
|
{
|
|
"epoch": 1.798936170212766,
|
|
"grad_norm": 3.829908847808838,
|
|
"learning_rate": 7.59785440497516e-06,
|
|
"loss": 0.843,
|
|
"step": 6764
|
|
},
|
|
{
|
|
"epoch": 1.7992021276595744,
|
|
"grad_norm": 4.308759689331055,
|
|
"learning_rate": 7.597102891470766e-06,
|
|
"loss": 0.7839,
|
|
"step": 6765
|
|
},
|
|
{
|
|
"epoch": 1.799468085106383,
|
|
"grad_norm": 3.6383216381073,
|
|
"learning_rate": 7.59635129760895e-06,
|
|
"loss": 0.608,
|
|
"step": 6766
|
|
},
|
|
{
|
|
"epoch": 1.7997340425531916,
|
|
"grad_norm": 3.6101510524749756,
|
|
"learning_rate": 7.595599623412968e-06,
|
|
"loss": 0.7246,
|
|
"step": 6767
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 3.51635479927063,
|
|
"learning_rate": 7.594847868906076e-06,
|
|
"loss": 0.798,
|
|
"step": 6768
|
|
},
|
|
{
|
|
"epoch": 1.8002659574468085,
|
|
"grad_norm": 3.927917718887329,
|
|
"learning_rate": 7.594096034111538e-06,
|
|
"loss": 0.8229,
|
|
"step": 6769
|
|
},
|
|
{
|
|
"epoch": 1.800531914893617,
|
|
"grad_norm": 4.29150390625,
|
|
"learning_rate": 7.5933441190526146e-06,
|
|
"loss": 0.922,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 1.8007978723404254,
|
|
"grad_norm": 3.8685336112976074,
|
|
"learning_rate": 7.592592123752569e-06,
|
|
"loss": 0.7242,
|
|
"step": 6771
|
|
},
|
|
{
|
|
"epoch": 1.8010638297872341,
|
|
"grad_norm": 3.9335358142852783,
|
|
"learning_rate": 7.591840048234673e-06,
|
|
"loss": 0.8717,
|
|
"step": 6772
|
|
},
|
|
{
|
|
"epoch": 1.8013297872340426,
|
|
"grad_norm": 4.033020496368408,
|
|
"learning_rate": 7.591087892522193e-06,
|
|
"loss": 0.8129,
|
|
"step": 6773
|
|
},
|
|
{
|
|
"epoch": 1.801595744680851,
|
|
"grad_norm": 4.348812580108643,
|
|
"learning_rate": 7.590335656638403e-06,
|
|
"loss": 0.8352,
|
|
"step": 6774
|
|
},
|
|
{
|
|
"epoch": 1.8018617021276597,
|
|
"grad_norm": 3.683743476867676,
|
|
"learning_rate": 7.589583340606579e-06,
|
|
"loss": 0.8427,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 1.802127659574468,
|
|
"grad_norm": 3.782118797302246,
|
|
"learning_rate": 7.588830944449996e-06,
|
|
"loss": 0.8659,
|
|
"step": 6776
|
|
},
|
|
{
|
|
"epoch": 1.8023936170212767,
|
|
"grad_norm": 4.097870826721191,
|
|
"learning_rate": 7.5880784681919365e-06,
|
|
"loss": 0.7472,
|
|
"step": 6777
|
|
},
|
|
{
|
|
"epoch": 1.8026595744680851,
|
|
"grad_norm": 3.921733856201172,
|
|
"learning_rate": 7.587325911855681e-06,
|
|
"loss": 0.8388,
|
|
"step": 6778
|
|
},
|
|
{
|
|
"epoch": 1.8029255319148936,
|
|
"grad_norm": 4.305613994598389,
|
|
"learning_rate": 7.586573275464517e-06,
|
|
"loss": 1.0133,
|
|
"step": 6779
|
|
},
|
|
{
|
|
"epoch": 1.8031914893617023,
|
|
"grad_norm": 4.13943338394165,
|
|
"learning_rate": 7.58582055904173e-06,
|
|
"loss": 0.7861,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 1.8034574468085105,
|
|
"grad_norm": 4.047939777374268,
|
|
"learning_rate": 7.585067762610612e-06,
|
|
"loss": 0.8422,
|
|
"step": 6781
|
|
},
|
|
{
|
|
"epoch": 1.8037234042553192,
|
|
"grad_norm": 3.8695991039276123,
|
|
"learning_rate": 7.584314886194451e-06,
|
|
"loss": 0.8365,
|
|
"step": 6782
|
|
},
|
|
{
|
|
"epoch": 1.8039893617021276,
|
|
"grad_norm": 3.7691190242767334,
|
|
"learning_rate": 7.583561929816547e-06,
|
|
"loss": 0.8293,
|
|
"step": 6783
|
|
},
|
|
{
|
|
"epoch": 1.804255319148936,
|
|
"grad_norm": 4.062473773956299,
|
|
"learning_rate": 7.5828088935001954e-06,
|
|
"loss": 0.8118,
|
|
"step": 6784
|
|
},
|
|
{
|
|
"epoch": 1.8045212765957448,
|
|
"grad_norm": 4.588931560516357,
|
|
"learning_rate": 7.582055777268693e-06,
|
|
"loss": 0.8835,
|
|
"step": 6785
|
|
},
|
|
{
|
|
"epoch": 1.804787234042553,
|
|
"grad_norm": 3.1973307132720947,
|
|
"learning_rate": 7.581302581145346e-06,
|
|
"loss": 0.6728,
|
|
"step": 6786
|
|
},
|
|
{
|
|
"epoch": 1.8050531914893617,
|
|
"grad_norm": 4.123830318450928,
|
|
"learning_rate": 7.5805493051534605e-06,
|
|
"loss": 0.9315,
|
|
"step": 6787
|
|
},
|
|
{
|
|
"epoch": 1.8053191489361702,
|
|
"grad_norm": 3.992337942123413,
|
|
"learning_rate": 7.57979594931634e-06,
|
|
"loss": 0.7951,
|
|
"step": 6788
|
|
},
|
|
{
|
|
"epoch": 1.8055851063829786,
|
|
"grad_norm": 3.456594467163086,
|
|
"learning_rate": 7.579042513657294e-06,
|
|
"loss": 0.8114,
|
|
"step": 6789
|
|
},
|
|
{
|
|
"epoch": 1.8058510638297873,
|
|
"grad_norm": 4.029353618621826,
|
|
"learning_rate": 7.578288998199638e-06,
|
|
"loss": 0.895,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 1.8061170212765958,
|
|
"grad_norm": 4.027595520019531,
|
|
"learning_rate": 7.577535402966683e-06,
|
|
"loss": 0.8416,
|
|
"step": 6791
|
|
},
|
|
{
|
|
"epoch": 1.8063829787234043,
|
|
"grad_norm": 3.8989861011505127,
|
|
"learning_rate": 7.5767817279817505e-06,
|
|
"loss": 0.8275,
|
|
"step": 6792
|
|
},
|
|
{
|
|
"epoch": 1.8066489361702127,
|
|
"grad_norm": 4.1814961433410645,
|
|
"learning_rate": 7.576027973268155e-06,
|
|
"loss": 0.7388,
|
|
"step": 6793
|
|
},
|
|
{
|
|
"epoch": 1.8069148936170212,
|
|
"grad_norm": 3.8830153942108154,
|
|
"learning_rate": 7.575274138849223e-06,
|
|
"loss": 0.7622,
|
|
"step": 6794
|
|
},
|
|
{
|
|
"epoch": 1.8071808510638299,
|
|
"grad_norm": 3.6945488452911377,
|
|
"learning_rate": 7.574520224748276e-06,
|
|
"loss": 0.6767,
|
|
"step": 6795
|
|
},
|
|
{
|
|
"epoch": 1.8074468085106383,
|
|
"grad_norm": 3.8499093055725098,
|
|
"learning_rate": 7.5737662309886415e-06,
|
|
"loss": 0.8128,
|
|
"step": 6796
|
|
},
|
|
{
|
|
"epoch": 1.8077127659574468,
|
|
"grad_norm": 4.120965480804443,
|
|
"learning_rate": 7.573012157593651e-06,
|
|
"loss": 0.8356,
|
|
"step": 6797
|
|
},
|
|
{
|
|
"epoch": 1.8079787234042555,
|
|
"grad_norm": 3.9702072143554688,
|
|
"learning_rate": 7.572258004586635e-06,
|
|
"loss": 0.773,
|
|
"step": 6798
|
|
},
|
|
{
|
|
"epoch": 1.8082446808510637,
|
|
"grad_norm": 3.910039186477661,
|
|
"learning_rate": 7.5715037719909266e-06,
|
|
"loss": 0.7577,
|
|
"step": 6799
|
|
},
|
|
{
|
|
"epoch": 1.8085106382978724,
|
|
"grad_norm": 3.9392266273498535,
|
|
"learning_rate": 7.570749459829865e-06,
|
|
"loss": 0.9043,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 1.8087765957446809,
|
|
"grad_norm": 3.9405999183654785,
|
|
"learning_rate": 7.56999506812679e-06,
|
|
"loss": 0.8526,
|
|
"step": 6801
|
|
},
|
|
{
|
|
"epoch": 1.8090425531914893,
|
|
"grad_norm": 3.701950788497925,
|
|
"learning_rate": 7.569240596905038e-06,
|
|
"loss": 0.7136,
|
|
"step": 6802
|
|
},
|
|
{
|
|
"epoch": 1.809308510638298,
|
|
"grad_norm": 3.7333173751831055,
|
|
"learning_rate": 7.568486046187959e-06,
|
|
"loss": 0.8191,
|
|
"step": 6803
|
|
},
|
|
{
|
|
"epoch": 1.8095744680851062,
|
|
"grad_norm": 3.9274251461029053,
|
|
"learning_rate": 7.567731415998898e-06,
|
|
"loss": 0.8371,
|
|
"step": 6804
|
|
},
|
|
{
|
|
"epoch": 1.809840425531915,
|
|
"grad_norm": 4.320472240447998,
|
|
"learning_rate": 7.566976706361204e-06,
|
|
"loss": 0.8743,
|
|
"step": 6805
|
|
},
|
|
{
|
|
"epoch": 1.8101063829787234,
|
|
"grad_norm": 4.124827861785889,
|
|
"learning_rate": 7.566221917298228e-06,
|
|
"loss": 0.8599,
|
|
"step": 6806
|
|
},
|
|
{
|
|
"epoch": 1.8103723404255319,
|
|
"grad_norm": 4.09792947769165,
|
|
"learning_rate": 7.565467048833325e-06,
|
|
"loss": 0.782,
|
|
"step": 6807
|
|
},
|
|
{
|
|
"epoch": 1.8106382978723405,
|
|
"grad_norm": 4.003774166107178,
|
|
"learning_rate": 7.56471210098985e-06,
|
|
"loss": 0.7946,
|
|
"step": 6808
|
|
},
|
|
{
|
|
"epoch": 1.8109042553191488,
|
|
"grad_norm": 4.259424686431885,
|
|
"learning_rate": 7.563957073791164e-06,
|
|
"loss": 0.8328,
|
|
"step": 6809
|
|
},
|
|
{
|
|
"epoch": 1.8111702127659575,
|
|
"grad_norm": 3.9565248489379883,
|
|
"learning_rate": 7.563201967260627e-06,
|
|
"loss": 0.8544,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 1.811436170212766,
|
|
"grad_norm": 3.88087797164917,
|
|
"learning_rate": 7.562446781421604e-06,
|
|
"loss": 0.7987,
|
|
"step": 6811
|
|
},
|
|
{
|
|
"epoch": 1.8117021276595744,
|
|
"grad_norm": 3.9190945625305176,
|
|
"learning_rate": 7.5616915162974594e-06,
|
|
"loss": 0.8162,
|
|
"step": 6812
|
|
},
|
|
{
|
|
"epoch": 1.811968085106383,
|
|
"grad_norm": 3.700688600540161,
|
|
"learning_rate": 7.560936171911564e-06,
|
|
"loss": 0.7738,
|
|
"step": 6813
|
|
},
|
|
{
|
|
"epoch": 1.8122340425531915,
|
|
"grad_norm": 4.023971080780029,
|
|
"learning_rate": 7.560180748287289e-06,
|
|
"loss": 0.8266,
|
|
"step": 6814
|
|
},
|
|
{
|
|
"epoch": 1.8125,
|
|
"grad_norm": 4.754519462585449,
|
|
"learning_rate": 7.559425245448006e-06,
|
|
"loss": 1.0779,
|
|
"step": 6815
|
|
},
|
|
{
|
|
"epoch": 1.8127659574468085,
|
|
"grad_norm": 4.043941497802734,
|
|
"learning_rate": 7.558669663417093e-06,
|
|
"loss": 0.7789,
|
|
"step": 6816
|
|
},
|
|
{
|
|
"epoch": 1.813031914893617,
|
|
"grad_norm": 4.064941883087158,
|
|
"learning_rate": 7.557914002217929e-06,
|
|
"loss": 0.8235,
|
|
"step": 6817
|
|
},
|
|
{
|
|
"epoch": 1.8132978723404256,
|
|
"grad_norm": 4.2770562171936035,
|
|
"learning_rate": 7.5571582618738936e-06,
|
|
"loss": 0.8647,
|
|
"step": 6818
|
|
},
|
|
{
|
|
"epoch": 1.813563829787234,
|
|
"grad_norm": 3.758079767227173,
|
|
"learning_rate": 7.55640244240837e-06,
|
|
"loss": 0.765,
|
|
"step": 6819
|
|
},
|
|
{
|
|
"epoch": 1.8138297872340425,
|
|
"grad_norm": 4.024742603302002,
|
|
"learning_rate": 7.555646543844747e-06,
|
|
"loss": 0.9143,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 1.8140957446808512,
|
|
"grad_norm": 4.142058372497559,
|
|
"learning_rate": 7.55489056620641e-06,
|
|
"loss": 0.8872,
|
|
"step": 6821
|
|
},
|
|
{
|
|
"epoch": 1.8143617021276595,
|
|
"grad_norm": 4.0311455726623535,
|
|
"learning_rate": 7.554134509516751e-06,
|
|
"loss": 0.7628,
|
|
"step": 6822
|
|
},
|
|
{
|
|
"epoch": 1.8146276595744681,
|
|
"grad_norm": 3.73848032951355,
|
|
"learning_rate": 7.553378373799163e-06,
|
|
"loss": 0.807,
|
|
"step": 6823
|
|
},
|
|
{
|
|
"epoch": 1.8148936170212766,
|
|
"grad_norm": 3.553116798400879,
|
|
"learning_rate": 7.552622159077041e-06,
|
|
"loss": 0.8166,
|
|
"step": 6824
|
|
},
|
|
{
|
|
"epoch": 1.815159574468085,
|
|
"grad_norm": 3.678316116333008,
|
|
"learning_rate": 7.5518658653737844e-06,
|
|
"loss": 0.8462,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 1.8154255319148938,
|
|
"grad_norm": 4.440575122833252,
|
|
"learning_rate": 7.551109492712795e-06,
|
|
"loss": 0.8861,
|
|
"step": 6826
|
|
},
|
|
{
|
|
"epoch": 1.815691489361702,
|
|
"grad_norm": 4.359316825866699,
|
|
"learning_rate": 7.550353041117473e-06,
|
|
"loss": 0.8025,
|
|
"step": 6827
|
|
},
|
|
{
|
|
"epoch": 1.8159574468085107,
|
|
"grad_norm": 3.976832389831543,
|
|
"learning_rate": 7.549596510611226e-06,
|
|
"loss": 0.8486,
|
|
"step": 6828
|
|
},
|
|
{
|
|
"epoch": 1.8162234042553191,
|
|
"grad_norm": 3.64974308013916,
|
|
"learning_rate": 7.54883990121746e-06,
|
|
"loss": 0.6982,
|
|
"step": 6829
|
|
},
|
|
{
|
|
"epoch": 1.8164893617021276,
|
|
"grad_norm": 4.051089286804199,
|
|
"learning_rate": 7.548083212959588e-06,
|
|
"loss": 0.8417,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 1.8167553191489363,
|
|
"grad_norm": 3.949113130569458,
|
|
"learning_rate": 7.547326445861021e-06,
|
|
"loss": 0.7382,
|
|
"step": 6831
|
|
},
|
|
{
|
|
"epoch": 1.8170212765957445,
|
|
"grad_norm": 3.896155834197998,
|
|
"learning_rate": 7.546569599945174e-06,
|
|
"loss": 0.9312,
|
|
"step": 6832
|
|
},
|
|
{
|
|
"epoch": 1.8172872340425532,
|
|
"grad_norm": 4.127990245819092,
|
|
"learning_rate": 7.545812675235467e-06,
|
|
"loss": 0.9422,
|
|
"step": 6833
|
|
},
|
|
{
|
|
"epoch": 1.8175531914893617,
|
|
"grad_norm": 3.8345584869384766,
|
|
"learning_rate": 7.545055671755316e-06,
|
|
"loss": 0.8672,
|
|
"step": 6834
|
|
},
|
|
{
|
|
"epoch": 1.8178191489361701,
|
|
"grad_norm": 3.544022560119629,
|
|
"learning_rate": 7.544298589528148e-06,
|
|
"loss": 0.8378,
|
|
"step": 6835
|
|
},
|
|
{
|
|
"epoch": 1.8180851063829788,
|
|
"grad_norm": 3.773446798324585,
|
|
"learning_rate": 7.543541428577386e-06,
|
|
"loss": 0.7617,
|
|
"step": 6836
|
|
},
|
|
{
|
|
"epoch": 1.8183510638297873,
|
|
"grad_norm": 4.245392322540283,
|
|
"learning_rate": 7.542784188926456e-06,
|
|
"loss": 0.7689,
|
|
"step": 6837
|
|
},
|
|
{
|
|
"epoch": 1.8186170212765957,
|
|
"grad_norm": 4.0154924392700195,
|
|
"learning_rate": 7.542026870598791e-06,
|
|
"loss": 0.7467,
|
|
"step": 6838
|
|
},
|
|
{
|
|
"epoch": 1.8188829787234042,
|
|
"grad_norm": 4.492767810821533,
|
|
"learning_rate": 7.5412694736178206e-06,
|
|
"loss": 0.9573,
|
|
"step": 6839
|
|
},
|
|
{
|
|
"epoch": 1.8191489361702127,
|
|
"grad_norm": 3.7740705013275146,
|
|
"learning_rate": 7.540511998006982e-06,
|
|
"loss": 0.6853,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 1.8194148936170214,
|
|
"grad_norm": 4.6515655517578125,
|
|
"learning_rate": 7.539754443789709e-06,
|
|
"loss": 0.9875,
|
|
"step": 6841
|
|
},
|
|
{
|
|
"epoch": 1.8196808510638298,
|
|
"grad_norm": 4.019815921783447,
|
|
"learning_rate": 7.5389968109894465e-06,
|
|
"loss": 0.7956,
|
|
"step": 6842
|
|
},
|
|
{
|
|
"epoch": 1.8199468085106383,
|
|
"grad_norm": 3.8876473903656006,
|
|
"learning_rate": 7.5382390996296315e-06,
|
|
"loss": 0.8368,
|
|
"step": 6843
|
|
},
|
|
{
|
|
"epoch": 1.820212765957447,
|
|
"grad_norm": 4.036003112792969,
|
|
"learning_rate": 7.537481309733709e-06,
|
|
"loss": 0.7615,
|
|
"step": 6844
|
|
},
|
|
{
|
|
"epoch": 1.8204787234042552,
|
|
"grad_norm": 3.9731733798980713,
|
|
"learning_rate": 7.53672344132513e-06,
|
|
"loss": 0.8408,
|
|
"step": 6845
|
|
},
|
|
{
|
|
"epoch": 1.820744680851064,
|
|
"grad_norm": 4.149892807006836,
|
|
"learning_rate": 7.53596549442734e-06,
|
|
"loss": 0.7553,
|
|
"step": 6846
|
|
},
|
|
{
|
|
"epoch": 1.8210106382978724,
|
|
"grad_norm": 3.9756197929382324,
|
|
"learning_rate": 7.535207469063791e-06,
|
|
"loss": 0.8429,
|
|
"step": 6847
|
|
},
|
|
{
|
|
"epoch": 1.8212765957446808,
|
|
"grad_norm": 4.044477939605713,
|
|
"learning_rate": 7.53444936525794e-06,
|
|
"loss": 0.7761,
|
|
"step": 6848
|
|
},
|
|
{
|
|
"epoch": 1.8215425531914895,
|
|
"grad_norm": 3.613596200942993,
|
|
"learning_rate": 7.53369118303324e-06,
|
|
"loss": 0.808,
|
|
"step": 6849
|
|
},
|
|
{
|
|
"epoch": 1.8218085106382977,
|
|
"grad_norm": 4.789092540740967,
|
|
"learning_rate": 7.532932922413152e-06,
|
|
"loss": 0.8992,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 1.8220744680851064,
|
|
"grad_norm": 3.8128976821899414,
|
|
"learning_rate": 7.532174583421138e-06,
|
|
"loss": 0.7259,
|
|
"step": 6851
|
|
},
|
|
{
|
|
"epoch": 1.8223404255319149,
|
|
"grad_norm": 3.685126781463623,
|
|
"learning_rate": 7.53141616608066e-06,
|
|
"loss": 0.7971,
|
|
"step": 6852
|
|
},
|
|
{
|
|
"epoch": 1.8226063829787233,
|
|
"grad_norm": 3.8787617683410645,
|
|
"learning_rate": 7.5306576704151865e-06,
|
|
"loss": 0.7447,
|
|
"step": 6853
|
|
},
|
|
{
|
|
"epoch": 1.822872340425532,
|
|
"grad_norm": 4.506245136260986,
|
|
"learning_rate": 7.529899096448185e-06,
|
|
"loss": 0.8898,
|
|
"step": 6854
|
|
},
|
|
{
|
|
"epoch": 1.8231382978723403,
|
|
"grad_norm": 4.238636016845703,
|
|
"learning_rate": 7.529140444203127e-06,
|
|
"loss": 0.8057,
|
|
"step": 6855
|
|
},
|
|
{
|
|
"epoch": 1.823404255319149,
|
|
"grad_norm": 4.039521217346191,
|
|
"learning_rate": 7.528381713703485e-06,
|
|
"loss": 0.772,
|
|
"step": 6856
|
|
},
|
|
{
|
|
"epoch": 1.8236702127659574,
|
|
"grad_norm": 3.6089868545532227,
|
|
"learning_rate": 7.5276229049727375e-06,
|
|
"loss": 0.8194,
|
|
"step": 6857
|
|
},
|
|
{
|
|
"epoch": 1.8239361702127659,
|
|
"grad_norm": 3.4110054969787598,
|
|
"learning_rate": 7.52686401803436e-06,
|
|
"loss": 0.6902,
|
|
"step": 6858
|
|
},
|
|
{
|
|
"epoch": 1.8242021276595746,
|
|
"grad_norm": 3.6139302253723145,
|
|
"learning_rate": 7.526105052911836e-06,
|
|
"loss": 0.8318,
|
|
"step": 6859
|
|
},
|
|
{
|
|
"epoch": 1.824468085106383,
|
|
"grad_norm": 4.215152740478516,
|
|
"learning_rate": 7.525346009628647e-06,
|
|
"loss": 0.8303,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 1.8247340425531915,
|
|
"grad_norm": 3.8578953742980957,
|
|
"learning_rate": 7.524586888208278e-06,
|
|
"loss": 0.8625,
|
|
"step": 6861
|
|
},
|
|
{
|
|
"epoch": 1.825,
|
|
"grad_norm": 3.8874824047088623,
|
|
"learning_rate": 7.52382768867422e-06,
|
|
"loss": 0.7106,
|
|
"step": 6862
|
|
},
|
|
{
|
|
"epoch": 1.8252659574468084,
|
|
"grad_norm": 3.746168851852417,
|
|
"learning_rate": 7.5230684110499604e-06,
|
|
"loss": 0.8753,
|
|
"step": 6863
|
|
},
|
|
{
|
|
"epoch": 1.825531914893617,
|
|
"grad_norm": 3.70993971824646,
|
|
"learning_rate": 7.522309055358995e-06,
|
|
"loss": 0.7393,
|
|
"step": 6864
|
|
},
|
|
{
|
|
"epoch": 1.8257978723404256,
|
|
"grad_norm": 3.599679470062256,
|
|
"learning_rate": 7.5215496216248175e-06,
|
|
"loss": 0.893,
|
|
"step": 6865
|
|
},
|
|
{
|
|
"epoch": 1.826063829787234,
|
|
"grad_norm": 3.7604589462280273,
|
|
"learning_rate": 7.520790109870926e-06,
|
|
"loss": 0.7966,
|
|
"step": 6866
|
|
},
|
|
{
|
|
"epoch": 1.8263297872340427,
|
|
"grad_norm": 3.9113166332244873,
|
|
"learning_rate": 7.5200305201208205e-06,
|
|
"loss": 0.8071,
|
|
"step": 6867
|
|
},
|
|
{
|
|
"epoch": 1.826595744680851,
|
|
"grad_norm": 4.262864112854004,
|
|
"learning_rate": 7.519270852398002e-06,
|
|
"loss": 0.7942,
|
|
"step": 6868
|
|
},
|
|
{
|
|
"epoch": 1.8268617021276596,
|
|
"grad_norm": 4.096951007843018,
|
|
"learning_rate": 7.5185111067259804e-06,
|
|
"loss": 0.717,
|
|
"step": 6869
|
|
},
|
|
{
|
|
"epoch": 1.827127659574468,
|
|
"grad_norm": 4.112506866455078,
|
|
"learning_rate": 7.517751283128258e-06,
|
|
"loss": 0.8871,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 1.8273936170212766,
|
|
"grad_norm": 3.5203890800476074,
|
|
"learning_rate": 7.516991381628347e-06,
|
|
"loss": 0.796,
|
|
"step": 6871
|
|
},
|
|
{
|
|
"epoch": 1.8276595744680852,
|
|
"grad_norm": 3.556929588317871,
|
|
"learning_rate": 7.516231402249758e-06,
|
|
"loss": 0.8346,
|
|
"step": 6872
|
|
},
|
|
{
|
|
"epoch": 1.8279255319148935,
|
|
"grad_norm": 3.3509085178375244,
|
|
"learning_rate": 7.51547134501601e-06,
|
|
"loss": 0.7763,
|
|
"step": 6873
|
|
},
|
|
{
|
|
"epoch": 1.8281914893617022,
|
|
"grad_norm": 4.3177103996276855,
|
|
"learning_rate": 7.514711209950615e-06,
|
|
"loss": 0.7943,
|
|
"step": 6874
|
|
},
|
|
{
|
|
"epoch": 1.8284574468085106,
|
|
"grad_norm": 3.8919661045074463,
|
|
"learning_rate": 7.513950997077094e-06,
|
|
"loss": 0.7541,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 1.828723404255319,
|
|
"grad_norm": 3.506849765777588,
|
|
"learning_rate": 7.513190706418969e-06,
|
|
"loss": 0.8451,
|
|
"step": 6876
|
|
},
|
|
{
|
|
"epoch": 1.8289893617021278,
|
|
"grad_norm": 4.711544513702393,
|
|
"learning_rate": 7.512430337999768e-06,
|
|
"loss": 0.9569,
|
|
"step": 6877
|
|
},
|
|
{
|
|
"epoch": 1.829255319148936,
|
|
"grad_norm": 4.111194610595703,
|
|
"learning_rate": 7.511669891843011e-06,
|
|
"loss": 0.9289,
|
|
"step": 6878
|
|
},
|
|
{
|
|
"epoch": 1.8295212765957447,
|
|
"grad_norm": 3.4928982257843018,
|
|
"learning_rate": 7.510909367972231e-06,
|
|
"loss": 0.7627,
|
|
"step": 6879
|
|
},
|
|
{
|
|
"epoch": 1.8297872340425532,
|
|
"grad_norm": 3.737337827682495,
|
|
"learning_rate": 7.5101487664109605e-06,
|
|
"loss": 0.7463,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 1.8300531914893616,
|
|
"grad_norm": 3.4611358642578125,
|
|
"learning_rate": 7.50938808718273e-06,
|
|
"loss": 0.7764,
|
|
"step": 6881
|
|
},
|
|
{
|
|
"epoch": 1.8303191489361703,
|
|
"grad_norm": 3.901796817779541,
|
|
"learning_rate": 7.508627330311078e-06,
|
|
"loss": 0.9079,
|
|
"step": 6882
|
|
},
|
|
{
|
|
"epoch": 1.8305851063829788,
|
|
"grad_norm": 3.8375611305236816,
|
|
"learning_rate": 7.507866495819543e-06,
|
|
"loss": 0.7861,
|
|
"step": 6883
|
|
},
|
|
{
|
|
"epoch": 1.8308510638297872,
|
|
"grad_norm": 3.7982888221740723,
|
|
"learning_rate": 7.507105583731666e-06,
|
|
"loss": 0.8905,
|
|
"step": 6884
|
|
},
|
|
{
|
|
"epoch": 1.8311170212765957,
|
|
"grad_norm": 3.70542573928833,
|
|
"learning_rate": 7.506344594070991e-06,
|
|
"loss": 0.7173,
|
|
"step": 6885
|
|
},
|
|
{
|
|
"epoch": 1.8313829787234042,
|
|
"grad_norm": 3.7828474044799805,
|
|
"learning_rate": 7.505583526861064e-06,
|
|
"loss": 0.8687,
|
|
"step": 6886
|
|
},
|
|
{
|
|
"epoch": 1.8316489361702128,
|
|
"grad_norm": 4.376963138580322,
|
|
"learning_rate": 7.504822382125432e-06,
|
|
"loss": 0.982,
|
|
"step": 6887
|
|
},
|
|
{
|
|
"epoch": 1.8319148936170213,
|
|
"grad_norm": 3.9631431102752686,
|
|
"learning_rate": 7.504061159887646e-06,
|
|
"loss": 0.8186,
|
|
"step": 6888
|
|
},
|
|
{
|
|
"epoch": 1.8321808510638298,
|
|
"grad_norm": 4.296795845031738,
|
|
"learning_rate": 7.5032998601712605e-06,
|
|
"loss": 0.8346,
|
|
"step": 6889
|
|
},
|
|
{
|
|
"epoch": 1.8324468085106385,
|
|
"grad_norm": 3.889289617538452,
|
|
"learning_rate": 7.502538482999829e-06,
|
|
"loss": 0.8344,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 1.8327127659574467,
|
|
"grad_norm": 4.060772895812988,
|
|
"learning_rate": 7.50177702839691e-06,
|
|
"loss": 0.7625,
|
|
"step": 6891
|
|
},
|
|
{
|
|
"epoch": 1.8329787234042554,
|
|
"grad_norm": 3.6209208965301514,
|
|
"learning_rate": 7.501015496386066e-06,
|
|
"loss": 0.779,
|
|
"step": 6892
|
|
},
|
|
{
|
|
"epoch": 1.8332446808510638,
|
|
"grad_norm": 3.7519564628601074,
|
|
"learning_rate": 7.5002538869908556e-06,
|
|
"loss": 0.7245,
|
|
"step": 6893
|
|
},
|
|
{
|
|
"epoch": 1.8335106382978723,
|
|
"grad_norm": 3.842135190963745,
|
|
"learning_rate": 7.499492200234849e-06,
|
|
"loss": 0.7977,
|
|
"step": 6894
|
|
},
|
|
{
|
|
"epoch": 1.833776595744681,
|
|
"grad_norm": 4.067161560058594,
|
|
"learning_rate": 7.498730436141609e-06,
|
|
"loss": 0.8287,
|
|
"step": 6895
|
|
},
|
|
{
|
|
"epoch": 1.8340425531914892,
|
|
"grad_norm": 3.8573522567749023,
|
|
"learning_rate": 7.497968594734708e-06,
|
|
"loss": 0.7012,
|
|
"step": 6896
|
|
},
|
|
{
|
|
"epoch": 1.834308510638298,
|
|
"grad_norm": 3.792734146118164,
|
|
"learning_rate": 7.4972066760377184e-06,
|
|
"loss": 0.7986,
|
|
"step": 6897
|
|
},
|
|
{
|
|
"epoch": 1.8345744680851064,
|
|
"grad_norm": 4.287036418914795,
|
|
"learning_rate": 7.496444680074213e-06,
|
|
"loss": 0.8091,
|
|
"step": 6898
|
|
},
|
|
{
|
|
"epoch": 1.8348404255319148,
|
|
"grad_norm": 3.9161949157714844,
|
|
"learning_rate": 7.49568260686777e-06,
|
|
"loss": 0.8796,
|
|
"step": 6899
|
|
},
|
|
{
|
|
"epoch": 1.8351063829787235,
|
|
"grad_norm": 3.8841638565063477,
|
|
"learning_rate": 7.49492045644197e-06,
|
|
"loss": 0.8827,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 1.8353723404255318,
|
|
"grad_norm": 3.770533323287964,
|
|
"learning_rate": 7.494158228820393e-06,
|
|
"loss": 0.7671,
|
|
"step": 6901
|
|
},
|
|
{
|
|
"epoch": 1.8356382978723405,
|
|
"grad_norm": 4.155034065246582,
|
|
"learning_rate": 7.493395924026623e-06,
|
|
"loss": 0.8533,
|
|
"step": 6902
|
|
},
|
|
{
|
|
"epoch": 1.835904255319149,
|
|
"grad_norm": 3.911745071411133,
|
|
"learning_rate": 7.492633542084249e-06,
|
|
"loss": 0.82,
|
|
"step": 6903
|
|
},
|
|
{
|
|
"epoch": 1.8361702127659574,
|
|
"grad_norm": 3.444728136062622,
|
|
"learning_rate": 7.491871083016858e-06,
|
|
"loss": 0.7717,
|
|
"step": 6904
|
|
},
|
|
{
|
|
"epoch": 1.836436170212766,
|
|
"grad_norm": 4.003023147583008,
|
|
"learning_rate": 7.491108546848041e-06,
|
|
"loss": 0.7351,
|
|
"step": 6905
|
|
},
|
|
{
|
|
"epoch": 1.8367021276595743,
|
|
"grad_norm": 3.9087607860565186,
|
|
"learning_rate": 7.490345933601395e-06,
|
|
"loss": 0.8509,
|
|
"step": 6906
|
|
},
|
|
{
|
|
"epoch": 1.836968085106383,
|
|
"grad_norm": 4.098905086517334,
|
|
"learning_rate": 7.489583243300511e-06,
|
|
"loss": 0.9289,
|
|
"step": 6907
|
|
},
|
|
{
|
|
"epoch": 1.8372340425531914,
|
|
"grad_norm": 4.120253562927246,
|
|
"learning_rate": 7.488820475968992e-06,
|
|
"loss": 0.8707,
|
|
"step": 6908
|
|
},
|
|
{
|
|
"epoch": 1.8375,
|
|
"grad_norm": 4.324950218200684,
|
|
"learning_rate": 7.488057631630438e-06,
|
|
"loss": 0.7811,
|
|
"step": 6909
|
|
},
|
|
{
|
|
"epoch": 1.8377659574468086,
|
|
"grad_norm": 4.5706634521484375,
|
|
"learning_rate": 7.4872947103084495e-06,
|
|
"loss": 0.8641,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 1.838031914893617,
|
|
"grad_norm": 4.22561502456665,
|
|
"learning_rate": 7.486531712026634e-06,
|
|
"loss": 0.794,
|
|
"step": 6911
|
|
},
|
|
{
|
|
"epoch": 1.8382978723404255,
|
|
"grad_norm": 4.015974521636963,
|
|
"learning_rate": 7.485768636808603e-06,
|
|
"loss": 0.8757,
|
|
"step": 6912
|
|
},
|
|
{
|
|
"epoch": 1.8385638297872342,
|
|
"grad_norm": 3.7457127571105957,
|
|
"learning_rate": 7.48500548467796e-06,
|
|
"loss": 0.8682,
|
|
"step": 6913
|
|
},
|
|
{
|
|
"epoch": 1.8388297872340424,
|
|
"grad_norm": 3.964571714401245,
|
|
"learning_rate": 7.484242255658322e-06,
|
|
"loss": 0.7431,
|
|
"step": 6914
|
|
},
|
|
{
|
|
"epoch": 1.8390957446808511,
|
|
"grad_norm": 3.838426351547241,
|
|
"learning_rate": 7.4834789497733065e-06,
|
|
"loss": 0.7413,
|
|
"step": 6915
|
|
},
|
|
{
|
|
"epoch": 1.8393617021276596,
|
|
"grad_norm": 3.7367520332336426,
|
|
"learning_rate": 7.4827155670465264e-06,
|
|
"loss": 0.8366,
|
|
"step": 6916
|
|
},
|
|
{
|
|
"epoch": 1.839627659574468,
|
|
"grad_norm": 3.9056553840637207,
|
|
"learning_rate": 7.481952107501604e-06,
|
|
"loss": 0.7134,
|
|
"step": 6917
|
|
},
|
|
{
|
|
"epoch": 1.8398936170212767,
|
|
"grad_norm": 4.098144054412842,
|
|
"learning_rate": 7.481188571162161e-06,
|
|
"loss": 0.7744,
|
|
"step": 6918
|
|
},
|
|
{
|
|
"epoch": 1.840159574468085,
|
|
"grad_norm": 4.067973613739014,
|
|
"learning_rate": 7.480424958051823e-06,
|
|
"loss": 0.8143,
|
|
"step": 6919
|
|
},
|
|
{
|
|
"epoch": 1.8404255319148937,
|
|
"grad_norm": 3.9194462299346924,
|
|
"learning_rate": 7.479661268194217e-06,
|
|
"loss": 0.8335,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 1.8406914893617021,
|
|
"grad_norm": 4.130805492401123,
|
|
"learning_rate": 7.4788975016129704e-06,
|
|
"loss": 0.769,
|
|
"step": 6921
|
|
},
|
|
{
|
|
"epoch": 1.8409574468085106,
|
|
"grad_norm": 3.580792188644409,
|
|
"learning_rate": 7.478133658331716e-06,
|
|
"loss": 0.7743,
|
|
"step": 6922
|
|
},
|
|
{
|
|
"epoch": 1.8412234042553193,
|
|
"grad_norm": 3.78035569190979,
|
|
"learning_rate": 7.477369738374092e-06,
|
|
"loss": 0.8619,
|
|
"step": 6923
|
|
},
|
|
{
|
|
"epoch": 1.8414893617021275,
|
|
"grad_norm": 3.8400089740753174,
|
|
"learning_rate": 7.476605741763729e-06,
|
|
"loss": 0.8161,
|
|
"step": 6924
|
|
},
|
|
{
|
|
"epoch": 1.8417553191489362,
|
|
"grad_norm": 3.7448103427886963,
|
|
"learning_rate": 7.475841668524268e-06,
|
|
"loss": 0.8305,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 1.8420212765957447,
|
|
"grad_norm": 3.828014850616455,
|
|
"learning_rate": 7.475077518679352e-06,
|
|
"loss": 0.8424,
|
|
"step": 6926
|
|
},
|
|
{
|
|
"epoch": 1.8422872340425531,
|
|
"grad_norm": 3.776527166366577,
|
|
"learning_rate": 7.474313292252624e-06,
|
|
"loss": 0.9811,
|
|
"step": 6927
|
|
},
|
|
{
|
|
"epoch": 1.8425531914893618,
|
|
"grad_norm": 4.294341564178467,
|
|
"learning_rate": 7.473548989267728e-06,
|
|
"loss": 0.8375,
|
|
"step": 6928
|
|
},
|
|
{
|
|
"epoch": 1.84281914893617,
|
|
"grad_norm": 4.230419158935547,
|
|
"learning_rate": 7.472784609748316e-06,
|
|
"loss": 0.7886,
|
|
"step": 6929
|
|
},
|
|
{
|
|
"epoch": 1.8430851063829787,
|
|
"grad_norm": 4.243613243103027,
|
|
"learning_rate": 7.472020153718036e-06,
|
|
"loss": 0.8787,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 1.8433510638297872,
|
|
"grad_norm": 4.046195983886719,
|
|
"learning_rate": 7.471255621200541e-06,
|
|
"loss": 0.7344,
|
|
"step": 6931
|
|
},
|
|
{
|
|
"epoch": 1.8436170212765957,
|
|
"grad_norm": 3.4666972160339355,
|
|
"learning_rate": 7.470491012219488e-06,
|
|
"loss": 0.8123,
|
|
"step": 6932
|
|
},
|
|
{
|
|
"epoch": 1.8438829787234043,
|
|
"grad_norm": 4.226772785186768,
|
|
"learning_rate": 7.469726326798535e-06,
|
|
"loss": 0.7765,
|
|
"step": 6933
|
|
},
|
|
{
|
|
"epoch": 1.8441489361702128,
|
|
"grad_norm": 4.348804950714111,
|
|
"learning_rate": 7.468961564961341e-06,
|
|
"loss": 0.8481,
|
|
"step": 6934
|
|
},
|
|
{
|
|
"epoch": 1.8444148936170213,
|
|
"grad_norm": 3.7085683345794678,
|
|
"learning_rate": 7.4681967267315715e-06,
|
|
"loss": 0.7717,
|
|
"step": 6935
|
|
},
|
|
{
|
|
"epoch": 1.84468085106383,
|
|
"grad_norm": 3.670295238494873,
|
|
"learning_rate": 7.4674318121328856e-06,
|
|
"loss": 0.7074,
|
|
"step": 6936
|
|
},
|
|
{
|
|
"epoch": 1.8449468085106382,
|
|
"grad_norm": 4.235050678253174,
|
|
"learning_rate": 7.466666821188957e-06,
|
|
"loss": 0.9085,
|
|
"step": 6937
|
|
},
|
|
{
|
|
"epoch": 1.8452127659574469,
|
|
"grad_norm": 4.282822132110596,
|
|
"learning_rate": 7.465901753923452e-06,
|
|
"loss": 0.8641,
|
|
"step": 6938
|
|
},
|
|
{
|
|
"epoch": 1.8454787234042553,
|
|
"grad_norm": 3.9703402519226074,
|
|
"learning_rate": 7.465136610360044e-06,
|
|
"loss": 0.7331,
|
|
"step": 6939
|
|
},
|
|
{
|
|
"epoch": 1.8457446808510638,
|
|
"grad_norm": 3.793503522872925,
|
|
"learning_rate": 7.4643713905224065e-06,
|
|
"loss": 0.8122,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 1.8460106382978725,
|
|
"grad_norm": 4.120753288269043,
|
|
"learning_rate": 7.463606094434218e-06,
|
|
"loss": 0.8822,
|
|
"step": 6941
|
|
},
|
|
{
|
|
"epoch": 1.8462765957446807,
|
|
"grad_norm": 4.266670227050781,
|
|
"learning_rate": 7.462840722119155e-06,
|
|
"loss": 0.8363,
|
|
"step": 6942
|
|
},
|
|
{
|
|
"epoch": 1.8465425531914894,
|
|
"grad_norm": 3.998488664627075,
|
|
"learning_rate": 7.462075273600901e-06,
|
|
"loss": 0.895,
|
|
"step": 6943
|
|
},
|
|
{
|
|
"epoch": 1.8468085106382979,
|
|
"grad_norm": 3.923610210418701,
|
|
"learning_rate": 7.461309748903138e-06,
|
|
"loss": 0.8406,
|
|
"step": 6944
|
|
},
|
|
{
|
|
"epoch": 1.8470744680851063,
|
|
"grad_norm": 4.076598644256592,
|
|
"learning_rate": 7.460544148049555e-06,
|
|
"loss": 0.7919,
|
|
"step": 6945
|
|
},
|
|
{
|
|
"epoch": 1.847340425531915,
|
|
"grad_norm": 4.171792507171631,
|
|
"learning_rate": 7.459778471063839e-06,
|
|
"loss": 0.9616,
|
|
"step": 6946
|
|
},
|
|
{
|
|
"epoch": 1.8476063829787233,
|
|
"grad_norm": 4.327701091766357,
|
|
"learning_rate": 7.45901271796968e-06,
|
|
"loss": 0.8918,
|
|
"step": 6947
|
|
},
|
|
{
|
|
"epoch": 1.847872340425532,
|
|
"grad_norm": 4.035894393920898,
|
|
"learning_rate": 7.4582468887907746e-06,
|
|
"loss": 0.7007,
|
|
"step": 6948
|
|
},
|
|
{
|
|
"epoch": 1.8481382978723404,
|
|
"grad_norm": 3.9794068336486816,
|
|
"learning_rate": 7.457480983550813e-06,
|
|
"loss": 0.8622,
|
|
"step": 6949
|
|
},
|
|
{
|
|
"epoch": 1.8484042553191489,
|
|
"grad_norm": 3.988560914993286,
|
|
"learning_rate": 7.4567150022735e-06,
|
|
"loss": 0.7892,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 1.8486702127659576,
|
|
"grad_norm": 3.761817216873169,
|
|
"learning_rate": 7.455948944982529e-06,
|
|
"loss": 0.7549,
|
|
"step": 6951
|
|
},
|
|
{
|
|
"epoch": 1.8489361702127658,
|
|
"grad_norm": 3.962528944015503,
|
|
"learning_rate": 7.455182811701609e-06,
|
|
"loss": 0.7874,
|
|
"step": 6952
|
|
},
|
|
{
|
|
"epoch": 1.8492021276595745,
|
|
"grad_norm": 4.180268287658691,
|
|
"learning_rate": 7.454416602454441e-06,
|
|
"loss": 0.8401,
|
|
"step": 6953
|
|
},
|
|
{
|
|
"epoch": 1.849468085106383,
|
|
"grad_norm": 3.7611262798309326,
|
|
"learning_rate": 7.453650317264734e-06,
|
|
"loss": 0.8463,
|
|
"step": 6954
|
|
},
|
|
{
|
|
"epoch": 1.8497340425531914,
|
|
"grad_norm": 3.7269387245178223,
|
|
"learning_rate": 7.452883956156197e-06,
|
|
"loss": 0.7884,
|
|
"step": 6955
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"grad_norm": 4.998419284820557,
|
|
"learning_rate": 7.452117519152542e-06,
|
|
"loss": 0.861,
|
|
"step": 6956
|
|
},
|
|
{
|
|
"epoch": 1.8502659574468086,
|
|
"grad_norm": 4.210315704345703,
|
|
"learning_rate": 7.4513510062774845e-06,
|
|
"loss": 0.8083,
|
|
"step": 6957
|
|
},
|
|
{
|
|
"epoch": 1.850531914893617,
|
|
"grad_norm": 4.184957027435303,
|
|
"learning_rate": 7.4505844175547405e-06,
|
|
"loss": 0.7648,
|
|
"step": 6958
|
|
},
|
|
{
|
|
"epoch": 1.8507978723404257,
|
|
"grad_norm": 3.883157730102539,
|
|
"learning_rate": 7.44981775300803e-06,
|
|
"loss": 0.789,
|
|
"step": 6959
|
|
},
|
|
{
|
|
"epoch": 1.851063829787234,
|
|
"grad_norm": 3.930384397506714,
|
|
"learning_rate": 7.449051012661073e-06,
|
|
"loss": 0.7467,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 1.8513297872340426,
|
|
"grad_norm": 4.148220062255859,
|
|
"learning_rate": 7.448284196537594e-06,
|
|
"loss": 0.8692,
|
|
"step": 6961
|
|
},
|
|
{
|
|
"epoch": 1.851595744680851,
|
|
"grad_norm": 4.141353607177734,
|
|
"learning_rate": 7.4475173046613205e-06,
|
|
"loss": 0.8553,
|
|
"step": 6962
|
|
},
|
|
{
|
|
"epoch": 1.8518617021276595,
|
|
"grad_norm": 3.8646962642669678,
|
|
"learning_rate": 7.4467503370559806e-06,
|
|
"loss": 0.7953,
|
|
"step": 6963
|
|
},
|
|
{
|
|
"epoch": 1.8521276595744682,
|
|
"grad_norm": 3.765763759613037,
|
|
"learning_rate": 7.445983293745302e-06,
|
|
"loss": 0.7173,
|
|
"step": 6964
|
|
},
|
|
{
|
|
"epoch": 1.8523936170212765,
|
|
"grad_norm": 3.5731546878814697,
|
|
"learning_rate": 7.445216174753022e-06,
|
|
"loss": 0.7643,
|
|
"step": 6965
|
|
},
|
|
{
|
|
"epoch": 1.8526595744680852,
|
|
"grad_norm": 3.3962113857269287,
|
|
"learning_rate": 7.444448980102875e-06,
|
|
"loss": 0.7694,
|
|
"step": 6966
|
|
},
|
|
{
|
|
"epoch": 1.8529255319148936,
|
|
"grad_norm": 4.201429843902588,
|
|
"learning_rate": 7.4436817098186e-06,
|
|
"loss": 0.9388,
|
|
"step": 6967
|
|
},
|
|
{
|
|
"epoch": 1.853191489361702,
|
|
"grad_norm": 4.063852787017822,
|
|
"learning_rate": 7.442914363923933e-06,
|
|
"loss": 0.8472,
|
|
"step": 6968
|
|
},
|
|
{
|
|
"epoch": 1.8534574468085108,
|
|
"grad_norm": 4.6696696281433105,
|
|
"learning_rate": 7.442146942442621e-06,
|
|
"loss": 0.8739,
|
|
"step": 6969
|
|
},
|
|
{
|
|
"epoch": 1.853723404255319,
|
|
"grad_norm": 3.5337836742401123,
|
|
"learning_rate": 7.4413794453984065e-06,
|
|
"loss": 0.7506,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 1.8539893617021277,
|
|
"grad_norm": 4.372726917266846,
|
|
"learning_rate": 7.440611872815038e-06,
|
|
"loss": 0.824,
|
|
"step": 6971
|
|
},
|
|
{
|
|
"epoch": 1.8542553191489362,
|
|
"grad_norm": 4.04209566116333,
|
|
"learning_rate": 7.439844224716265e-06,
|
|
"loss": 0.8098,
|
|
"step": 6972
|
|
},
|
|
{
|
|
"epoch": 1.8545212765957446,
|
|
"grad_norm": 3.8578147888183594,
|
|
"learning_rate": 7.439076501125839e-06,
|
|
"loss": 0.7585,
|
|
"step": 6973
|
|
},
|
|
{
|
|
"epoch": 1.8547872340425533,
|
|
"grad_norm": 4.210418701171875,
|
|
"learning_rate": 7.4383087020675145e-06,
|
|
"loss": 0.7915,
|
|
"step": 6974
|
|
},
|
|
{
|
|
"epoch": 1.8550531914893615,
|
|
"grad_norm": 3.4614603519439697,
|
|
"learning_rate": 7.4375408275650475e-06,
|
|
"loss": 0.7506,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 1.8553191489361702,
|
|
"grad_norm": 4.220035076141357,
|
|
"learning_rate": 7.436772877642199e-06,
|
|
"loss": 0.8875,
|
|
"step": 6976
|
|
},
|
|
{
|
|
"epoch": 1.8555851063829787,
|
|
"grad_norm": 4.095662593841553,
|
|
"learning_rate": 7.436004852322727e-06,
|
|
"loss": 0.8973,
|
|
"step": 6977
|
|
},
|
|
{
|
|
"epoch": 1.8558510638297872,
|
|
"grad_norm": 4.23422908782959,
|
|
"learning_rate": 7.435236751630397e-06,
|
|
"loss": 0.699,
|
|
"step": 6978
|
|
},
|
|
{
|
|
"epoch": 1.8561170212765958,
|
|
"grad_norm": 3.976768970489502,
|
|
"learning_rate": 7.434468575588976e-06,
|
|
"loss": 0.781,
|
|
"step": 6979
|
|
},
|
|
{
|
|
"epoch": 1.8563829787234043,
|
|
"grad_norm": 4.405401229858398,
|
|
"learning_rate": 7.43370032422223e-06,
|
|
"loss": 0.7388,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 1.8566489361702128,
|
|
"grad_norm": 4.096654891967773,
|
|
"learning_rate": 7.432931997553929e-06,
|
|
"loss": 0.8305,
|
|
"step": 6981
|
|
},
|
|
{
|
|
"epoch": 1.8569148936170212,
|
|
"grad_norm": 3.9386327266693115,
|
|
"learning_rate": 7.432163595607851e-06,
|
|
"loss": 0.775,
|
|
"step": 6982
|
|
},
|
|
{
|
|
"epoch": 1.8571808510638297,
|
|
"grad_norm": 4.111544609069824,
|
|
"learning_rate": 7.431395118407766e-06,
|
|
"loss": 0.9179,
|
|
"step": 6983
|
|
},
|
|
{
|
|
"epoch": 1.8574468085106384,
|
|
"grad_norm": 3.3650224208831787,
|
|
"learning_rate": 7.4306265659774525e-06,
|
|
"loss": 0.8286,
|
|
"step": 6984
|
|
},
|
|
{
|
|
"epoch": 1.8577127659574468,
|
|
"grad_norm": 4.099471569061279,
|
|
"learning_rate": 7.429857938340693e-06,
|
|
"loss": 0.8789,
|
|
"step": 6985
|
|
},
|
|
{
|
|
"epoch": 1.8579787234042553,
|
|
"grad_norm": 4.082056999206543,
|
|
"learning_rate": 7.429089235521267e-06,
|
|
"loss": 0.8938,
|
|
"step": 6986
|
|
},
|
|
{
|
|
"epoch": 1.858244680851064,
|
|
"grad_norm": 4.1304545402526855,
|
|
"learning_rate": 7.428320457542962e-06,
|
|
"loss": 0.8639,
|
|
"step": 6987
|
|
},
|
|
{
|
|
"epoch": 1.8585106382978722,
|
|
"grad_norm": 3.941922426223755,
|
|
"learning_rate": 7.427551604429562e-06,
|
|
"loss": 0.7966,
|
|
"step": 6988
|
|
},
|
|
{
|
|
"epoch": 1.858776595744681,
|
|
"grad_norm": 3.8861730098724365,
|
|
"learning_rate": 7.426782676204857e-06,
|
|
"loss": 0.8282,
|
|
"step": 6989
|
|
},
|
|
{
|
|
"epoch": 1.8590425531914894,
|
|
"grad_norm": 3.8917558193206787,
|
|
"learning_rate": 7.426013672892639e-06,
|
|
"loss": 0.7213,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 1.8593085106382978,
|
|
"grad_norm": 4.324743747711182,
|
|
"learning_rate": 7.4252445945167005e-06,
|
|
"loss": 0.9627,
|
|
"step": 6991
|
|
},
|
|
{
|
|
"epoch": 1.8595744680851065,
|
|
"grad_norm": 3.6545021533966064,
|
|
"learning_rate": 7.42447544110084e-06,
|
|
"loss": 0.742,
|
|
"step": 6992
|
|
},
|
|
{
|
|
"epoch": 1.8598404255319148,
|
|
"grad_norm": 4.201162338256836,
|
|
"learning_rate": 7.423706212668855e-06,
|
|
"loss": 0.8343,
|
|
"step": 6993
|
|
},
|
|
{
|
|
"epoch": 1.8601063829787234,
|
|
"grad_norm": 3.67588472366333,
|
|
"learning_rate": 7.4229369092445465e-06,
|
|
"loss": 0.7863,
|
|
"step": 6994
|
|
},
|
|
{
|
|
"epoch": 1.860372340425532,
|
|
"grad_norm": 3.3527588844299316,
|
|
"learning_rate": 7.422167530851716e-06,
|
|
"loss": 0.7513,
|
|
"step": 6995
|
|
},
|
|
{
|
|
"epoch": 1.8606382978723404,
|
|
"grad_norm": 3.977691888809204,
|
|
"learning_rate": 7.421398077514172e-06,
|
|
"loss": 0.7507,
|
|
"step": 6996
|
|
},
|
|
{
|
|
"epoch": 1.860904255319149,
|
|
"grad_norm": 4.172175407409668,
|
|
"learning_rate": 7.420628549255719e-06,
|
|
"loss": 0.8395,
|
|
"step": 6997
|
|
},
|
|
{
|
|
"epoch": 1.8611702127659573,
|
|
"grad_norm": 3.738621473312378,
|
|
"learning_rate": 7.41985894610017e-06,
|
|
"loss": 0.8366,
|
|
"step": 6998
|
|
},
|
|
{
|
|
"epoch": 1.861436170212766,
|
|
"grad_norm": 4.003189563751221,
|
|
"learning_rate": 7.4190892680713366e-06,
|
|
"loss": 0.9032,
|
|
"step": 6999
|
|
},
|
|
{
|
|
"epoch": 1.8617021276595744,
|
|
"grad_norm": 3.872437000274658,
|
|
"learning_rate": 7.418319515193032e-06,
|
|
"loss": 0.8052,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.8617021276595744,
|
|
"eval_loss": 1.269985556602478,
|
|
"eval_runtime": 14.1914,
|
|
"eval_samples_per_second": 28.186,
|
|
"eval_steps_per_second": 3.523,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.861968085106383,
|
|
"grad_norm": 4.005687713623047,
|
|
"learning_rate": 7.417549687489074e-06,
|
|
"loss": 0.7515,
|
|
"step": 7001
|
|
},
|
|
{
|
|
"epoch": 1.8622340425531916,
|
|
"grad_norm": 3.833047866821289,
|
|
"learning_rate": 7.416779784983284e-06,
|
|
"loss": 0.8487,
|
|
"step": 7002
|
|
},
|
|
{
|
|
"epoch": 1.8625,
|
|
"grad_norm": 3.902536392211914,
|
|
"learning_rate": 7.416009807699481e-06,
|
|
"loss": 0.7448,
|
|
"step": 7003
|
|
},
|
|
{
|
|
"epoch": 1.8627659574468085,
|
|
"grad_norm": 4.018909931182861,
|
|
"learning_rate": 7.41523975566149e-06,
|
|
"loss": 0.8619,
|
|
"step": 7004
|
|
},
|
|
{
|
|
"epoch": 1.863031914893617,
|
|
"grad_norm": 3.7916078567504883,
|
|
"learning_rate": 7.414469628893137e-06,
|
|
"loss": 0.7254,
|
|
"step": 7005
|
|
},
|
|
{
|
|
"epoch": 1.8632978723404254,
|
|
"grad_norm": 3.662709951400757,
|
|
"learning_rate": 7.413699427418253e-06,
|
|
"loss": 0.8801,
|
|
"step": 7006
|
|
},
|
|
{
|
|
"epoch": 1.8635638297872341,
|
|
"grad_norm": 3.8417561054229736,
|
|
"learning_rate": 7.412929151260665e-06,
|
|
"loss": 0.9611,
|
|
"step": 7007
|
|
},
|
|
{
|
|
"epoch": 1.8638297872340426,
|
|
"grad_norm": 3.8474161624908447,
|
|
"learning_rate": 7.412158800444208e-06,
|
|
"loss": 0.7215,
|
|
"step": 7008
|
|
},
|
|
{
|
|
"epoch": 1.864095744680851,
|
|
"grad_norm": 3.4360055923461914,
|
|
"learning_rate": 7.411388374992719e-06,
|
|
"loss": 0.7885,
|
|
"step": 7009
|
|
},
|
|
{
|
|
"epoch": 1.8643617021276597,
|
|
"grad_norm": 3.902475357055664,
|
|
"learning_rate": 7.410617874930034e-06,
|
|
"loss": 0.8199,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 1.864627659574468,
|
|
"grad_norm": 4.08276891708374,
|
|
"learning_rate": 7.409847300279993e-06,
|
|
"loss": 0.793,
|
|
"step": 7011
|
|
},
|
|
{
|
|
"epoch": 1.8648936170212767,
|
|
"grad_norm": 4.242387294769287,
|
|
"learning_rate": 7.4090766510664405e-06,
|
|
"loss": 0.9345,
|
|
"step": 7012
|
|
},
|
|
{
|
|
"epoch": 1.8651595744680851,
|
|
"grad_norm": 3.8312370777130127,
|
|
"learning_rate": 7.40830592731322e-06,
|
|
"loss": 0.8151,
|
|
"step": 7013
|
|
},
|
|
{
|
|
"epoch": 1.8654255319148936,
|
|
"grad_norm": 4.087930679321289,
|
|
"learning_rate": 7.407535129044179e-06,
|
|
"loss": 0.936,
|
|
"step": 7014
|
|
},
|
|
{
|
|
"epoch": 1.8656914893617023,
|
|
"grad_norm": 4.200309753417969,
|
|
"learning_rate": 7.4067642562831656e-06,
|
|
"loss": 0.8345,
|
|
"step": 7015
|
|
},
|
|
{
|
|
"epoch": 1.8659574468085105,
|
|
"grad_norm": 3.7283883094787598,
|
|
"learning_rate": 7.4059933090540315e-06,
|
|
"loss": 0.7398,
|
|
"step": 7016
|
|
},
|
|
{
|
|
"epoch": 1.8662234042553192,
|
|
"grad_norm": 4.288913249969482,
|
|
"learning_rate": 7.4052222873806345e-06,
|
|
"loss": 0.9314,
|
|
"step": 7017
|
|
},
|
|
{
|
|
"epoch": 1.8664893617021276,
|
|
"grad_norm": 4.077908515930176,
|
|
"learning_rate": 7.404451191286825e-06,
|
|
"loss": 0.8331,
|
|
"step": 7018
|
|
},
|
|
{
|
|
"epoch": 1.866755319148936,
|
|
"grad_norm": 4.040445804595947,
|
|
"learning_rate": 7.403680020796468e-06,
|
|
"loss": 0.8054,
|
|
"step": 7019
|
|
},
|
|
{
|
|
"epoch": 1.8670212765957448,
|
|
"grad_norm": 4.416097164154053,
|
|
"learning_rate": 7.402908775933419e-06,
|
|
"loss": 0.7164,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 1.867287234042553,
|
|
"grad_norm": 3.8552403450012207,
|
|
"learning_rate": 7.402137456721544e-06,
|
|
"loss": 0.8274,
|
|
"step": 7021
|
|
},
|
|
{
|
|
"epoch": 1.8675531914893617,
|
|
"grad_norm": 4.477870941162109,
|
|
"learning_rate": 7.401366063184709e-06,
|
|
"loss": 0.9087,
|
|
"step": 7022
|
|
},
|
|
{
|
|
"epoch": 1.8678191489361702,
|
|
"grad_norm": 4.315149784088135,
|
|
"learning_rate": 7.4005945953467794e-06,
|
|
"loss": 0.8275,
|
|
"step": 7023
|
|
},
|
|
{
|
|
"epoch": 1.8680851063829786,
|
|
"grad_norm": 4.013988971710205,
|
|
"learning_rate": 7.3998230532316275e-06,
|
|
"loss": 0.7935,
|
|
"step": 7024
|
|
},
|
|
{
|
|
"epoch": 1.8683510638297873,
|
|
"grad_norm": 4.538480281829834,
|
|
"learning_rate": 7.399051436863125e-06,
|
|
"loss": 0.7913,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 1.8686170212765958,
|
|
"grad_norm": 3.814431667327881,
|
|
"learning_rate": 7.398279746265144e-06,
|
|
"loss": 0.8819,
|
|
"step": 7026
|
|
},
|
|
{
|
|
"epoch": 1.8688829787234043,
|
|
"grad_norm": 4.128929615020752,
|
|
"learning_rate": 7.397507981461567e-06,
|
|
"loss": 0.7733,
|
|
"step": 7027
|
|
},
|
|
{
|
|
"epoch": 1.8691489361702127,
|
|
"grad_norm": 4.266568660736084,
|
|
"learning_rate": 7.3967361424762696e-06,
|
|
"loss": 0.8756,
|
|
"step": 7028
|
|
},
|
|
{
|
|
"epoch": 1.8694148936170212,
|
|
"grad_norm": 3.817857265472412,
|
|
"learning_rate": 7.3959642293331336e-06,
|
|
"loss": 0.8247,
|
|
"step": 7029
|
|
},
|
|
{
|
|
"epoch": 1.8696808510638299,
|
|
"grad_norm": 4.07396125793457,
|
|
"learning_rate": 7.395192242056044e-06,
|
|
"loss": 0.7925,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 1.8699468085106383,
|
|
"grad_norm": 3.3347582817077637,
|
|
"learning_rate": 7.3944201806688865e-06,
|
|
"loss": 0.647,
|
|
"step": 7031
|
|
},
|
|
{
|
|
"epoch": 1.8702127659574468,
|
|
"grad_norm": 3.7496252059936523,
|
|
"learning_rate": 7.393648045195548e-06,
|
|
"loss": 0.884,
|
|
"step": 7032
|
|
},
|
|
{
|
|
"epoch": 1.8704787234042555,
|
|
"grad_norm": 3.871969223022461,
|
|
"learning_rate": 7.392875835659923e-06,
|
|
"loss": 0.7962,
|
|
"step": 7033
|
|
},
|
|
{
|
|
"epoch": 1.8707446808510637,
|
|
"grad_norm": 4.357855796813965,
|
|
"learning_rate": 7.392103552085901e-06,
|
|
"loss": 0.8063,
|
|
"step": 7034
|
|
},
|
|
{
|
|
"epoch": 1.8710106382978724,
|
|
"grad_norm": 3.7552926540374756,
|
|
"learning_rate": 7.391331194497379e-06,
|
|
"loss": 0.7611,
|
|
"step": 7035
|
|
},
|
|
{
|
|
"epoch": 1.8712765957446809,
|
|
"grad_norm": 4.20325231552124,
|
|
"learning_rate": 7.390558762918254e-06,
|
|
"loss": 0.8825,
|
|
"step": 7036
|
|
},
|
|
{
|
|
"epoch": 1.8715425531914893,
|
|
"grad_norm": 3.433969020843506,
|
|
"learning_rate": 7.389786257372428e-06,
|
|
"loss": 0.6822,
|
|
"step": 7037
|
|
},
|
|
{
|
|
"epoch": 1.871808510638298,
|
|
"grad_norm": 3.9316911697387695,
|
|
"learning_rate": 7.3890136778837995e-06,
|
|
"loss": 0.8302,
|
|
"step": 7038
|
|
},
|
|
{
|
|
"epoch": 1.8720744680851062,
|
|
"grad_norm": 3.7068655490875244,
|
|
"learning_rate": 7.388241024476276e-06,
|
|
"loss": 0.8207,
|
|
"step": 7039
|
|
},
|
|
{
|
|
"epoch": 1.872340425531915,
|
|
"grad_norm": 3.7558844089508057,
|
|
"learning_rate": 7.387468297173764e-06,
|
|
"loss": 0.8916,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 1.8726063829787234,
|
|
"grad_norm": 3.663325786590576,
|
|
"learning_rate": 7.386695496000172e-06,
|
|
"loss": 0.8461,
|
|
"step": 7041
|
|
},
|
|
{
|
|
"epoch": 1.8728723404255319,
|
|
"grad_norm": 3.7792584896087646,
|
|
"learning_rate": 7.38592262097941e-06,
|
|
"loss": 0.775,
|
|
"step": 7042
|
|
},
|
|
{
|
|
"epoch": 1.8731382978723405,
|
|
"grad_norm": 3.6168766021728516,
|
|
"learning_rate": 7.385149672135394e-06,
|
|
"loss": 0.7552,
|
|
"step": 7043
|
|
},
|
|
{
|
|
"epoch": 1.8734042553191488,
|
|
"grad_norm": 3.5428271293640137,
|
|
"learning_rate": 7.384376649492039e-06,
|
|
"loss": 0.8633,
|
|
"step": 7044
|
|
},
|
|
{
|
|
"epoch": 1.8736702127659575,
|
|
"grad_norm": 4.00286340713501,
|
|
"learning_rate": 7.383603553073262e-06,
|
|
"loss": 0.7895,
|
|
"step": 7045
|
|
},
|
|
{
|
|
"epoch": 1.873936170212766,
|
|
"grad_norm": 4.0529890060424805,
|
|
"learning_rate": 7.382830382902986e-06,
|
|
"loss": 0.7161,
|
|
"step": 7046
|
|
},
|
|
{
|
|
"epoch": 1.8742021276595744,
|
|
"grad_norm": 4.5928425788879395,
|
|
"learning_rate": 7.382057139005132e-06,
|
|
"loss": 0.8454,
|
|
"step": 7047
|
|
},
|
|
{
|
|
"epoch": 1.874468085106383,
|
|
"grad_norm": 3.7979865074157715,
|
|
"learning_rate": 7.381283821403626e-06,
|
|
"loss": 0.8475,
|
|
"step": 7048
|
|
},
|
|
{
|
|
"epoch": 1.8747340425531915,
|
|
"grad_norm": 3.9232993125915527,
|
|
"learning_rate": 7.380510430122396e-06,
|
|
"loss": 0.8079,
|
|
"step": 7049
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 4.084567546844482,
|
|
"learning_rate": 7.379736965185369e-06,
|
|
"loss": 0.8926,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 1.8752659574468085,
|
|
"grad_norm": 3.967013359069824,
|
|
"learning_rate": 7.378963426616479e-06,
|
|
"loss": 0.8136,
|
|
"step": 7051
|
|
},
|
|
{
|
|
"epoch": 1.875531914893617,
|
|
"grad_norm": 4.18993616104126,
|
|
"learning_rate": 7.378189814439659e-06,
|
|
"loss": 0.663,
|
|
"step": 7052
|
|
},
|
|
{
|
|
"epoch": 1.8757978723404256,
|
|
"grad_norm": 3.4214327335357666,
|
|
"learning_rate": 7.377416128678847e-06,
|
|
"loss": 0.7142,
|
|
"step": 7053
|
|
},
|
|
{
|
|
"epoch": 1.876063829787234,
|
|
"grad_norm": 4.111138343811035,
|
|
"learning_rate": 7.37664236935798e-06,
|
|
"loss": 0.8517,
|
|
"step": 7054
|
|
},
|
|
{
|
|
"epoch": 1.8763297872340425,
|
|
"grad_norm": 4.020641326904297,
|
|
"learning_rate": 7.375868536501001e-06,
|
|
"loss": 0.7649,
|
|
"step": 7055
|
|
},
|
|
{
|
|
"epoch": 1.8765957446808512,
|
|
"grad_norm": 3.6159451007843018,
|
|
"learning_rate": 7.375094630131852e-06,
|
|
"loss": 0.7219,
|
|
"step": 7056
|
|
},
|
|
{
|
|
"epoch": 1.8768617021276595,
|
|
"grad_norm": 4.138524532318115,
|
|
"learning_rate": 7.374320650274479e-06,
|
|
"loss": 0.7374,
|
|
"step": 7057
|
|
},
|
|
{
|
|
"epoch": 1.8771276595744681,
|
|
"grad_norm": 4.114788055419922,
|
|
"learning_rate": 7.373546596952829e-06,
|
|
"loss": 0.9118,
|
|
"step": 7058
|
|
},
|
|
{
|
|
"epoch": 1.8773936170212766,
|
|
"grad_norm": 3.8229057788848877,
|
|
"learning_rate": 7.372772470190852e-06,
|
|
"loss": 0.7109,
|
|
"step": 7059
|
|
},
|
|
{
|
|
"epoch": 1.877659574468085,
|
|
"grad_norm": 3.9543075561523438,
|
|
"learning_rate": 7.371998270012504e-06,
|
|
"loss": 0.7616,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 1.8779255319148938,
|
|
"grad_norm": 3.862529754638672,
|
|
"learning_rate": 7.3712239964417345e-06,
|
|
"loss": 0.8719,
|
|
"step": 7061
|
|
},
|
|
{
|
|
"epoch": 1.878191489361702,
|
|
"grad_norm": 3.855138063430786,
|
|
"learning_rate": 7.370449649502504e-06,
|
|
"loss": 0.7093,
|
|
"step": 7062
|
|
},
|
|
{
|
|
"epoch": 1.8784574468085107,
|
|
"grad_norm": 4.169119358062744,
|
|
"learning_rate": 7.36967522921877e-06,
|
|
"loss": 0.8817,
|
|
"step": 7063
|
|
},
|
|
{
|
|
"epoch": 1.8787234042553191,
|
|
"grad_norm": 3.8987720012664795,
|
|
"learning_rate": 7.368900735614494e-06,
|
|
"loss": 0.7522,
|
|
"step": 7064
|
|
},
|
|
{
|
|
"epoch": 1.8789893617021276,
|
|
"grad_norm": 3.938058853149414,
|
|
"learning_rate": 7.36812616871364e-06,
|
|
"loss": 0.7694,
|
|
"step": 7065
|
|
},
|
|
{
|
|
"epoch": 1.8792553191489363,
|
|
"grad_norm": 3.7450876235961914,
|
|
"learning_rate": 7.367351528540176e-06,
|
|
"loss": 0.7283,
|
|
"step": 7066
|
|
},
|
|
{
|
|
"epoch": 1.8795212765957445,
|
|
"grad_norm": 3.9045193195343018,
|
|
"learning_rate": 7.366576815118067e-06,
|
|
"loss": 0.735,
|
|
"step": 7067
|
|
},
|
|
{
|
|
"epoch": 1.8797872340425532,
|
|
"grad_norm": 3.4928138256073,
|
|
"learning_rate": 7.365802028471285e-06,
|
|
"loss": 0.7537,
|
|
"step": 7068
|
|
},
|
|
{
|
|
"epoch": 1.8800531914893617,
|
|
"grad_norm": 3.8254666328430176,
|
|
"learning_rate": 7.365027168623804e-06,
|
|
"loss": 0.8252,
|
|
"step": 7069
|
|
},
|
|
{
|
|
"epoch": 1.8803191489361701,
|
|
"grad_norm": 4.039599418640137,
|
|
"learning_rate": 7.364252235599596e-06,
|
|
"loss": 0.78,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 1.8805851063829788,
|
|
"grad_norm": 4.29962158203125,
|
|
"learning_rate": 7.363477229422642e-06,
|
|
"loss": 0.8651,
|
|
"step": 7071
|
|
},
|
|
{
|
|
"epoch": 1.8808510638297873,
|
|
"grad_norm": 3.891298294067383,
|
|
"learning_rate": 7.3627021501169196e-06,
|
|
"loss": 0.7887,
|
|
"step": 7072
|
|
},
|
|
{
|
|
"epoch": 1.8811170212765957,
|
|
"grad_norm": 3.8227875232696533,
|
|
"learning_rate": 7.36192699770641e-06,
|
|
"loss": 0.8563,
|
|
"step": 7073
|
|
},
|
|
{
|
|
"epoch": 1.8813829787234042,
|
|
"grad_norm": 3.881826639175415,
|
|
"learning_rate": 7.3611517722151e-06,
|
|
"loss": 0.7518,
|
|
"step": 7074
|
|
},
|
|
{
|
|
"epoch": 1.8816489361702127,
|
|
"grad_norm": 3.529783248901367,
|
|
"learning_rate": 7.360376473666973e-06,
|
|
"loss": 0.7086,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 1.8819148936170214,
|
|
"grad_norm": 3.710423231124878,
|
|
"learning_rate": 7.359601102086018e-06,
|
|
"loss": 0.8141,
|
|
"step": 7076
|
|
},
|
|
{
|
|
"epoch": 1.8821808510638298,
|
|
"grad_norm": 4.26459264755249,
|
|
"learning_rate": 7.358825657496228e-06,
|
|
"loss": 0.8523,
|
|
"step": 7077
|
|
},
|
|
{
|
|
"epoch": 1.8824468085106383,
|
|
"grad_norm": 3.9186158180236816,
|
|
"learning_rate": 7.358050139921595e-06,
|
|
"loss": 0.806,
|
|
"step": 7078
|
|
},
|
|
{
|
|
"epoch": 1.882712765957447,
|
|
"grad_norm": 3.5147833824157715,
|
|
"learning_rate": 7.3572745493861155e-06,
|
|
"loss": 0.742,
|
|
"step": 7079
|
|
},
|
|
{
|
|
"epoch": 1.8829787234042552,
|
|
"grad_norm": 3.834606885910034,
|
|
"learning_rate": 7.356498885913784e-06,
|
|
"loss": 0.9308,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 1.883244680851064,
|
|
"grad_norm": 3.989713191986084,
|
|
"learning_rate": 7.355723149528604e-06,
|
|
"loss": 0.8085,
|
|
"step": 7081
|
|
},
|
|
{
|
|
"epoch": 1.8835106382978724,
|
|
"grad_norm": 4.148540019989014,
|
|
"learning_rate": 7.354947340254576e-06,
|
|
"loss": 0.7697,
|
|
"step": 7082
|
|
},
|
|
{
|
|
"epoch": 1.8837765957446808,
|
|
"grad_norm": 3.6128063201904297,
|
|
"learning_rate": 7.354171458115704e-06,
|
|
"loss": 0.7755,
|
|
"step": 7083
|
|
},
|
|
{
|
|
"epoch": 1.8840425531914895,
|
|
"grad_norm": 4.31196928024292,
|
|
"learning_rate": 7.353395503135996e-06,
|
|
"loss": 0.7754,
|
|
"step": 7084
|
|
},
|
|
{
|
|
"epoch": 1.8843085106382977,
|
|
"grad_norm": 3.750534772872925,
|
|
"learning_rate": 7.35261947533946e-06,
|
|
"loss": 0.8237,
|
|
"step": 7085
|
|
},
|
|
{
|
|
"epoch": 1.8845744680851064,
|
|
"grad_norm": 3.8344967365264893,
|
|
"learning_rate": 7.351843374750108e-06,
|
|
"loss": 0.832,
|
|
"step": 7086
|
|
},
|
|
{
|
|
"epoch": 1.8848404255319149,
|
|
"grad_norm": 3.5898144245147705,
|
|
"learning_rate": 7.351067201391952e-06,
|
|
"loss": 0.737,
|
|
"step": 7087
|
|
},
|
|
{
|
|
"epoch": 1.8851063829787233,
|
|
"grad_norm": 3.8664729595184326,
|
|
"learning_rate": 7.35029095528901e-06,
|
|
"loss": 0.8636,
|
|
"step": 7088
|
|
},
|
|
{
|
|
"epoch": 1.885372340425532,
|
|
"grad_norm": 4.382975101470947,
|
|
"learning_rate": 7.349514636465298e-06,
|
|
"loss": 0.8923,
|
|
"step": 7089
|
|
},
|
|
{
|
|
"epoch": 1.8856382978723403,
|
|
"grad_norm": 4.070766448974609,
|
|
"learning_rate": 7.348738244944837e-06,
|
|
"loss": 0.8651,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 1.885904255319149,
|
|
"grad_norm": 4.187519073486328,
|
|
"learning_rate": 7.347961780751649e-06,
|
|
"loss": 0.8492,
|
|
"step": 7091
|
|
},
|
|
{
|
|
"epoch": 1.8861702127659574,
|
|
"grad_norm": 3.7398457527160645,
|
|
"learning_rate": 7.347185243909761e-06,
|
|
"loss": 0.7936,
|
|
"step": 7092
|
|
},
|
|
{
|
|
"epoch": 1.8864361702127659,
|
|
"grad_norm": 3.758314609527588,
|
|
"learning_rate": 7.346408634443196e-06,
|
|
"loss": 0.9086,
|
|
"step": 7093
|
|
},
|
|
{
|
|
"epoch": 1.8867021276595746,
|
|
"grad_norm": 3.800701856613159,
|
|
"learning_rate": 7.345631952375986e-06,
|
|
"loss": 0.8418,
|
|
"step": 7094
|
|
},
|
|
{
|
|
"epoch": 1.886968085106383,
|
|
"grad_norm": 4.155978202819824,
|
|
"learning_rate": 7.3448551977321615e-06,
|
|
"loss": 0.9388,
|
|
"step": 7095
|
|
},
|
|
{
|
|
"epoch": 1.8872340425531915,
|
|
"grad_norm": 3.9163780212402344,
|
|
"learning_rate": 7.344078370535757e-06,
|
|
"loss": 0.7108,
|
|
"step": 7096
|
|
},
|
|
{
|
|
"epoch": 1.8875,
|
|
"grad_norm": 3.312629222869873,
|
|
"learning_rate": 7.343301470810809e-06,
|
|
"loss": 0.6591,
|
|
"step": 7097
|
|
},
|
|
{
|
|
"epoch": 1.8877659574468084,
|
|
"grad_norm": 4.259210586547852,
|
|
"learning_rate": 7.342524498581352e-06,
|
|
"loss": 0.9209,
|
|
"step": 7098
|
|
},
|
|
{
|
|
"epoch": 1.888031914893617,
|
|
"grad_norm": 4.158624649047852,
|
|
"learning_rate": 7.34174745387143e-06,
|
|
"loss": 0.8084,
|
|
"step": 7099
|
|
},
|
|
{
|
|
"epoch": 1.8882978723404256,
|
|
"grad_norm": 4.25371789932251,
|
|
"learning_rate": 7.340970336705084e-06,
|
|
"loss": 0.8624,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 1.888563829787234,
|
|
"grad_norm": 3.780513286590576,
|
|
"learning_rate": 7.340193147106362e-06,
|
|
"loss": 0.7879,
|
|
"step": 7101
|
|
},
|
|
{
|
|
"epoch": 1.8888297872340427,
|
|
"grad_norm": 4.191688537597656,
|
|
"learning_rate": 7.339415885099307e-06,
|
|
"loss": 0.7785,
|
|
"step": 7102
|
|
},
|
|
{
|
|
"epoch": 1.889095744680851,
|
|
"grad_norm": 4.398171901702881,
|
|
"learning_rate": 7.33863855070797e-06,
|
|
"loss": 0.8883,
|
|
"step": 7103
|
|
},
|
|
{
|
|
"epoch": 1.8893617021276596,
|
|
"grad_norm": 3.6488990783691406,
|
|
"learning_rate": 7.337861143956404e-06,
|
|
"loss": 0.8097,
|
|
"step": 7104
|
|
},
|
|
{
|
|
"epoch": 1.889627659574468,
|
|
"grad_norm": 4.0780487060546875,
|
|
"learning_rate": 7.3370836648686616e-06,
|
|
"loss": 0.7897,
|
|
"step": 7105
|
|
},
|
|
{
|
|
"epoch": 1.8898936170212766,
|
|
"grad_norm": 4.089003562927246,
|
|
"learning_rate": 7.336306113468799e-06,
|
|
"loss": 0.9653,
|
|
"step": 7106
|
|
},
|
|
{
|
|
"epoch": 1.8901595744680852,
|
|
"grad_norm": 4.446435928344727,
|
|
"learning_rate": 7.335528489780874e-06,
|
|
"loss": 0.8947,
|
|
"step": 7107
|
|
},
|
|
{
|
|
"epoch": 1.8904255319148935,
|
|
"grad_norm": 3.880557060241699,
|
|
"learning_rate": 7.334750793828947e-06,
|
|
"loss": 0.9184,
|
|
"step": 7108
|
|
},
|
|
{
|
|
"epoch": 1.8906914893617022,
|
|
"grad_norm": 4.0276899337768555,
|
|
"learning_rate": 7.3339730256370834e-06,
|
|
"loss": 0.7444,
|
|
"step": 7109
|
|
},
|
|
{
|
|
"epoch": 1.8909574468085106,
|
|
"grad_norm": 4.381673336029053,
|
|
"learning_rate": 7.333195185229346e-06,
|
|
"loss": 0.7789,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 1.891223404255319,
|
|
"grad_norm": 4.908472537994385,
|
|
"learning_rate": 7.3324172726298015e-06,
|
|
"loss": 0.8258,
|
|
"step": 7111
|
|
},
|
|
{
|
|
"epoch": 1.8914893617021278,
|
|
"grad_norm": 4.257655143737793,
|
|
"learning_rate": 7.331639287862522e-06,
|
|
"loss": 0.8343,
|
|
"step": 7112
|
|
},
|
|
{
|
|
"epoch": 1.891755319148936,
|
|
"grad_norm": 3.902233600616455,
|
|
"learning_rate": 7.330861230951577e-06,
|
|
"loss": 0.7672,
|
|
"step": 7113
|
|
},
|
|
{
|
|
"epoch": 1.8920212765957447,
|
|
"grad_norm": 4.111093044281006,
|
|
"learning_rate": 7.3300831019210415e-06,
|
|
"loss": 0.9128,
|
|
"step": 7114
|
|
},
|
|
{
|
|
"epoch": 1.8922872340425532,
|
|
"grad_norm": 4.477164268493652,
|
|
"learning_rate": 7.329304900794991e-06,
|
|
"loss": 0.9389,
|
|
"step": 7115
|
|
},
|
|
{
|
|
"epoch": 1.8925531914893616,
|
|
"grad_norm": 4.585188388824463,
|
|
"learning_rate": 7.328526627597505e-06,
|
|
"loss": 0.8127,
|
|
"step": 7116
|
|
},
|
|
{
|
|
"epoch": 1.8928191489361703,
|
|
"grad_norm": 3.906665086746216,
|
|
"learning_rate": 7.327748282352664e-06,
|
|
"loss": 0.7996,
|
|
"step": 7117
|
|
},
|
|
{
|
|
"epoch": 1.8930851063829788,
|
|
"grad_norm": 4.213885307312012,
|
|
"learning_rate": 7.32696986508455e-06,
|
|
"loss": 0.8334,
|
|
"step": 7118
|
|
},
|
|
{
|
|
"epoch": 1.8933510638297872,
|
|
"grad_norm": 4.066798686981201,
|
|
"learning_rate": 7.326191375817249e-06,
|
|
"loss": 0.8217,
|
|
"step": 7119
|
|
},
|
|
{
|
|
"epoch": 1.8936170212765957,
|
|
"grad_norm": 3.510889768600464,
|
|
"learning_rate": 7.325412814574847e-06,
|
|
"loss": 0.7864,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 1.8938829787234042,
|
|
"grad_norm": 3.888808250427246,
|
|
"learning_rate": 7.324634181381436e-06,
|
|
"loss": 0.7519,
|
|
"step": 7121
|
|
},
|
|
{
|
|
"epoch": 1.8941489361702128,
|
|
"grad_norm": 3.9174201488494873,
|
|
"learning_rate": 7.323855476261106e-06,
|
|
"loss": 0.6913,
|
|
"step": 7122
|
|
},
|
|
{
|
|
"epoch": 1.8944148936170213,
|
|
"grad_norm": 4.041181564331055,
|
|
"learning_rate": 7.323076699237951e-06,
|
|
"loss": 0.6076,
|
|
"step": 7123
|
|
},
|
|
{
|
|
"epoch": 1.8946808510638298,
|
|
"grad_norm": 3.841498851776123,
|
|
"learning_rate": 7.322297850336069e-06,
|
|
"loss": 0.8645,
|
|
"step": 7124
|
|
},
|
|
{
|
|
"epoch": 1.8949468085106385,
|
|
"grad_norm": 3.5201406478881836,
|
|
"learning_rate": 7.3215189295795565e-06,
|
|
"loss": 0.7253,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 1.8952127659574467,
|
|
"grad_norm": 3.9525210857391357,
|
|
"learning_rate": 7.320739936992514e-06,
|
|
"loss": 0.8073,
|
|
"step": 7126
|
|
},
|
|
{
|
|
"epoch": 1.8954787234042554,
|
|
"grad_norm": 3.8624043464660645,
|
|
"learning_rate": 7.319960872599048e-06,
|
|
"loss": 0.8157,
|
|
"step": 7127
|
|
},
|
|
{
|
|
"epoch": 1.8957446808510638,
|
|
"grad_norm": 4.123876571655273,
|
|
"learning_rate": 7.31918173642326e-06,
|
|
"loss": 0.8038,
|
|
"step": 7128
|
|
},
|
|
{
|
|
"epoch": 1.8960106382978723,
|
|
"grad_norm": 3.812316417694092,
|
|
"learning_rate": 7.318402528489258e-06,
|
|
"loss": 0.7421,
|
|
"step": 7129
|
|
},
|
|
{
|
|
"epoch": 1.896276595744681,
|
|
"grad_norm": 4.009311199188232,
|
|
"learning_rate": 7.317623248821153e-06,
|
|
"loss": 0.835,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 1.8965425531914892,
|
|
"grad_norm": 4.297110557556152,
|
|
"learning_rate": 7.316843897443055e-06,
|
|
"loss": 0.7093,
|
|
"step": 7131
|
|
},
|
|
{
|
|
"epoch": 1.896808510638298,
|
|
"grad_norm": 4.034492015838623,
|
|
"learning_rate": 7.316064474379081e-06,
|
|
"loss": 0.7682,
|
|
"step": 7132
|
|
},
|
|
{
|
|
"epoch": 1.8970744680851064,
|
|
"grad_norm": 4.544641494750977,
|
|
"learning_rate": 7.315284979653344e-06,
|
|
"loss": 0.8832,
|
|
"step": 7133
|
|
},
|
|
{
|
|
"epoch": 1.8973404255319148,
|
|
"grad_norm": 4.383004188537598,
|
|
"learning_rate": 7.314505413289964e-06,
|
|
"loss": 0.892,
|
|
"step": 7134
|
|
},
|
|
{
|
|
"epoch": 1.8976063829787235,
|
|
"grad_norm": 3.52055025100708,
|
|
"learning_rate": 7.313725775313061e-06,
|
|
"loss": 0.7965,
|
|
"step": 7135
|
|
},
|
|
{
|
|
"epoch": 1.8978723404255318,
|
|
"grad_norm": 3.933687925338745,
|
|
"learning_rate": 7.31294606574676e-06,
|
|
"loss": 0.7829,
|
|
"step": 7136
|
|
},
|
|
{
|
|
"epoch": 1.8981382978723405,
|
|
"grad_norm": 4.500588417053223,
|
|
"learning_rate": 7.312166284615183e-06,
|
|
"loss": 0.8802,
|
|
"step": 7137
|
|
},
|
|
{
|
|
"epoch": 1.898404255319149,
|
|
"grad_norm": 3.9210360050201416,
|
|
"learning_rate": 7.31138643194246e-06,
|
|
"loss": 0.7418,
|
|
"step": 7138
|
|
},
|
|
{
|
|
"epoch": 1.8986702127659574,
|
|
"grad_norm": 4.024209022521973,
|
|
"learning_rate": 7.3106065077527175e-06,
|
|
"loss": 0.8769,
|
|
"step": 7139
|
|
},
|
|
{
|
|
"epoch": 1.898936170212766,
|
|
"grad_norm": 4.242138862609863,
|
|
"learning_rate": 7.3098265120700915e-06,
|
|
"loss": 0.8789,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 1.8992021276595743,
|
|
"grad_norm": 3.6798341274261475,
|
|
"learning_rate": 7.309046444918712e-06,
|
|
"loss": 0.7971,
|
|
"step": 7141
|
|
},
|
|
{
|
|
"epoch": 1.899468085106383,
|
|
"grad_norm": 4.092346668243408,
|
|
"learning_rate": 7.308266306322719e-06,
|
|
"loss": 0.7864,
|
|
"step": 7142
|
|
},
|
|
{
|
|
"epoch": 1.8997340425531914,
|
|
"grad_norm": 4.132681846618652,
|
|
"learning_rate": 7.307486096306247e-06,
|
|
"loss": 0.8868,
|
|
"step": 7143
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"grad_norm": 3.893075942993164,
|
|
"learning_rate": 7.30670581489344e-06,
|
|
"loss": 0.9096,
|
|
"step": 7144
|
|
},
|
|
{
|
|
"epoch": 1.9002659574468086,
|
|
"grad_norm": 3.807593822479248,
|
|
"learning_rate": 7.305925462108439e-06,
|
|
"loss": 0.7444,
|
|
"step": 7145
|
|
},
|
|
{
|
|
"epoch": 1.900531914893617,
|
|
"grad_norm": 3.6460392475128174,
|
|
"learning_rate": 7.305145037975388e-06,
|
|
"loss": 0.74,
|
|
"step": 7146
|
|
},
|
|
{
|
|
"epoch": 1.9007978723404255,
|
|
"grad_norm": 3.5041310787200928,
|
|
"learning_rate": 7.304364542518435e-06,
|
|
"loss": 0.8561,
|
|
"step": 7147
|
|
},
|
|
{
|
|
"epoch": 1.9010638297872342,
|
|
"grad_norm": 4.359119892120361,
|
|
"learning_rate": 7.303583975761732e-06,
|
|
"loss": 0.735,
|
|
"step": 7148
|
|
},
|
|
{
|
|
"epoch": 1.9013297872340424,
|
|
"grad_norm": 4.176085948944092,
|
|
"learning_rate": 7.302803337729429e-06,
|
|
"loss": 0.8723,
|
|
"step": 7149
|
|
},
|
|
{
|
|
"epoch": 1.9015957446808511,
|
|
"grad_norm": 3.764272689819336,
|
|
"learning_rate": 7.302022628445678e-06,
|
|
"loss": 0.8359,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 1.9018617021276596,
|
|
"grad_norm": 3.8661603927612305,
|
|
"learning_rate": 7.301241847934637e-06,
|
|
"loss": 0.9286,
|
|
"step": 7151
|
|
},
|
|
{
|
|
"epoch": 1.902127659574468,
|
|
"grad_norm": 3.493070363998413,
|
|
"learning_rate": 7.300460996220464e-06,
|
|
"loss": 0.7439,
|
|
"step": 7152
|
|
},
|
|
{
|
|
"epoch": 1.9023936170212767,
|
|
"grad_norm": 3.425701379776001,
|
|
"learning_rate": 7.2996800733273196e-06,
|
|
"loss": 0.8468,
|
|
"step": 7153
|
|
},
|
|
{
|
|
"epoch": 1.902659574468085,
|
|
"grad_norm": 3.9553513526916504,
|
|
"learning_rate": 7.298899079279365e-06,
|
|
"loss": 0.8075,
|
|
"step": 7154
|
|
},
|
|
{
|
|
"epoch": 1.9029255319148937,
|
|
"grad_norm": 3.900907516479492,
|
|
"learning_rate": 7.298118014100766e-06,
|
|
"loss": 0.8969,
|
|
"step": 7155
|
|
},
|
|
{
|
|
"epoch": 1.9031914893617021,
|
|
"grad_norm": 3.8822121620178223,
|
|
"learning_rate": 7.297336877815693e-06,
|
|
"loss": 0.8685,
|
|
"step": 7156
|
|
},
|
|
{
|
|
"epoch": 1.9034574468085106,
|
|
"grad_norm": 3.847317695617676,
|
|
"learning_rate": 7.29655567044831e-06,
|
|
"loss": 0.7251,
|
|
"step": 7157
|
|
},
|
|
{
|
|
"epoch": 1.9037234042553193,
|
|
"grad_norm": 3.5498738288879395,
|
|
"learning_rate": 7.295774392022791e-06,
|
|
"loss": 0.7035,
|
|
"step": 7158
|
|
},
|
|
{
|
|
"epoch": 1.9039893617021275,
|
|
"grad_norm": 3.658343553543091,
|
|
"learning_rate": 7.2949930425633095e-06,
|
|
"loss": 0.7414,
|
|
"step": 7159
|
|
},
|
|
{
|
|
"epoch": 1.9042553191489362,
|
|
"grad_norm": 3.804388999938965,
|
|
"learning_rate": 7.2942116220940406e-06,
|
|
"loss": 0.8057,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 1.9045212765957447,
|
|
"grad_norm": 3.876521348953247,
|
|
"learning_rate": 7.293430130639163e-06,
|
|
"loss": 0.886,
|
|
"step": 7161
|
|
},
|
|
{
|
|
"epoch": 1.9047872340425531,
|
|
"grad_norm": 3.969161033630371,
|
|
"learning_rate": 7.292648568222859e-06,
|
|
"loss": 0.9049,
|
|
"step": 7162
|
|
},
|
|
{
|
|
"epoch": 1.9050531914893618,
|
|
"grad_norm": 4.049928188323975,
|
|
"learning_rate": 7.2918669348693075e-06,
|
|
"loss": 0.8954,
|
|
"step": 7163
|
|
},
|
|
{
|
|
"epoch": 1.90531914893617,
|
|
"grad_norm": 3.997854232788086,
|
|
"learning_rate": 7.291085230602694e-06,
|
|
"loss": 0.9063,
|
|
"step": 7164
|
|
},
|
|
{
|
|
"epoch": 1.9055851063829787,
|
|
"grad_norm": 4.090554237365723,
|
|
"learning_rate": 7.290303455447208e-06,
|
|
"loss": 0.8132,
|
|
"step": 7165
|
|
},
|
|
{
|
|
"epoch": 1.9058510638297872,
|
|
"grad_norm": 3.8804330825805664,
|
|
"learning_rate": 7.289521609427035e-06,
|
|
"loss": 0.8245,
|
|
"step": 7166
|
|
},
|
|
{
|
|
"epoch": 1.9061170212765957,
|
|
"grad_norm": 3.7036948204040527,
|
|
"learning_rate": 7.288739692566367e-06,
|
|
"loss": 0.891,
|
|
"step": 7167
|
|
},
|
|
{
|
|
"epoch": 1.9063829787234043,
|
|
"grad_norm": 3.8350512981414795,
|
|
"learning_rate": 7.2879577048894e-06,
|
|
"loss": 0.7912,
|
|
"step": 7168
|
|
},
|
|
{
|
|
"epoch": 1.9066489361702128,
|
|
"grad_norm": 3.3897817134857178,
|
|
"learning_rate": 7.287175646420327e-06,
|
|
"loss": 0.8327,
|
|
"step": 7169
|
|
},
|
|
{
|
|
"epoch": 1.9069148936170213,
|
|
"grad_norm": 4.037939548492432,
|
|
"learning_rate": 7.2863935171833465e-06,
|
|
"loss": 0.8793,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 1.90718085106383,
|
|
"grad_norm": 3.7813265323638916,
|
|
"learning_rate": 7.285611317202661e-06,
|
|
"loss": 0.8551,
|
|
"step": 7171
|
|
},
|
|
{
|
|
"epoch": 1.9074468085106382,
|
|
"grad_norm": 3.916761636734009,
|
|
"learning_rate": 7.284829046502467e-06,
|
|
"loss": 0.7564,
|
|
"step": 7172
|
|
},
|
|
{
|
|
"epoch": 1.9077127659574469,
|
|
"grad_norm": 3.843834400177002,
|
|
"learning_rate": 7.284046705106974e-06,
|
|
"loss": 0.8456,
|
|
"step": 7173
|
|
},
|
|
{
|
|
"epoch": 1.9079787234042553,
|
|
"grad_norm": 3.752497434616089,
|
|
"learning_rate": 7.2832642930403876e-06,
|
|
"loss": 0.8221,
|
|
"step": 7174
|
|
},
|
|
{
|
|
"epoch": 1.9082446808510638,
|
|
"grad_norm": 4.00820779800415,
|
|
"learning_rate": 7.282481810326915e-06,
|
|
"loss": 0.9672,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 1.9085106382978725,
|
|
"grad_norm": 4.226334571838379,
|
|
"learning_rate": 7.281699256990766e-06,
|
|
"loss": 0.8973,
|
|
"step": 7176
|
|
},
|
|
{
|
|
"epoch": 1.9087765957446807,
|
|
"grad_norm": 3.871880531311035,
|
|
"learning_rate": 7.280916633056159e-06,
|
|
"loss": 0.8204,
|
|
"step": 7177
|
|
},
|
|
{
|
|
"epoch": 1.9090425531914894,
|
|
"grad_norm": 4.339875221252441,
|
|
"learning_rate": 7.280133938547304e-06,
|
|
"loss": 0.8958,
|
|
"step": 7178
|
|
},
|
|
{
|
|
"epoch": 1.9093085106382979,
|
|
"grad_norm": 3.7419753074645996,
|
|
"learning_rate": 7.27935117348842e-06,
|
|
"loss": 0.789,
|
|
"step": 7179
|
|
},
|
|
{
|
|
"epoch": 1.9095744680851063,
|
|
"grad_norm": 4.0317888259887695,
|
|
"learning_rate": 7.278568337903729e-06,
|
|
"loss": 0.7995,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 1.909840425531915,
|
|
"grad_norm": 3.9452288150787354,
|
|
"learning_rate": 7.277785431817449e-06,
|
|
"loss": 0.8576,
|
|
"step": 7181
|
|
},
|
|
{
|
|
"epoch": 1.9101063829787233,
|
|
"grad_norm": 3.957437753677368,
|
|
"learning_rate": 7.277002455253807e-06,
|
|
"loss": 0.8532,
|
|
"step": 7182
|
|
},
|
|
{
|
|
"epoch": 1.910372340425532,
|
|
"grad_norm": 3.9327943325042725,
|
|
"learning_rate": 7.276219408237029e-06,
|
|
"loss": 0.8155,
|
|
"step": 7183
|
|
},
|
|
{
|
|
"epoch": 1.9106382978723404,
|
|
"grad_norm": 4.20408296585083,
|
|
"learning_rate": 7.27543629079134e-06,
|
|
"loss": 0.8285,
|
|
"step": 7184
|
|
},
|
|
{
|
|
"epoch": 1.9109042553191489,
|
|
"grad_norm": 4.2042341232299805,
|
|
"learning_rate": 7.274653102940974e-06,
|
|
"loss": 0.8624,
|
|
"step": 7185
|
|
},
|
|
{
|
|
"epoch": 1.9111702127659576,
|
|
"grad_norm": 4.000115871429443,
|
|
"learning_rate": 7.2738698447101645e-06,
|
|
"loss": 0.8343,
|
|
"step": 7186
|
|
},
|
|
{
|
|
"epoch": 1.9114361702127658,
|
|
"grad_norm": 4.323785305023193,
|
|
"learning_rate": 7.273086516123145e-06,
|
|
"loss": 0.7525,
|
|
"step": 7187
|
|
},
|
|
{
|
|
"epoch": 1.9117021276595745,
|
|
"grad_norm": 3.9202396869659424,
|
|
"learning_rate": 7.27230311720415e-06,
|
|
"loss": 0.9014,
|
|
"step": 7188
|
|
},
|
|
{
|
|
"epoch": 1.911968085106383,
|
|
"grad_norm": 3.924821615219116,
|
|
"learning_rate": 7.271519647977422e-06,
|
|
"loss": 0.8206,
|
|
"step": 7189
|
|
},
|
|
{
|
|
"epoch": 1.9122340425531914,
|
|
"grad_norm": 3.9752979278564453,
|
|
"learning_rate": 7.270736108467202e-06,
|
|
"loss": 0.9627,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 1.9125,
|
|
"grad_norm": 3.7932825088500977,
|
|
"learning_rate": 7.269952498697734e-06,
|
|
"loss": 0.8227,
|
|
"step": 7191
|
|
},
|
|
{
|
|
"epoch": 1.9127659574468086,
|
|
"grad_norm": 4.589715480804443,
|
|
"learning_rate": 7.2691688186932626e-06,
|
|
"loss": 0.9176,
|
|
"step": 7192
|
|
},
|
|
{
|
|
"epoch": 1.913031914893617,
|
|
"grad_norm": 4.00385856628418,
|
|
"learning_rate": 7.268385068478037e-06,
|
|
"loss": 0.7602,
|
|
"step": 7193
|
|
},
|
|
{
|
|
"epoch": 1.9132978723404257,
|
|
"grad_norm": 4.291144847869873,
|
|
"learning_rate": 7.267601248076307e-06,
|
|
"loss": 1.0254,
|
|
"step": 7194
|
|
},
|
|
{
|
|
"epoch": 1.913563829787234,
|
|
"grad_norm": 3.699037790298462,
|
|
"learning_rate": 7.2668173575123234e-06,
|
|
"loss": 0.8528,
|
|
"step": 7195
|
|
},
|
|
{
|
|
"epoch": 1.9138297872340426,
|
|
"grad_norm": 3.936768054962158,
|
|
"learning_rate": 7.266033396810343e-06,
|
|
"loss": 0.7172,
|
|
"step": 7196
|
|
},
|
|
{
|
|
"epoch": 1.914095744680851,
|
|
"grad_norm": 3.23809814453125,
|
|
"learning_rate": 7.265249365994621e-06,
|
|
"loss": 0.6519,
|
|
"step": 7197
|
|
},
|
|
{
|
|
"epoch": 1.9143617021276595,
|
|
"grad_norm": 4.3691020011901855,
|
|
"learning_rate": 7.2644652650894155e-06,
|
|
"loss": 0.8097,
|
|
"step": 7198
|
|
},
|
|
{
|
|
"epoch": 1.9146276595744682,
|
|
"grad_norm": 4.070173263549805,
|
|
"learning_rate": 7.263681094118989e-06,
|
|
"loss": 1.0137,
|
|
"step": 7199
|
|
},
|
|
{
|
|
"epoch": 1.9148936170212765,
|
|
"grad_norm": 3.9889721870422363,
|
|
"learning_rate": 7.262896853107606e-06,
|
|
"loss": 0.8935,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 1.9151595744680852,
|
|
"grad_norm": 3.6993491649627686,
|
|
"learning_rate": 7.262112542079529e-06,
|
|
"loss": 0.7445,
|
|
"step": 7201
|
|
},
|
|
{
|
|
"epoch": 1.9154255319148936,
|
|
"grad_norm": 4.081962585449219,
|
|
"learning_rate": 7.261328161059026e-06,
|
|
"loss": 1.0239,
|
|
"step": 7202
|
|
},
|
|
{
|
|
"epoch": 1.915691489361702,
|
|
"grad_norm": 4.065913677215576,
|
|
"learning_rate": 7.260543710070369e-06,
|
|
"loss": 0.9063,
|
|
"step": 7203
|
|
},
|
|
{
|
|
"epoch": 1.9159574468085108,
|
|
"grad_norm": 3.7012364864349365,
|
|
"learning_rate": 7.259759189137827e-06,
|
|
"loss": 0.9102,
|
|
"step": 7204
|
|
},
|
|
{
|
|
"epoch": 1.916223404255319,
|
|
"grad_norm": 4.341013431549072,
|
|
"learning_rate": 7.258974598285674e-06,
|
|
"loss": 0.8309,
|
|
"step": 7205
|
|
},
|
|
{
|
|
"epoch": 1.9164893617021277,
|
|
"grad_norm": 3.8948628902435303,
|
|
"learning_rate": 7.258189937538189e-06,
|
|
"loss": 0.786,
|
|
"step": 7206
|
|
},
|
|
{
|
|
"epoch": 1.9167553191489362,
|
|
"grad_norm": 4.040065288543701,
|
|
"learning_rate": 7.257405206919649e-06,
|
|
"loss": 0.7283,
|
|
"step": 7207
|
|
},
|
|
{
|
|
"epoch": 1.9170212765957446,
|
|
"grad_norm": 3.775395631790161,
|
|
"learning_rate": 7.256620406454333e-06,
|
|
"loss": 0.7441,
|
|
"step": 7208
|
|
},
|
|
{
|
|
"epoch": 1.9172872340425533,
|
|
"grad_norm": 4.277199745178223,
|
|
"learning_rate": 7.255835536166525e-06,
|
|
"loss": 0.8784,
|
|
"step": 7209
|
|
},
|
|
{
|
|
"epoch": 1.9175531914893615,
|
|
"grad_norm": 4.311332702636719,
|
|
"learning_rate": 7.25505059608051e-06,
|
|
"loss": 0.911,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 1.9178191489361702,
|
|
"grad_norm": 3.843778371810913,
|
|
"learning_rate": 7.254265586220574e-06,
|
|
"loss": 0.7906,
|
|
"step": 7211
|
|
},
|
|
{
|
|
"epoch": 1.9180851063829787,
|
|
"grad_norm": 4.064030647277832,
|
|
"learning_rate": 7.253480506611008e-06,
|
|
"loss": 0.8904,
|
|
"step": 7212
|
|
},
|
|
{
|
|
"epoch": 1.9183510638297872,
|
|
"grad_norm": 3.85115385055542,
|
|
"learning_rate": 7.252695357276101e-06,
|
|
"loss": 0.7148,
|
|
"step": 7213
|
|
},
|
|
{
|
|
"epoch": 1.9186170212765958,
|
|
"grad_norm": 3.716801643371582,
|
|
"learning_rate": 7.251910138240147e-06,
|
|
"loss": 0.7956,
|
|
"step": 7214
|
|
},
|
|
{
|
|
"epoch": 1.9188829787234043,
|
|
"grad_norm": 3.7296745777130127,
|
|
"learning_rate": 7.251124849527442e-06,
|
|
"loss": 0.8143,
|
|
"step": 7215
|
|
},
|
|
{
|
|
"epoch": 1.9191489361702128,
|
|
"grad_norm": 3.9987385272979736,
|
|
"learning_rate": 7.250339491162284e-06,
|
|
"loss": 0.8333,
|
|
"step": 7216
|
|
},
|
|
{
|
|
"epoch": 1.9194148936170212,
|
|
"grad_norm": 3.8190033435821533,
|
|
"learning_rate": 7.2495540631689745e-06,
|
|
"loss": 0.8476,
|
|
"step": 7217
|
|
},
|
|
{
|
|
"epoch": 1.9196808510638297,
|
|
"grad_norm": 4.055121898651123,
|
|
"learning_rate": 7.248768565571811e-06,
|
|
"loss": 0.8605,
|
|
"step": 7218
|
|
},
|
|
{
|
|
"epoch": 1.9199468085106384,
|
|
"grad_norm": 4.3670525550842285,
|
|
"learning_rate": 7.247982998395102e-06,
|
|
"loss": 0.8381,
|
|
"step": 7219
|
|
},
|
|
{
|
|
"epoch": 1.9202127659574468,
|
|
"grad_norm": 4.680405139923096,
|
|
"learning_rate": 7.247197361663152e-06,
|
|
"loss": 0.9635,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 1.9204787234042553,
|
|
"grad_norm": 4.1340460777282715,
|
|
"learning_rate": 7.24641165540027e-06,
|
|
"loss": 0.8125,
|
|
"step": 7221
|
|
},
|
|
{
|
|
"epoch": 1.920744680851064,
|
|
"grad_norm": 4.003271102905273,
|
|
"learning_rate": 7.245625879630767e-06,
|
|
"loss": 0.8934,
|
|
"step": 7222
|
|
},
|
|
{
|
|
"epoch": 1.9210106382978722,
|
|
"grad_norm": 4.222568035125732,
|
|
"learning_rate": 7.244840034378955e-06,
|
|
"loss": 1.0299,
|
|
"step": 7223
|
|
},
|
|
{
|
|
"epoch": 1.921276595744681,
|
|
"grad_norm": 3.762643337249756,
|
|
"learning_rate": 7.244054119669148e-06,
|
|
"loss": 0.6798,
|
|
"step": 7224
|
|
},
|
|
{
|
|
"epoch": 1.9215425531914894,
|
|
"grad_norm": 4.137721538543701,
|
|
"learning_rate": 7.243268135525666e-06,
|
|
"loss": 0.8147,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 1.9218085106382978,
|
|
"grad_norm": 4.0250139236450195,
|
|
"learning_rate": 7.242482081972827e-06,
|
|
"loss": 0.8394,
|
|
"step": 7226
|
|
},
|
|
{
|
|
"epoch": 1.9220744680851065,
|
|
"grad_norm": 3.7539706230163574,
|
|
"learning_rate": 7.241695959034951e-06,
|
|
"loss": 0.8293,
|
|
"step": 7227
|
|
},
|
|
{
|
|
"epoch": 1.9223404255319148,
|
|
"grad_norm": 4.054415225982666,
|
|
"learning_rate": 7.2409097667363635e-06,
|
|
"loss": 0.9107,
|
|
"step": 7228
|
|
},
|
|
{
|
|
"epoch": 1.9226063829787234,
|
|
"grad_norm": 4.380495548248291,
|
|
"learning_rate": 7.2401235051013885e-06,
|
|
"loss": 0.8641,
|
|
"step": 7229
|
|
},
|
|
{
|
|
"epoch": 1.922872340425532,
|
|
"grad_norm": 4.061448097229004,
|
|
"learning_rate": 7.239337174154357e-06,
|
|
"loss": 0.8332,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 1.9231382978723404,
|
|
"grad_norm": 4.095539093017578,
|
|
"learning_rate": 7.2385507739195945e-06,
|
|
"loss": 0.828,
|
|
"step": 7231
|
|
},
|
|
{
|
|
"epoch": 1.923404255319149,
|
|
"grad_norm": 4.271059513092041,
|
|
"learning_rate": 7.2377643044214375e-06,
|
|
"loss": 0.8365,
|
|
"step": 7232
|
|
},
|
|
{
|
|
"epoch": 1.9236702127659573,
|
|
"grad_norm": 3.9962894916534424,
|
|
"learning_rate": 7.236977765684216e-06,
|
|
"loss": 0.6932,
|
|
"step": 7233
|
|
},
|
|
{
|
|
"epoch": 1.923936170212766,
|
|
"grad_norm": 4.267841339111328,
|
|
"learning_rate": 7.236191157732272e-06,
|
|
"loss": 0.8561,
|
|
"step": 7234
|
|
},
|
|
{
|
|
"epoch": 1.9242021276595744,
|
|
"grad_norm": 4.299777030944824,
|
|
"learning_rate": 7.2354044805899385e-06,
|
|
"loss": 0.864,
|
|
"step": 7235
|
|
},
|
|
{
|
|
"epoch": 1.924468085106383,
|
|
"grad_norm": 4.053724765777588,
|
|
"learning_rate": 7.234617734281558e-06,
|
|
"loss": 0.8643,
|
|
"step": 7236
|
|
},
|
|
{
|
|
"epoch": 1.9247340425531916,
|
|
"grad_norm": 4.541396141052246,
|
|
"learning_rate": 7.2338309188314745e-06,
|
|
"loss": 0.793,
|
|
"step": 7237
|
|
},
|
|
{
|
|
"epoch": 1.925,
|
|
"grad_norm": 4.2436676025390625,
|
|
"learning_rate": 7.233044034264034e-06,
|
|
"loss": 0.7894,
|
|
"step": 7238
|
|
},
|
|
{
|
|
"epoch": 1.9252659574468085,
|
|
"grad_norm": 4.764181613922119,
|
|
"learning_rate": 7.23225708060358e-06,
|
|
"loss": 0.7979,
|
|
"step": 7239
|
|
},
|
|
{
|
|
"epoch": 1.925531914893617,
|
|
"grad_norm": 4.301015377044678,
|
|
"learning_rate": 7.2314700578744635e-06,
|
|
"loss": 0.8022,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 1.9257978723404254,
|
|
"grad_norm": 3.9735851287841797,
|
|
"learning_rate": 7.230682966101038e-06,
|
|
"loss": 0.7377,
|
|
"step": 7241
|
|
},
|
|
{
|
|
"epoch": 1.9260638297872341,
|
|
"grad_norm": 4.120856285095215,
|
|
"learning_rate": 7.229895805307654e-06,
|
|
"loss": 0.7386,
|
|
"step": 7242
|
|
},
|
|
{
|
|
"epoch": 1.9263297872340426,
|
|
"grad_norm": 4.618571758270264,
|
|
"learning_rate": 7.229108575518668e-06,
|
|
"loss": 0.8771,
|
|
"step": 7243
|
|
},
|
|
{
|
|
"epoch": 1.926595744680851,
|
|
"grad_norm": 3.679917573928833,
|
|
"learning_rate": 7.22832127675844e-06,
|
|
"loss": 0.8137,
|
|
"step": 7244
|
|
},
|
|
{
|
|
"epoch": 1.9268617021276597,
|
|
"grad_norm": 4.480624198913574,
|
|
"learning_rate": 7.227533909051327e-06,
|
|
"loss": 0.8955,
|
|
"step": 7245
|
|
},
|
|
{
|
|
"epoch": 1.927127659574468,
|
|
"grad_norm": 3.715806722640991,
|
|
"learning_rate": 7.226746472421692e-06,
|
|
"loss": 0.8023,
|
|
"step": 7246
|
|
},
|
|
{
|
|
"epoch": 1.9273936170212767,
|
|
"grad_norm": 4.008445739746094,
|
|
"learning_rate": 7.2259589668939005e-06,
|
|
"loss": 0.8584,
|
|
"step": 7247
|
|
},
|
|
{
|
|
"epoch": 1.9276595744680851,
|
|
"grad_norm": 4.211793899536133,
|
|
"learning_rate": 7.225171392492316e-06,
|
|
"loss": 0.8412,
|
|
"step": 7248
|
|
},
|
|
{
|
|
"epoch": 1.9279255319148936,
|
|
"grad_norm": 4.422094821929932,
|
|
"learning_rate": 7.224383749241311e-06,
|
|
"loss": 0.811,
|
|
"step": 7249
|
|
},
|
|
{
|
|
"epoch": 1.9281914893617023,
|
|
"grad_norm": 3.894848108291626,
|
|
"learning_rate": 7.223596037165252e-06,
|
|
"loss": 0.9126,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 1.9284574468085105,
|
|
"grad_norm": 3.9139139652252197,
|
|
"learning_rate": 7.222808256288515e-06,
|
|
"loss": 0.7837,
|
|
"step": 7251
|
|
},
|
|
{
|
|
"epoch": 1.9287234042553192,
|
|
"grad_norm": 4.1469197273254395,
|
|
"learning_rate": 7.222020406635474e-06,
|
|
"loss": 0.7134,
|
|
"step": 7252
|
|
},
|
|
{
|
|
"epoch": 1.9289893617021276,
|
|
"grad_norm": 3.5331952571868896,
|
|
"learning_rate": 7.2212324882305045e-06,
|
|
"loss": 0.7372,
|
|
"step": 7253
|
|
},
|
|
{
|
|
"epoch": 1.929255319148936,
|
|
"grad_norm": 3.312333822250366,
|
|
"learning_rate": 7.220444501097986e-06,
|
|
"loss": 0.7583,
|
|
"step": 7254
|
|
},
|
|
{
|
|
"epoch": 1.9295212765957448,
|
|
"grad_norm": 4.264598846435547,
|
|
"learning_rate": 7.2196564452623015e-06,
|
|
"loss": 0.8354,
|
|
"step": 7255
|
|
},
|
|
{
|
|
"epoch": 1.929787234042553,
|
|
"grad_norm": 4.467483997344971,
|
|
"learning_rate": 7.2188683207478326e-06,
|
|
"loss": 0.8728,
|
|
"step": 7256
|
|
},
|
|
{
|
|
"epoch": 1.9300531914893617,
|
|
"grad_norm": 3.850327730178833,
|
|
"learning_rate": 7.218080127578966e-06,
|
|
"loss": 0.8222,
|
|
"step": 7257
|
|
},
|
|
{
|
|
"epoch": 1.9303191489361702,
|
|
"grad_norm": 3.970350980758667,
|
|
"learning_rate": 7.217291865780089e-06,
|
|
"loss": 0.8979,
|
|
"step": 7258
|
|
},
|
|
{
|
|
"epoch": 1.9305851063829786,
|
|
"grad_norm": 3.9415476322174072,
|
|
"learning_rate": 7.21650353537559e-06,
|
|
"loss": 0.7552,
|
|
"step": 7259
|
|
},
|
|
{
|
|
"epoch": 1.9308510638297873,
|
|
"grad_norm": 3.566114664077759,
|
|
"learning_rate": 7.215715136389862e-06,
|
|
"loss": 0.8683,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 1.9311170212765958,
|
|
"grad_norm": 3.991467237472534,
|
|
"learning_rate": 7.2149266688473005e-06,
|
|
"loss": 0.7815,
|
|
"step": 7261
|
|
},
|
|
{
|
|
"epoch": 1.9313829787234043,
|
|
"grad_norm": 4.0647406578063965,
|
|
"learning_rate": 7.214138132772299e-06,
|
|
"loss": 0.7483,
|
|
"step": 7262
|
|
},
|
|
{
|
|
"epoch": 1.9316489361702127,
|
|
"grad_norm": 4.495807647705078,
|
|
"learning_rate": 7.213349528189258e-06,
|
|
"loss": 0.9067,
|
|
"step": 7263
|
|
},
|
|
{
|
|
"epoch": 1.9319148936170212,
|
|
"grad_norm": 4.034248352050781,
|
|
"learning_rate": 7.212560855122576e-06,
|
|
"loss": 0.7541,
|
|
"step": 7264
|
|
},
|
|
{
|
|
"epoch": 1.9321808510638299,
|
|
"grad_norm": 3.8755152225494385,
|
|
"learning_rate": 7.211772113596656e-06,
|
|
"loss": 0.8805,
|
|
"step": 7265
|
|
},
|
|
{
|
|
"epoch": 1.9324468085106383,
|
|
"grad_norm": 3.655921220779419,
|
|
"learning_rate": 7.210983303635901e-06,
|
|
"loss": 0.7864,
|
|
"step": 7266
|
|
},
|
|
{
|
|
"epoch": 1.9327127659574468,
|
|
"grad_norm": 4.281502723693848,
|
|
"learning_rate": 7.210194425264723e-06,
|
|
"loss": 0.9595,
|
|
"step": 7267
|
|
},
|
|
{
|
|
"epoch": 1.9329787234042555,
|
|
"grad_norm": 3.8239359855651855,
|
|
"learning_rate": 7.209405478507525e-06,
|
|
"loss": 0.7896,
|
|
"step": 7268
|
|
},
|
|
{
|
|
"epoch": 1.9332446808510637,
|
|
"grad_norm": 3.9340760707855225,
|
|
"learning_rate": 7.20861646338872e-06,
|
|
"loss": 0.855,
|
|
"step": 7269
|
|
},
|
|
{
|
|
"epoch": 1.9335106382978724,
|
|
"grad_norm": 3.6993649005889893,
|
|
"learning_rate": 7.207827379932724e-06,
|
|
"loss": 0.774,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 1.9337765957446809,
|
|
"grad_norm": 4.12832498550415,
|
|
"learning_rate": 7.2070382281639466e-06,
|
|
"loss": 0.8031,
|
|
"step": 7271
|
|
},
|
|
{
|
|
"epoch": 1.9340425531914893,
|
|
"grad_norm": 3.675234079360962,
|
|
"learning_rate": 7.206249008106808e-06,
|
|
"loss": 0.7203,
|
|
"step": 7272
|
|
},
|
|
{
|
|
"epoch": 1.934308510638298,
|
|
"grad_norm": 4.341015338897705,
|
|
"learning_rate": 7.20545971978573e-06,
|
|
"loss": 0.7099,
|
|
"step": 7273
|
|
},
|
|
{
|
|
"epoch": 1.9345744680851062,
|
|
"grad_norm": 4.289004802703857,
|
|
"learning_rate": 7.2046703632251295e-06,
|
|
"loss": 0.8558,
|
|
"step": 7274
|
|
},
|
|
{
|
|
"epoch": 1.934840425531915,
|
|
"grad_norm": 3.8868236541748047,
|
|
"learning_rate": 7.203880938449432e-06,
|
|
"loss": 0.8851,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 1.9351063829787234,
|
|
"grad_norm": 4.085642337799072,
|
|
"learning_rate": 7.2030914454830645e-06,
|
|
"loss": 0.7872,
|
|
"step": 7276
|
|
},
|
|
{
|
|
"epoch": 1.9353723404255319,
|
|
"grad_norm": 3.6767923831939697,
|
|
"learning_rate": 7.202301884350454e-06,
|
|
"loss": 0.712,
|
|
"step": 7277
|
|
},
|
|
{
|
|
"epoch": 1.9356382978723405,
|
|
"grad_norm": 4.32539176940918,
|
|
"learning_rate": 7.201512255076031e-06,
|
|
"loss": 0.9707,
|
|
"step": 7278
|
|
},
|
|
{
|
|
"epoch": 1.9359042553191488,
|
|
"grad_norm": 3.729510545730591,
|
|
"learning_rate": 7.2007225576842255e-06,
|
|
"loss": 0.8447,
|
|
"step": 7279
|
|
},
|
|
{
|
|
"epoch": 1.9361702127659575,
|
|
"grad_norm": 4.127895832061768,
|
|
"learning_rate": 7.1999327921994735e-06,
|
|
"loss": 0.8129,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 1.936436170212766,
|
|
"grad_norm": 3.7349631786346436,
|
|
"learning_rate": 7.199142958646211e-06,
|
|
"loss": 0.6886,
|
|
"step": 7281
|
|
},
|
|
{
|
|
"epoch": 1.9367021276595744,
|
|
"grad_norm": 3.900869369506836,
|
|
"learning_rate": 7.198353057048876e-06,
|
|
"loss": 0.7183,
|
|
"step": 7282
|
|
},
|
|
{
|
|
"epoch": 1.936968085106383,
|
|
"grad_norm": 4.21663761138916,
|
|
"learning_rate": 7.197563087431909e-06,
|
|
"loss": 0.9005,
|
|
"step": 7283
|
|
},
|
|
{
|
|
"epoch": 1.9372340425531915,
|
|
"grad_norm": 3.992421865463257,
|
|
"learning_rate": 7.196773049819753e-06,
|
|
"loss": 0.8604,
|
|
"step": 7284
|
|
},
|
|
{
|
|
"epoch": 1.9375,
|
|
"grad_norm": 4.140373229980469,
|
|
"learning_rate": 7.195982944236853e-06,
|
|
"loss": 0.9231,
|
|
"step": 7285
|
|
},
|
|
{
|
|
"epoch": 1.9377659574468085,
|
|
"grad_norm": 3.9591143131256104,
|
|
"learning_rate": 7.1951927707076545e-06,
|
|
"loss": 0.9934,
|
|
"step": 7286
|
|
},
|
|
{
|
|
"epoch": 1.938031914893617,
|
|
"grad_norm": 4.134740352630615,
|
|
"learning_rate": 7.194402529256608e-06,
|
|
"loss": 0.8869,
|
|
"step": 7287
|
|
},
|
|
{
|
|
"epoch": 1.9382978723404256,
|
|
"grad_norm": 3.9935176372528076,
|
|
"learning_rate": 7.193612219908161e-06,
|
|
"loss": 0.7377,
|
|
"step": 7288
|
|
},
|
|
{
|
|
"epoch": 1.938563829787234,
|
|
"grad_norm": 4.432157039642334,
|
|
"learning_rate": 7.192821842686772e-06,
|
|
"loss": 0.864,
|
|
"step": 7289
|
|
},
|
|
{
|
|
"epoch": 1.9388297872340425,
|
|
"grad_norm": 4.096209526062012,
|
|
"learning_rate": 7.1920313976168935e-06,
|
|
"loss": 0.8539,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 1.9390957446808512,
|
|
"grad_norm": 3.792664051055908,
|
|
"learning_rate": 7.191240884722982e-06,
|
|
"loss": 0.8195,
|
|
"step": 7291
|
|
},
|
|
{
|
|
"epoch": 1.9393617021276595,
|
|
"grad_norm": 3.759690046310425,
|
|
"learning_rate": 7.190450304029497e-06,
|
|
"loss": 0.7395,
|
|
"step": 7292
|
|
},
|
|
{
|
|
"epoch": 1.9396276595744681,
|
|
"grad_norm": 3.7826247215270996,
|
|
"learning_rate": 7.1896596555609025e-06,
|
|
"loss": 0.7206,
|
|
"step": 7293
|
|
},
|
|
{
|
|
"epoch": 1.9398936170212766,
|
|
"grad_norm": 3.8327670097351074,
|
|
"learning_rate": 7.1888689393416575e-06,
|
|
"loss": 0.9116,
|
|
"step": 7294
|
|
},
|
|
{
|
|
"epoch": 1.940159574468085,
|
|
"grad_norm": 3.965418815612793,
|
|
"learning_rate": 7.188078155396232e-06,
|
|
"loss": 0.8134,
|
|
"step": 7295
|
|
},
|
|
{
|
|
"epoch": 1.9404255319148938,
|
|
"grad_norm": 3.9271137714385986,
|
|
"learning_rate": 7.187287303749093e-06,
|
|
"loss": 0.705,
|
|
"step": 7296
|
|
},
|
|
{
|
|
"epoch": 1.940691489361702,
|
|
"grad_norm": 4.100310325622559,
|
|
"learning_rate": 7.186496384424708e-06,
|
|
"loss": 0.8471,
|
|
"step": 7297
|
|
},
|
|
{
|
|
"epoch": 1.9409574468085107,
|
|
"grad_norm": 3.9107069969177246,
|
|
"learning_rate": 7.185705397447552e-06,
|
|
"loss": 0.8495,
|
|
"step": 7298
|
|
},
|
|
{
|
|
"epoch": 1.9412234042553191,
|
|
"grad_norm": 4.238333225250244,
|
|
"learning_rate": 7.1849143428420975e-06,
|
|
"loss": 0.7926,
|
|
"step": 7299
|
|
},
|
|
{
|
|
"epoch": 1.9414893617021276,
|
|
"grad_norm": 4.412265777587891,
|
|
"learning_rate": 7.18412322063282e-06,
|
|
"loss": 0.947,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 1.9417553191489363,
|
|
"grad_norm": 3.686246156692505,
|
|
"learning_rate": 7.183332030844199e-06,
|
|
"loss": 0.7733,
|
|
"step": 7301
|
|
},
|
|
{
|
|
"epoch": 1.9420212765957445,
|
|
"grad_norm": 3.924842596054077,
|
|
"learning_rate": 7.182540773500715e-06,
|
|
"loss": 0.9132,
|
|
"step": 7302
|
|
},
|
|
{
|
|
"epoch": 1.9422872340425532,
|
|
"grad_norm": 3.5468335151672363,
|
|
"learning_rate": 7.181749448626849e-06,
|
|
"loss": 0.8032,
|
|
"step": 7303
|
|
},
|
|
{
|
|
"epoch": 1.9425531914893617,
|
|
"grad_norm": 3.618908166885376,
|
|
"learning_rate": 7.180958056247087e-06,
|
|
"loss": 0.8473,
|
|
"step": 7304
|
|
},
|
|
{
|
|
"epoch": 1.9428191489361701,
|
|
"grad_norm": 3.575326919555664,
|
|
"learning_rate": 7.180166596385915e-06,
|
|
"loss": 0.7703,
|
|
"step": 7305
|
|
},
|
|
{
|
|
"epoch": 1.9430851063829788,
|
|
"grad_norm": 4.315759658813477,
|
|
"learning_rate": 7.179375069067821e-06,
|
|
"loss": 0.823,
|
|
"step": 7306
|
|
},
|
|
{
|
|
"epoch": 1.9433510638297873,
|
|
"grad_norm": 3.9836225509643555,
|
|
"learning_rate": 7.178583474317295e-06,
|
|
"loss": 0.6672,
|
|
"step": 7307
|
|
},
|
|
{
|
|
"epoch": 1.9436170212765957,
|
|
"grad_norm": 4.030239105224609,
|
|
"learning_rate": 7.177791812158835e-06,
|
|
"loss": 0.806,
|
|
"step": 7308
|
|
},
|
|
{
|
|
"epoch": 1.9438829787234042,
|
|
"grad_norm": 3.8376708030700684,
|
|
"learning_rate": 7.17700008261693e-06,
|
|
"loss": 0.7224,
|
|
"step": 7309
|
|
},
|
|
{
|
|
"epoch": 1.9441489361702127,
|
|
"grad_norm": 4.117557048797607,
|
|
"learning_rate": 7.176208285716079e-06,
|
|
"loss": 0.8359,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 1.9444148936170214,
|
|
"grad_norm": 4.3215012550354,
|
|
"learning_rate": 7.175416421480783e-06,
|
|
"loss": 0.7143,
|
|
"step": 7311
|
|
},
|
|
{
|
|
"epoch": 1.9446808510638298,
|
|
"grad_norm": 3.8996849060058594,
|
|
"learning_rate": 7.174624489935541e-06,
|
|
"loss": 0.806,
|
|
"step": 7312
|
|
},
|
|
{
|
|
"epoch": 1.9449468085106383,
|
|
"grad_norm": 3.478804588317871,
|
|
"learning_rate": 7.173832491104858e-06,
|
|
"loss": 0.7916,
|
|
"step": 7313
|
|
},
|
|
{
|
|
"epoch": 1.945212765957447,
|
|
"grad_norm": 3.8935012817382812,
|
|
"learning_rate": 7.173040425013236e-06,
|
|
"loss": 0.719,
|
|
"step": 7314
|
|
},
|
|
{
|
|
"epoch": 1.9454787234042552,
|
|
"grad_norm": 3.9126412868499756,
|
|
"learning_rate": 7.172248291685187e-06,
|
|
"loss": 0.6975,
|
|
"step": 7315
|
|
},
|
|
{
|
|
"epoch": 1.945744680851064,
|
|
"grad_norm": 3.790658712387085,
|
|
"learning_rate": 7.171456091145217e-06,
|
|
"loss": 0.8119,
|
|
"step": 7316
|
|
},
|
|
{
|
|
"epoch": 1.9460106382978724,
|
|
"grad_norm": 4.477363109588623,
|
|
"learning_rate": 7.170663823417839e-06,
|
|
"loss": 0.8697,
|
|
"step": 7317
|
|
},
|
|
{
|
|
"epoch": 1.9462765957446808,
|
|
"grad_norm": 4.502041816711426,
|
|
"learning_rate": 7.1698714885275665e-06,
|
|
"loss": 0.9479,
|
|
"step": 7318
|
|
},
|
|
{
|
|
"epoch": 1.9465425531914895,
|
|
"grad_norm": 3.928950071334839,
|
|
"learning_rate": 7.169079086498915e-06,
|
|
"loss": 0.7123,
|
|
"step": 7319
|
|
},
|
|
{
|
|
"epoch": 1.9468085106382977,
|
|
"grad_norm": 3.781550168991089,
|
|
"learning_rate": 7.168286617356406e-06,
|
|
"loss": 0.7275,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 1.9470744680851064,
|
|
"grad_norm": 4.246979236602783,
|
|
"learning_rate": 7.167494081124553e-06,
|
|
"loss": 0.885,
|
|
"step": 7321
|
|
},
|
|
{
|
|
"epoch": 1.9473404255319149,
|
|
"grad_norm": 4.124865531921387,
|
|
"learning_rate": 7.166701477827882e-06,
|
|
"loss": 0.8088,
|
|
"step": 7322
|
|
},
|
|
{
|
|
"epoch": 1.9476063829787233,
|
|
"grad_norm": 4.21986198425293,
|
|
"learning_rate": 7.165908807490916e-06,
|
|
"loss": 0.9175,
|
|
"step": 7323
|
|
},
|
|
{
|
|
"epoch": 1.947872340425532,
|
|
"grad_norm": 4.153756618499756,
|
|
"learning_rate": 7.165116070138183e-06,
|
|
"loss": 0.8633,
|
|
"step": 7324
|
|
},
|
|
{
|
|
"epoch": 1.9481382978723403,
|
|
"grad_norm": 3.5365302562713623,
|
|
"learning_rate": 7.164323265794209e-06,
|
|
"loss": 0.8274,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 1.948404255319149,
|
|
"grad_norm": 4.312306880950928,
|
|
"learning_rate": 7.1635303944835246e-06,
|
|
"loss": 0.847,
|
|
"step": 7326
|
|
},
|
|
{
|
|
"epoch": 1.9486702127659574,
|
|
"grad_norm": 4.010374069213867,
|
|
"learning_rate": 7.162737456230662e-06,
|
|
"loss": 0.82,
|
|
"step": 7327
|
|
},
|
|
{
|
|
"epoch": 1.9489361702127659,
|
|
"grad_norm": 5.155407905578613,
|
|
"learning_rate": 7.161944451060157e-06,
|
|
"loss": 0.9241,
|
|
"step": 7328
|
|
},
|
|
{
|
|
"epoch": 1.9492021276595746,
|
|
"grad_norm": 3.665374279022217,
|
|
"learning_rate": 7.161151378996545e-06,
|
|
"loss": 0.8255,
|
|
"step": 7329
|
|
},
|
|
{
|
|
"epoch": 1.949468085106383,
|
|
"grad_norm": 3.6932079792022705,
|
|
"learning_rate": 7.1603582400643646e-06,
|
|
"loss": 0.8187,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 1.9497340425531915,
|
|
"grad_norm": 3.555961847305298,
|
|
"learning_rate": 7.159565034288157e-06,
|
|
"loss": 0.7523,
|
|
"step": 7331
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"grad_norm": 4.505660533905029,
|
|
"learning_rate": 7.158771761692464e-06,
|
|
"loss": 0.7903,
|
|
"step": 7332
|
|
},
|
|
{
|
|
"epoch": 1.9502659574468084,
|
|
"grad_norm": 3.616476058959961,
|
|
"learning_rate": 7.157978422301832e-06,
|
|
"loss": 0.8853,
|
|
"step": 7333
|
|
},
|
|
{
|
|
"epoch": 1.950531914893617,
|
|
"grad_norm": 4.25620698928833,
|
|
"learning_rate": 7.157185016140809e-06,
|
|
"loss": 0.8566,
|
|
"step": 7334
|
|
},
|
|
{
|
|
"epoch": 1.9507978723404256,
|
|
"grad_norm": 3.9593820571899414,
|
|
"learning_rate": 7.156391543233938e-06,
|
|
"loss": 0.7797,
|
|
"step": 7335
|
|
},
|
|
{
|
|
"epoch": 1.951063829787234,
|
|
"grad_norm": 4.379816055297852,
|
|
"learning_rate": 7.155598003605776e-06,
|
|
"loss": 0.9148,
|
|
"step": 7336
|
|
},
|
|
{
|
|
"epoch": 1.9513297872340427,
|
|
"grad_norm": 3.731823205947876,
|
|
"learning_rate": 7.154804397280873e-06,
|
|
"loss": 0.7223,
|
|
"step": 7337
|
|
},
|
|
{
|
|
"epoch": 1.951595744680851,
|
|
"grad_norm": 3.8849217891693115,
|
|
"learning_rate": 7.154010724283786e-06,
|
|
"loss": 0.8446,
|
|
"step": 7338
|
|
},
|
|
{
|
|
"epoch": 1.9518617021276596,
|
|
"grad_norm": 3.7477874755859375,
|
|
"learning_rate": 7.15321698463907e-06,
|
|
"loss": 0.6922,
|
|
"step": 7339
|
|
},
|
|
{
|
|
"epoch": 1.952127659574468,
|
|
"grad_norm": 4.323108673095703,
|
|
"learning_rate": 7.152423178371286e-06,
|
|
"loss": 0.8153,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 1.9523936170212766,
|
|
"grad_norm": 4.16124153137207,
|
|
"learning_rate": 7.1516293055049944e-06,
|
|
"loss": 0.8003,
|
|
"step": 7341
|
|
},
|
|
{
|
|
"epoch": 1.9526595744680852,
|
|
"grad_norm": 4.236426830291748,
|
|
"learning_rate": 7.150835366064759e-06,
|
|
"loss": 0.7843,
|
|
"step": 7342
|
|
},
|
|
{
|
|
"epoch": 1.9529255319148935,
|
|
"grad_norm": 3.637660026550293,
|
|
"learning_rate": 7.1500413600751465e-06,
|
|
"loss": 0.7665,
|
|
"step": 7343
|
|
},
|
|
{
|
|
"epoch": 1.9531914893617022,
|
|
"grad_norm": 3.838202476501465,
|
|
"learning_rate": 7.14924728756072e-06,
|
|
"loss": 0.7723,
|
|
"step": 7344
|
|
},
|
|
{
|
|
"epoch": 1.9534574468085106,
|
|
"grad_norm": 4.209107875823975,
|
|
"learning_rate": 7.148453148546055e-06,
|
|
"loss": 0.8646,
|
|
"step": 7345
|
|
},
|
|
{
|
|
"epoch": 1.953723404255319,
|
|
"grad_norm": 3.9335439205169678,
|
|
"learning_rate": 7.147658943055718e-06,
|
|
"loss": 0.6881,
|
|
"step": 7346
|
|
},
|
|
{
|
|
"epoch": 1.9539893617021278,
|
|
"grad_norm": 3.6025755405426025,
|
|
"learning_rate": 7.1468646711142855e-06,
|
|
"loss": 0.6567,
|
|
"step": 7347
|
|
},
|
|
{
|
|
"epoch": 1.954255319148936,
|
|
"grad_norm": 3.8079092502593994,
|
|
"learning_rate": 7.146070332746332e-06,
|
|
"loss": 0.7122,
|
|
"step": 7348
|
|
},
|
|
{
|
|
"epoch": 1.9545212765957447,
|
|
"grad_norm": 4.033806800842285,
|
|
"learning_rate": 7.145275927976436e-06,
|
|
"loss": 0.7522,
|
|
"step": 7349
|
|
},
|
|
{
|
|
"epoch": 1.9547872340425532,
|
|
"grad_norm": 4.1563310623168945,
|
|
"learning_rate": 7.144481456829178e-06,
|
|
"loss": 0.7998,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 1.9550531914893616,
|
|
"grad_norm": 4.061034202575684,
|
|
"learning_rate": 7.143686919329138e-06,
|
|
"loss": 0.9232,
|
|
"step": 7351
|
|
},
|
|
{
|
|
"epoch": 1.9553191489361703,
|
|
"grad_norm": 4.174419403076172,
|
|
"learning_rate": 7.1428923155009e-06,
|
|
"loss": 0.6807,
|
|
"step": 7352
|
|
},
|
|
{
|
|
"epoch": 1.9555851063829788,
|
|
"grad_norm": 3.6197104454040527,
|
|
"learning_rate": 7.142097645369052e-06,
|
|
"loss": 0.8129,
|
|
"step": 7353
|
|
},
|
|
{
|
|
"epoch": 1.9558510638297872,
|
|
"grad_norm": 4.288638591766357,
|
|
"learning_rate": 7.141302908958181e-06,
|
|
"loss": 0.9342,
|
|
"step": 7354
|
|
},
|
|
{
|
|
"epoch": 1.9561170212765957,
|
|
"grad_norm": 3.9184861183166504,
|
|
"learning_rate": 7.140508106292876e-06,
|
|
"loss": 0.7052,
|
|
"step": 7355
|
|
},
|
|
{
|
|
"epoch": 1.9563829787234042,
|
|
"grad_norm": 4.214428901672363,
|
|
"learning_rate": 7.1397132373977295e-06,
|
|
"loss": 0.8679,
|
|
"step": 7356
|
|
},
|
|
{
|
|
"epoch": 1.9566489361702128,
|
|
"grad_norm": 4.283886909484863,
|
|
"learning_rate": 7.138918302297338e-06,
|
|
"loss": 0.8816,
|
|
"step": 7357
|
|
},
|
|
{
|
|
"epoch": 1.9569148936170213,
|
|
"grad_norm": 3.77843976020813,
|
|
"learning_rate": 7.138123301016295e-06,
|
|
"loss": 0.7901,
|
|
"step": 7358
|
|
},
|
|
{
|
|
"epoch": 1.9571808510638298,
|
|
"grad_norm": 3.9347009658813477,
|
|
"learning_rate": 7.137328233579201e-06,
|
|
"loss": 0.7385,
|
|
"step": 7359
|
|
},
|
|
{
|
|
"epoch": 1.9574468085106385,
|
|
"grad_norm": 3.9841034412384033,
|
|
"learning_rate": 7.136533100010654e-06,
|
|
"loss": 0.7738,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 1.9577127659574467,
|
|
"grad_norm": 3.536179780960083,
|
|
"learning_rate": 7.1357379003352565e-06,
|
|
"loss": 0.8311,
|
|
"step": 7361
|
|
},
|
|
{
|
|
"epoch": 1.9579787234042554,
|
|
"grad_norm": 4.386892318725586,
|
|
"learning_rate": 7.134942634577615e-06,
|
|
"loss": 0.9451,
|
|
"step": 7362
|
|
},
|
|
{
|
|
"epoch": 1.9582446808510638,
|
|
"grad_norm": 3.738041877746582,
|
|
"learning_rate": 7.1341473027623355e-06,
|
|
"loss": 0.6454,
|
|
"step": 7363
|
|
},
|
|
{
|
|
"epoch": 1.9585106382978723,
|
|
"grad_norm": 3.718473434448242,
|
|
"learning_rate": 7.133351904914024e-06,
|
|
"loss": 0.8613,
|
|
"step": 7364
|
|
},
|
|
{
|
|
"epoch": 1.958776595744681,
|
|
"grad_norm": 4.3047661781311035,
|
|
"learning_rate": 7.132556441057294e-06,
|
|
"loss": 0.7499,
|
|
"step": 7365
|
|
},
|
|
{
|
|
"epoch": 1.9590425531914892,
|
|
"grad_norm": 3.821338415145874,
|
|
"learning_rate": 7.131760911216756e-06,
|
|
"loss": 0.737,
|
|
"step": 7366
|
|
},
|
|
{
|
|
"epoch": 1.959308510638298,
|
|
"grad_norm": 3.7964980602264404,
|
|
"learning_rate": 7.130965315417027e-06,
|
|
"loss": 0.8637,
|
|
"step": 7367
|
|
},
|
|
{
|
|
"epoch": 1.9595744680851064,
|
|
"grad_norm": 3.9412569999694824,
|
|
"learning_rate": 7.130169653682721e-06,
|
|
"loss": 0.6788,
|
|
"step": 7368
|
|
},
|
|
{
|
|
"epoch": 1.9598404255319148,
|
|
"grad_norm": 4.125255584716797,
|
|
"learning_rate": 7.129373926038459e-06,
|
|
"loss": 0.86,
|
|
"step": 7369
|
|
},
|
|
{
|
|
"epoch": 1.9601063829787235,
|
|
"grad_norm": 3.7982115745544434,
|
|
"learning_rate": 7.128578132508859e-06,
|
|
"loss": 0.9386,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 1.9603723404255318,
|
|
"grad_norm": 3.9143412113189697,
|
|
"learning_rate": 7.1277822731185475e-06,
|
|
"loss": 0.911,
|
|
"step": 7371
|
|
},
|
|
{
|
|
"epoch": 1.9606382978723405,
|
|
"grad_norm": 4.226142883300781,
|
|
"learning_rate": 7.126986347892146e-06,
|
|
"loss": 0.7375,
|
|
"step": 7372
|
|
},
|
|
{
|
|
"epoch": 1.960904255319149,
|
|
"grad_norm": 3.8393430709838867,
|
|
"learning_rate": 7.126190356854283e-06,
|
|
"loss": 0.8341,
|
|
"step": 7373
|
|
},
|
|
{
|
|
"epoch": 1.9611702127659574,
|
|
"grad_norm": 4.1616926193237305,
|
|
"learning_rate": 7.1253943000295865e-06,
|
|
"loss": 0.8532,
|
|
"step": 7374
|
|
},
|
|
{
|
|
"epoch": 1.961436170212766,
|
|
"grad_norm": 3.9134316444396973,
|
|
"learning_rate": 7.12459817744269e-06,
|
|
"loss": 0.7566,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 1.9617021276595743,
|
|
"grad_norm": 3.930948495864868,
|
|
"learning_rate": 7.123801989118223e-06,
|
|
"loss": 0.7781,
|
|
"step": 7376
|
|
},
|
|
{
|
|
"epoch": 1.961968085106383,
|
|
"grad_norm": 3.913886785507202,
|
|
"learning_rate": 7.1230057350808234e-06,
|
|
"loss": 0.8081,
|
|
"step": 7377
|
|
},
|
|
{
|
|
"epoch": 1.9622340425531914,
|
|
"grad_norm": 4.381828308105469,
|
|
"learning_rate": 7.122209415355125e-06,
|
|
"loss": 0.9048,
|
|
"step": 7378
|
|
},
|
|
{
|
|
"epoch": 1.9625,
|
|
"grad_norm": 3.839282512664795,
|
|
"learning_rate": 7.121413029965769e-06,
|
|
"loss": 0.7002,
|
|
"step": 7379
|
|
},
|
|
{
|
|
"epoch": 1.9627659574468086,
|
|
"grad_norm": 4.018161773681641,
|
|
"learning_rate": 7.120616578937397e-06,
|
|
"loss": 0.803,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 1.963031914893617,
|
|
"grad_norm": 4.220311164855957,
|
|
"learning_rate": 7.1198200622946516e-06,
|
|
"loss": 0.8337,
|
|
"step": 7381
|
|
},
|
|
{
|
|
"epoch": 1.9632978723404255,
|
|
"grad_norm": 3.790156841278076,
|
|
"learning_rate": 7.119023480062176e-06,
|
|
"loss": 0.7224,
|
|
"step": 7382
|
|
},
|
|
{
|
|
"epoch": 1.9635638297872342,
|
|
"grad_norm": 4.560417652130127,
|
|
"learning_rate": 7.1182268322646205e-06,
|
|
"loss": 0.8584,
|
|
"step": 7383
|
|
},
|
|
{
|
|
"epoch": 1.9638297872340424,
|
|
"grad_norm": 4.3043999671936035,
|
|
"learning_rate": 7.117430118926633e-06,
|
|
"loss": 0.8294,
|
|
"step": 7384
|
|
},
|
|
{
|
|
"epoch": 1.9640957446808511,
|
|
"grad_norm": 3.781405210494995,
|
|
"learning_rate": 7.116633340072863e-06,
|
|
"loss": 0.7876,
|
|
"step": 7385
|
|
},
|
|
{
|
|
"epoch": 1.9643617021276596,
|
|
"grad_norm": 3.986027956008911,
|
|
"learning_rate": 7.115836495727968e-06,
|
|
"loss": 0.7581,
|
|
"step": 7386
|
|
},
|
|
{
|
|
"epoch": 1.964627659574468,
|
|
"grad_norm": 3.9813320636749268,
|
|
"learning_rate": 7.1150395859165985e-06,
|
|
"loss": 0.9021,
|
|
"step": 7387
|
|
},
|
|
{
|
|
"epoch": 1.9648936170212767,
|
|
"grad_norm": 4.043676376342773,
|
|
"learning_rate": 7.114242610663415e-06,
|
|
"loss": 0.791,
|
|
"step": 7388
|
|
},
|
|
{
|
|
"epoch": 1.965159574468085,
|
|
"grad_norm": 4.014968395233154,
|
|
"learning_rate": 7.113445569993076e-06,
|
|
"loss": 0.7437,
|
|
"step": 7389
|
|
},
|
|
{
|
|
"epoch": 1.9654255319148937,
|
|
"grad_norm": 3.8244807720184326,
|
|
"learning_rate": 7.1126484639302425e-06,
|
|
"loss": 0.7376,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 1.9656914893617021,
|
|
"grad_norm": 3.804473400115967,
|
|
"learning_rate": 7.111851292499579e-06,
|
|
"loss": 0.8358,
|
|
"step": 7391
|
|
},
|
|
{
|
|
"epoch": 1.9659574468085106,
|
|
"grad_norm": 3.598792552947998,
|
|
"learning_rate": 7.111054055725749e-06,
|
|
"loss": 0.7728,
|
|
"step": 7392
|
|
},
|
|
{
|
|
"epoch": 1.9662234042553193,
|
|
"grad_norm": 4.2588677406311035,
|
|
"learning_rate": 7.110256753633421e-06,
|
|
"loss": 0.884,
|
|
"step": 7393
|
|
},
|
|
{
|
|
"epoch": 1.9664893617021275,
|
|
"grad_norm": 3.7859714031219482,
|
|
"learning_rate": 7.109459386247265e-06,
|
|
"loss": 0.6813,
|
|
"step": 7394
|
|
},
|
|
{
|
|
"epoch": 1.9667553191489362,
|
|
"grad_norm": 4.303823471069336,
|
|
"learning_rate": 7.108661953591953e-06,
|
|
"loss": 0.9044,
|
|
"step": 7395
|
|
},
|
|
{
|
|
"epoch": 1.9670212765957447,
|
|
"grad_norm": 3.953003406524658,
|
|
"learning_rate": 7.107864455692156e-06,
|
|
"loss": 0.7632,
|
|
"step": 7396
|
|
},
|
|
{
|
|
"epoch": 1.9672872340425531,
|
|
"grad_norm": 4.125672817230225,
|
|
"learning_rate": 7.107066892572552e-06,
|
|
"loss": 0.7153,
|
|
"step": 7397
|
|
},
|
|
{
|
|
"epoch": 1.9675531914893618,
|
|
"grad_norm": 4.01138973236084,
|
|
"learning_rate": 7.106269264257817e-06,
|
|
"loss": 0.8052,
|
|
"step": 7398
|
|
},
|
|
{
|
|
"epoch": 1.96781914893617,
|
|
"grad_norm": 3.7055439949035645,
|
|
"learning_rate": 7.10547157077263e-06,
|
|
"loss": 0.7684,
|
|
"step": 7399
|
|
},
|
|
{
|
|
"epoch": 1.9680851063829787,
|
|
"grad_norm": 4.636490821838379,
|
|
"learning_rate": 7.104673812141676e-06,
|
|
"loss": 0.7504,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 1.9683510638297872,
|
|
"grad_norm": 3.961894989013672,
|
|
"learning_rate": 7.103875988389636e-06,
|
|
"loss": 0.9316,
|
|
"step": 7401
|
|
},
|
|
{
|
|
"epoch": 1.9686170212765957,
|
|
"grad_norm": 3.978306770324707,
|
|
"learning_rate": 7.103078099541194e-06,
|
|
"loss": 0.8276,
|
|
"step": 7402
|
|
},
|
|
{
|
|
"epoch": 1.9688829787234043,
|
|
"grad_norm": 3.9166336059570312,
|
|
"learning_rate": 7.102280145621041e-06,
|
|
"loss": 0.7308,
|
|
"step": 7403
|
|
},
|
|
{
|
|
"epoch": 1.9691489361702128,
|
|
"grad_norm": 3.680129289627075,
|
|
"learning_rate": 7.101482126653865e-06,
|
|
"loss": 0.8355,
|
|
"step": 7404
|
|
},
|
|
{
|
|
"epoch": 1.9694148936170213,
|
|
"grad_norm": 4.1183857917785645,
|
|
"learning_rate": 7.1006840426643576e-06,
|
|
"loss": 0.7782,
|
|
"step": 7405
|
|
},
|
|
{
|
|
"epoch": 1.96968085106383,
|
|
"grad_norm": 4.286891460418701,
|
|
"learning_rate": 7.099885893677213e-06,
|
|
"loss": 0.8094,
|
|
"step": 7406
|
|
},
|
|
{
|
|
"epoch": 1.9699468085106382,
|
|
"grad_norm": 4.037398338317871,
|
|
"learning_rate": 7.099087679717127e-06,
|
|
"loss": 0.8141,
|
|
"step": 7407
|
|
},
|
|
{
|
|
"epoch": 1.9702127659574469,
|
|
"grad_norm": 3.8752505779266357,
|
|
"learning_rate": 7.098289400808795e-06,
|
|
"loss": 0.7824,
|
|
"step": 7408
|
|
},
|
|
{
|
|
"epoch": 1.9704787234042553,
|
|
"grad_norm": 3.7574338912963867,
|
|
"learning_rate": 7.0974910569769195e-06,
|
|
"loss": 0.6398,
|
|
"step": 7409
|
|
},
|
|
{
|
|
"epoch": 1.9707446808510638,
|
|
"grad_norm": 3.918271064758301,
|
|
"learning_rate": 7.096692648246203e-06,
|
|
"loss": 0.7949,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 1.9710106382978725,
|
|
"grad_norm": 4.124891757965088,
|
|
"learning_rate": 7.095894174641345e-06,
|
|
"loss": 0.9578,
|
|
"step": 7411
|
|
},
|
|
{
|
|
"epoch": 1.9712765957446807,
|
|
"grad_norm": 3.764817953109741,
|
|
"learning_rate": 7.0950956361870536e-06,
|
|
"loss": 0.8013,
|
|
"step": 7412
|
|
},
|
|
{
|
|
"epoch": 1.9715425531914894,
|
|
"grad_norm": 4.22829008102417,
|
|
"learning_rate": 7.094297032908037e-06,
|
|
"loss": 0.7897,
|
|
"step": 7413
|
|
},
|
|
{
|
|
"epoch": 1.9718085106382979,
|
|
"grad_norm": 4.174428462982178,
|
|
"learning_rate": 7.093498364829006e-06,
|
|
"loss": 0.8182,
|
|
"step": 7414
|
|
},
|
|
{
|
|
"epoch": 1.9720744680851063,
|
|
"grad_norm": 4.265493392944336,
|
|
"learning_rate": 7.09269963197467e-06,
|
|
"loss": 0.7067,
|
|
"step": 7415
|
|
},
|
|
{
|
|
"epoch": 1.972340425531915,
|
|
"grad_norm": 3.417632579803467,
|
|
"learning_rate": 7.091900834369743e-06,
|
|
"loss": 0.6767,
|
|
"step": 7416
|
|
},
|
|
{
|
|
"epoch": 1.9726063829787233,
|
|
"grad_norm": 3.931145429611206,
|
|
"learning_rate": 7.09110197203894e-06,
|
|
"loss": 0.7581,
|
|
"step": 7417
|
|
},
|
|
{
|
|
"epoch": 1.972872340425532,
|
|
"grad_norm": 3.808061361312866,
|
|
"learning_rate": 7.090303045006983e-06,
|
|
"loss": 0.88,
|
|
"step": 7418
|
|
},
|
|
{
|
|
"epoch": 1.9731382978723404,
|
|
"grad_norm": 4.074621677398682,
|
|
"learning_rate": 7.089504053298587e-06,
|
|
"loss": 0.8391,
|
|
"step": 7419
|
|
},
|
|
{
|
|
"epoch": 1.9734042553191489,
|
|
"grad_norm": 3.7446646690368652,
|
|
"learning_rate": 7.0887049969384756e-06,
|
|
"loss": 0.778,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 1.9736702127659576,
|
|
"grad_norm": 4.311694622039795,
|
|
"learning_rate": 7.087905875951373e-06,
|
|
"loss": 0.6362,
|
|
"step": 7421
|
|
},
|
|
{
|
|
"epoch": 1.9739361702127658,
|
|
"grad_norm": 3.7492148876190186,
|
|
"learning_rate": 7.087106690362003e-06,
|
|
"loss": 0.85,
|
|
"step": 7422
|
|
},
|
|
{
|
|
"epoch": 1.9742021276595745,
|
|
"grad_norm": 3.8154044151306152,
|
|
"learning_rate": 7.086307440195096e-06,
|
|
"loss": 0.8229,
|
|
"step": 7423
|
|
},
|
|
{
|
|
"epoch": 1.974468085106383,
|
|
"grad_norm": 3.8786826133728027,
|
|
"learning_rate": 7.085508125475381e-06,
|
|
"loss": 0.8001,
|
|
"step": 7424
|
|
},
|
|
{
|
|
"epoch": 1.9747340425531914,
|
|
"grad_norm": 3.972696304321289,
|
|
"learning_rate": 7.084708746227589e-06,
|
|
"loss": 0.9101,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 1.975,
|
|
"grad_norm": 4.224587440490723,
|
|
"learning_rate": 7.083909302476453e-06,
|
|
"loss": 0.7869,
|
|
"step": 7426
|
|
},
|
|
{
|
|
"epoch": 1.9752659574468086,
|
|
"grad_norm": 3.700507164001465,
|
|
"learning_rate": 7.08310979424671e-06,
|
|
"loss": 0.7123,
|
|
"step": 7427
|
|
},
|
|
{
|
|
"epoch": 1.975531914893617,
|
|
"grad_norm": 3.8128812313079834,
|
|
"learning_rate": 7.082310221563098e-06,
|
|
"loss": 0.7205,
|
|
"step": 7428
|
|
},
|
|
{
|
|
"epoch": 1.9757978723404257,
|
|
"grad_norm": 4.028718948364258,
|
|
"learning_rate": 7.081510584450355e-06,
|
|
"loss": 0.9249,
|
|
"step": 7429
|
|
},
|
|
{
|
|
"epoch": 1.976063829787234,
|
|
"grad_norm": 3.798619270324707,
|
|
"learning_rate": 7.080710882933225e-06,
|
|
"loss": 0.7412,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 1.9763297872340426,
|
|
"grad_norm": 4.599943161010742,
|
|
"learning_rate": 7.07991111703645e-06,
|
|
"loss": 0.8713,
|
|
"step": 7431
|
|
},
|
|
{
|
|
"epoch": 1.976595744680851,
|
|
"grad_norm": 4.6581854820251465,
|
|
"learning_rate": 7.079111286784775e-06,
|
|
"loss": 0.8165,
|
|
"step": 7432
|
|
},
|
|
{
|
|
"epoch": 1.9768617021276595,
|
|
"grad_norm": 3.9097495079040527,
|
|
"learning_rate": 7.078311392202951e-06,
|
|
"loss": 0.7803,
|
|
"step": 7433
|
|
},
|
|
{
|
|
"epoch": 1.9771276595744682,
|
|
"grad_norm": 4.4464802742004395,
|
|
"learning_rate": 7.077511433315725e-06,
|
|
"loss": 0.9244,
|
|
"step": 7434
|
|
},
|
|
{
|
|
"epoch": 1.9773936170212765,
|
|
"grad_norm": 4.222725868225098,
|
|
"learning_rate": 7.076711410147849e-06,
|
|
"loss": 0.9159,
|
|
"step": 7435
|
|
},
|
|
{
|
|
"epoch": 1.9776595744680852,
|
|
"grad_norm": 3.8437206745147705,
|
|
"learning_rate": 7.075911322724077e-06,
|
|
"loss": 0.7657,
|
|
"step": 7436
|
|
},
|
|
{
|
|
"epoch": 1.9779255319148936,
|
|
"grad_norm": 3.891757011413574,
|
|
"learning_rate": 7.075111171069165e-06,
|
|
"loss": 0.574,
|
|
"step": 7437
|
|
},
|
|
{
|
|
"epoch": 1.978191489361702,
|
|
"grad_norm": 3.8077917098999023,
|
|
"learning_rate": 7.074310955207869e-06,
|
|
"loss": 0.713,
|
|
"step": 7438
|
|
},
|
|
{
|
|
"epoch": 1.9784574468085108,
|
|
"grad_norm": 3.8292224407196045,
|
|
"learning_rate": 7.073510675164952e-06,
|
|
"loss": 0.8645,
|
|
"step": 7439
|
|
},
|
|
{
|
|
"epoch": 1.978723404255319,
|
|
"grad_norm": 3.931783437728882,
|
|
"learning_rate": 7.072710330965171e-06,
|
|
"loss": 0.7588,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 1.9789893617021277,
|
|
"grad_norm": 3.6988885402679443,
|
|
"learning_rate": 7.071909922633293e-06,
|
|
"loss": 0.8146,
|
|
"step": 7441
|
|
},
|
|
{
|
|
"epoch": 1.9792553191489362,
|
|
"grad_norm": 3.7726998329162598,
|
|
"learning_rate": 7.071109450194085e-06,
|
|
"loss": 0.8082,
|
|
"step": 7442
|
|
},
|
|
{
|
|
"epoch": 1.9795212765957446,
|
|
"grad_norm": 4.304258346557617,
|
|
"learning_rate": 7.070308913672309e-06,
|
|
"loss": 0.8142,
|
|
"step": 7443
|
|
},
|
|
{
|
|
"epoch": 1.9797872340425533,
|
|
"grad_norm": 3.6615335941314697,
|
|
"learning_rate": 7.069508313092739e-06,
|
|
"loss": 0.7409,
|
|
"step": 7444
|
|
},
|
|
{
|
|
"epoch": 1.9800531914893615,
|
|
"grad_norm": 4.02711296081543,
|
|
"learning_rate": 7.068707648480145e-06,
|
|
"loss": 0.8662,
|
|
"step": 7445
|
|
},
|
|
{
|
|
"epoch": 1.9803191489361702,
|
|
"grad_norm": 3.48976993560791,
|
|
"learning_rate": 7.067906919859301e-06,
|
|
"loss": 0.7655,
|
|
"step": 7446
|
|
},
|
|
{
|
|
"epoch": 1.9805851063829787,
|
|
"grad_norm": 4.168039321899414,
|
|
"learning_rate": 7.067106127254983e-06,
|
|
"loss": 0.8516,
|
|
"step": 7447
|
|
},
|
|
{
|
|
"epoch": 1.9808510638297872,
|
|
"grad_norm": 3.757882833480835,
|
|
"learning_rate": 7.066305270691965e-06,
|
|
"loss": 0.7557,
|
|
"step": 7448
|
|
},
|
|
{
|
|
"epoch": 1.9811170212765958,
|
|
"grad_norm": 4.09896183013916,
|
|
"learning_rate": 7.065504350195031e-06,
|
|
"loss": 0.7227,
|
|
"step": 7449
|
|
},
|
|
{
|
|
"epoch": 1.9813829787234043,
|
|
"grad_norm": 3.6728386878967285,
|
|
"learning_rate": 7.064703365788961e-06,
|
|
"loss": 0.8711,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 1.9816489361702128,
|
|
"grad_norm": 4.336848735809326,
|
|
"learning_rate": 7.063902317498537e-06,
|
|
"loss": 0.8427,
|
|
"step": 7451
|
|
},
|
|
{
|
|
"epoch": 1.9819148936170212,
|
|
"grad_norm": 3.715324640274048,
|
|
"learning_rate": 7.063101205348546e-06,
|
|
"loss": 0.8392,
|
|
"step": 7452
|
|
},
|
|
{
|
|
"epoch": 1.9821808510638297,
|
|
"grad_norm": 3.8472211360931396,
|
|
"learning_rate": 7.062300029363775e-06,
|
|
"loss": 0.8386,
|
|
"step": 7453
|
|
},
|
|
{
|
|
"epoch": 1.9824468085106384,
|
|
"grad_norm": 4.4139533042907715,
|
|
"learning_rate": 7.061498789569012e-06,
|
|
"loss": 0.7736,
|
|
"step": 7454
|
|
},
|
|
{
|
|
"epoch": 1.9827127659574468,
|
|
"grad_norm": 4.422085285186768,
|
|
"learning_rate": 7.06069748598905e-06,
|
|
"loss": 0.8175,
|
|
"step": 7455
|
|
},
|
|
{
|
|
"epoch": 1.9829787234042553,
|
|
"grad_norm": 4.3708696365356445,
|
|
"learning_rate": 7.059896118648681e-06,
|
|
"loss": 0.8802,
|
|
"step": 7456
|
|
},
|
|
{
|
|
"epoch": 1.983244680851064,
|
|
"grad_norm": 3.6612091064453125,
|
|
"learning_rate": 7.059094687572701e-06,
|
|
"loss": 0.73,
|
|
"step": 7457
|
|
},
|
|
{
|
|
"epoch": 1.9835106382978722,
|
|
"grad_norm": 4.2330780029296875,
|
|
"learning_rate": 7.058293192785907e-06,
|
|
"loss": 0.7638,
|
|
"step": 7458
|
|
},
|
|
{
|
|
"epoch": 1.983776595744681,
|
|
"grad_norm": 4.289926528930664,
|
|
"learning_rate": 7.0574916343130995e-06,
|
|
"loss": 0.7821,
|
|
"step": 7459
|
|
},
|
|
{
|
|
"epoch": 1.9840425531914894,
|
|
"grad_norm": 4.122095108032227,
|
|
"learning_rate": 7.0566900121790775e-06,
|
|
"loss": 0.9189,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 1.9843085106382978,
|
|
"grad_norm": 3.974686861038208,
|
|
"learning_rate": 7.055888326408645e-06,
|
|
"loss": 0.7231,
|
|
"step": 7461
|
|
},
|
|
{
|
|
"epoch": 1.9845744680851065,
|
|
"grad_norm": 3.515641450881958,
|
|
"learning_rate": 7.055086577026608e-06,
|
|
"loss": 0.8235,
|
|
"step": 7462
|
|
},
|
|
{
|
|
"epoch": 1.9848404255319148,
|
|
"grad_norm": 4.1052565574646,
|
|
"learning_rate": 7.0542847640577725e-06,
|
|
"loss": 0.7862,
|
|
"step": 7463
|
|
},
|
|
{
|
|
"epoch": 1.9851063829787234,
|
|
"grad_norm": 3.889636516571045,
|
|
"learning_rate": 7.0534828875269466e-06,
|
|
"loss": 0.7854,
|
|
"step": 7464
|
|
},
|
|
{
|
|
"epoch": 1.985372340425532,
|
|
"grad_norm": 4.208193778991699,
|
|
"learning_rate": 7.052680947458944e-06,
|
|
"loss": 0.7854,
|
|
"step": 7465
|
|
},
|
|
{
|
|
"epoch": 1.9856382978723404,
|
|
"grad_norm": 4.233124732971191,
|
|
"learning_rate": 7.051878943878575e-06,
|
|
"loss": 0.7895,
|
|
"step": 7466
|
|
},
|
|
{
|
|
"epoch": 1.985904255319149,
|
|
"grad_norm": 4.030735969543457,
|
|
"learning_rate": 7.051076876810656e-06,
|
|
"loss": 0.8551,
|
|
"step": 7467
|
|
},
|
|
{
|
|
"epoch": 1.9861702127659573,
|
|
"grad_norm": 3.666236639022827,
|
|
"learning_rate": 7.050274746280005e-06,
|
|
"loss": 0.7758,
|
|
"step": 7468
|
|
},
|
|
{
|
|
"epoch": 1.986436170212766,
|
|
"grad_norm": 3.7510082721710205,
|
|
"learning_rate": 7.0494725523114375e-06,
|
|
"loss": 0.9323,
|
|
"step": 7469
|
|
},
|
|
{
|
|
"epoch": 1.9867021276595744,
|
|
"grad_norm": 3.9435558319091797,
|
|
"learning_rate": 7.048670294929777e-06,
|
|
"loss": 0.9059,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 1.986968085106383,
|
|
"grad_norm": 3.691020965576172,
|
|
"learning_rate": 7.047867974159845e-06,
|
|
"loss": 0.7602,
|
|
"step": 7471
|
|
},
|
|
{
|
|
"epoch": 1.9872340425531916,
|
|
"grad_norm": 3.697643518447876,
|
|
"learning_rate": 7.047065590026467e-06,
|
|
"loss": 0.7624,
|
|
"step": 7472
|
|
},
|
|
{
|
|
"epoch": 1.9875,
|
|
"grad_norm": 3.759286880493164,
|
|
"learning_rate": 7.04626314255447e-06,
|
|
"loss": 0.8639,
|
|
"step": 7473
|
|
},
|
|
{
|
|
"epoch": 1.9877659574468085,
|
|
"grad_norm": 4.054465293884277,
|
|
"learning_rate": 7.045460631768684e-06,
|
|
"loss": 0.7268,
|
|
"step": 7474
|
|
},
|
|
{
|
|
"epoch": 1.988031914893617,
|
|
"grad_norm": 4.61219596862793,
|
|
"learning_rate": 7.0446580576939346e-06,
|
|
"loss": 0.9591,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 1.9882978723404254,
|
|
"grad_norm": 4.135398864746094,
|
|
"learning_rate": 7.04385542035506e-06,
|
|
"loss": 0.9273,
|
|
"step": 7476
|
|
},
|
|
{
|
|
"epoch": 1.9885638297872341,
|
|
"grad_norm": 3.8725779056549072,
|
|
"learning_rate": 7.043052719776891e-06,
|
|
"loss": 0.803,
|
|
"step": 7477
|
|
},
|
|
{
|
|
"epoch": 1.9888297872340426,
|
|
"grad_norm": 3.9959404468536377,
|
|
"learning_rate": 7.042249955984265e-06,
|
|
"loss": 0.8572,
|
|
"step": 7478
|
|
},
|
|
{
|
|
"epoch": 1.989095744680851,
|
|
"grad_norm": 3.542355537414551,
|
|
"learning_rate": 7.041447129002023e-06,
|
|
"loss": 0.8041,
|
|
"step": 7479
|
|
},
|
|
{
|
|
"epoch": 1.9893617021276597,
|
|
"grad_norm": 4.780427932739258,
|
|
"learning_rate": 7.0406442388550016e-06,
|
|
"loss": 0.88,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 1.989627659574468,
|
|
"grad_norm": 3.5344386100769043,
|
|
"learning_rate": 7.039841285568045e-06,
|
|
"loss": 0.7503,
|
|
"step": 7481
|
|
},
|
|
{
|
|
"epoch": 1.9898936170212767,
|
|
"grad_norm": 3.8678970336914062,
|
|
"learning_rate": 7.039038269165999e-06,
|
|
"loss": 0.74,
|
|
"step": 7482
|
|
},
|
|
{
|
|
"epoch": 1.9901595744680851,
|
|
"grad_norm": 3.366485834121704,
|
|
"learning_rate": 7.038235189673706e-06,
|
|
"loss": 0.7804,
|
|
"step": 7483
|
|
},
|
|
{
|
|
"epoch": 1.9904255319148936,
|
|
"grad_norm": 3.5538713932037354,
|
|
"learning_rate": 7.037432047116018e-06,
|
|
"loss": 0.7362,
|
|
"step": 7484
|
|
},
|
|
{
|
|
"epoch": 1.9906914893617023,
|
|
"grad_norm": 4.539484977722168,
|
|
"learning_rate": 7.036628841517783e-06,
|
|
"loss": 0.8812,
|
|
"step": 7485
|
|
},
|
|
{
|
|
"epoch": 1.9909574468085105,
|
|
"grad_norm": 3.830280065536499,
|
|
"learning_rate": 7.035825572903854e-06,
|
|
"loss": 0.809,
|
|
"step": 7486
|
|
},
|
|
{
|
|
"epoch": 1.9912234042553192,
|
|
"grad_norm": 4.038280963897705,
|
|
"learning_rate": 7.035022241299083e-06,
|
|
"loss": 0.7987,
|
|
"step": 7487
|
|
},
|
|
{
|
|
"epoch": 1.9914893617021276,
|
|
"grad_norm": 4.29449462890625,
|
|
"learning_rate": 7.034218846728331e-06,
|
|
"loss": 0.8703,
|
|
"step": 7488
|
|
},
|
|
{
|
|
"epoch": 1.991755319148936,
|
|
"grad_norm": 4.56672477722168,
|
|
"learning_rate": 7.033415389216452e-06,
|
|
"loss": 0.9195,
|
|
"step": 7489
|
|
},
|
|
{
|
|
"epoch": 1.9920212765957448,
|
|
"grad_norm": 4.10626745223999,
|
|
"learning_rate": 7.032611868788306e-06,
|
|
"loss": 0.7476,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 1.992287234042553,
|
|
"grad_norm": 3.6163523197174072,
|
|
"learning_rate": 7.031808285468756e-06,
|
|
"loss": 0.8082,
|
|
"step": 7491
|
|
},
|
|
{
|
|
"epoch": 1.9925531914893617,
|
|
"grad_norm": 4.114681243896484,
|
|
"learning_rate": 7.031004639282666e-06,
|
|
"loss": 0.9355,
|
|
"step": 7492
|
|
},
|
|
{
|
|
"epoch": 1.9928191489361702,
|
|
"grad_norm": 3.9397499561309814,
|
|
"learning_rate": 7.0302009302549e-06,
|
|
"loss": 0.7364,
|
|
"step": 7493
|
|
},
|
|
{
|
|
"epoch": 1.9930851063829786,
|
|
"grad_norm": 3.4797003269195557,
|
|
"learning_rate": 7.029397158410329e-06,
|
|
"loss": 0.8413,
|
|
"step": 7494
|
|
},
|
|
{
|
|
"epoch": 1.9933510638297873,
|
|
"grad_norm": 4.215932369232178,
|
|
"learning_rate": 7.028593323773819e-06,
|
|
"loss": 0.8095,
|
|
"step": 7495
|
|
},
|
|
{
|
|
"epoch": 1.9936170212765958,
|
|
"grad_norm": 3.694060802459717,
|
|
"learning_rate": 7.027789426370244e-06,
|
|
"loss": 0.8051,
|
|
"step": 7496
|
|
},
|
|
{
|
|
"epoch": 1.9938829787234043,
|
|
"grad_norm": 4.0490875244140625,
|
|
"learning_rate": 7.026985466224477e-06,
|
|
"loss": 0.874,
|
|
"step": 7497
|
|
},
|
|
{
|
|
"epoch": 1.9941489361702127,
|
|
"grad_norm": 4.0154194831848145,
|
|
"learning_rate": 7.026181443361392e-06,
|
|
"loss": 0.807,
|
|
"step": 7498
|
|
},
|
|
{
|
|
"epoch": 1.9944148936170212,
|
|
"grad_norm": 3.8070061206817627,
|
|
"learning_rate": 7.025377357805867e-06,
|
|
"loss": 0.8078,
|
|
"step": 7499
|
|
},
|
|
{
|
|
"epoch": 1.9946808510638299,
|
|
"grad_norm": 4.185990810394287,
|
|
"learning_rate": 7.024573209582783e-06,
|
|
"loss": 0.7529,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 1.9946808510638299,
|
|
"eval_loss": 1.260877251625061,
|
|
"eval_runtime": 13.905,
|
|
"eval_samples_per_second": 28.767,
|
|
"eval_steps_per_second": 3.596,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 1.9949468085106383,
|
|
"grad_norm": 3.18033504486084,
|
|
"learning_rate": 7.023768998717022e-06,
|
|
"loss": 0.7159,
|
|
"step": 7501
|
|
},
|
|
{
|
|
"epoch": 1.9952127659574468,
|
|
"grad_norm": 3.839970111846924,
|
|
"learning_rate": 7.022964725233463e-06,
|
|
"loss": 0.7902,
|
|
"step": 7502
|
|
},
|
|
{
|
|
"epoch": 1.9954787234042555,
|
|
"grad_norm": 4.011384963989258,
|
|
"learning_rate": 7.022160389156995e-06,
|
|
"loss": 0.7596,
|
|
"step": 7503
|
|
},
|
|
{
|
|
"epoch": 1.9957446808510637,
|
|
"grad_norm": 3.67543888092041,
|
|
"learning_rate": 7.0213559905125016e-06,
|
|
"loss": 0.7987,
|
|
"step": 7504
|
|
},
|
|
{
|
|
"epoch": 1.9960106382978724,
|
|
"grad_norm": 4.240528583526611,
|
|
"learning_rate": 7.020551529324877e-06,
|
|
"loss": 0.8651,
|
|
"step": 7505
|
|
},
|
|
{
|
|
"epoch": 1.9962765957446809,
|
|
"grad_norm": 3.9020180702209473,
|
|
"learning_rate": 7.0197470056190075e-06,
|
|
"loss": 0.9205,
|
|
"step": 7506
|
|
},
|
|
{
|
|
"epoch": 1.9965425531914893,
|
|
"grad_norm": 4.0633368492126465,
|
|
"learning_rate": 7.0189424194197875e-06,
|
|
"loss": 0.8294,
|
|
"step": 7507
|
|
},
|
|
{
|
|
"epoch": 1.996808510638298,
|
|
"grad_norm": 3.88988995552063,
|
|
"learning_rate": 7.018137770752114e-06,
|
|
"loss": 0.861,
|
|
"step": 7508
|
|
},
|
|
{
|
|
"epoch": 1.9970744680851062,
|
|
"grad_norm": 3.5177197456359863,
|
|
"learning_rate": 7.01733305964088e-06,
|
|
"loss": 0.772,
|
|
"step": 7509
|
|
},
|
|
{
|
|
"epoch": 1.997340425531915,
|
|
"grad_norm": 3.661116123199463,
|
|
"learning_rate": 7.016528286110986e-06,
|
|
"loss": 0.7985,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 1.9976063829787234,
|
|
"grad_norm": 4.28385591506958,
|
|
"learning_rate": 7.015723450187334e-06,
|
|
"loss": 0.9045,
|
|
"step": 7511
|
|
},
|
|
{
|
|
"epoch": 1.9978723404255319,
|
|
"grad_norm": 3.899296522140503,
|
|
"learning_rate": 7.014918551894824e-06,
|
|
"loss": 0.7558,
|
|
"step": 7512
|
|
},
|
|
{
|
|
"epoch": 1.9981382978723405,
|
|
"grad_norm": 3.9070241451263428,
|
|
"learning_rate": 7.014113591258361e-06,
|
|
"loss": 0.8287,
|
|
"step": 7513
|
|
},
|
|
{
|
|
"epoch": 1.9984042553191488,
|
|
"grad_norm": 3.7345831394195557,
|
|
"learning_rate": 7.013308568302855e-06,
|
|
"loss": 0.781,
|
|
"step": 7514
|
|
},
|
|
{
|
|
"epoch": 1.9986702127659575,
|
|
"grad_norm": 3.6665847301483154,
|
|
"learning_rate": 7.012503483053209e-06,
|
|
"loss": 0.9715,
|
|
"step": 7515
|
|
},
|
|
{
|
|
"epoch": 1.998936170212766,
|
|
"grad_norm": 3.48984956741333,
|
|
"learning_rate": 7.011698335534336e-06,
|
|
"loss": 0.6823,
|
|
"step": 7516
|
|
},
|
|
{
|
|
"epoch": 1.9992021276595744,
|
|
"grad_norm": 3.7711336612701416,
|
|
"learning_rate": 7.01089312577115e-06,
|
|
"loss": 0.8192,
|
|
"step": 7517
|
|
},
|
|
{
|
|
"epoch": 1.999468085106383,
|
|
"grad_norm": 4.02569580078125,
|
|
"learning_rate": 7.0100878537885605e-06,
|
|
"loss": 0.856,
|
|
"step": 7518
|
|
},
|
|
{
|
|
"epoch": 1.9997340425531915,
|
|
"grad_norm": 4.044494152069092,
|
|
"learning_rate": 7.009282519611488e-06,
|
|
"loss": 0.8349,
|
|
"step": 7519
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 3.897979259490967,
|
|
"learning_rate": 7.008477123264849e-06,
|
|
"loss": 0.6436,
|
|
"step": 7520
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 18800,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500.0,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.4371418007171236e+18,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|