Files
Law_Justice-llama3_1_8B_ins…/trainer_state.json
ModelHub XC 1222d4b555 初始化项目,由ModelHub XC社区提供模型
Model: BAAI/Law_Justice-llama3_1_8B_instruct
Source: Original Platform
2026-05-18 17:35:32 +08:00

52794 lines
1.2 MiB

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 7520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00026595744680851064,
"grad_norm": 12.928236961364746,
"learning_rate": 1.0638297872340427e-08,
"loss": 1.5564,
"step": 1
},
{
"epoch": 0.0005319148936170213,
"grad_norm": 12.116073608398438,
"learning_rate": 2.1276595744680853e-08,
"loss": 1.5756,
"step": 2
},
{
"epoch": 0.0007978723404255319,
"grad_norm": 13.450613975524902,
"learning_rate": 3.191489361702128e-08,
"loss": 1.6078,
"step": 3
},
{
"epoch": 0.0010638297872340426,
"grad_norm": 14.591333389282227,
"learning_rate": 4.2553191489361707e-08,
"loss": 1.6333,
"step": 4
},
{
"epoch": 0.0013297872340425532,
"grad_norm": 14.167532920837402,
"learning_rate": 5.319148936170213e-08,
"loss": 1.4764,
"step": 5
},
{
"epoch": 0.0015957446808510637,
"grad_norm": 11.665863037109375,
"learning_rate": 6.382978723404255e-08,
"loss": 1.5681,
"step": 6
},
{
"epoch": 0.0018617021276595746,
"grad_norm": 12.705963134765625,
"learning_rate": 7.446808510638299e-08,
"loss": 1.5249,
"step": 7
},
{
"epoch": 0.002127659574468085,
"grad_norm": 13.839447021484375,
"learning_rate": 8.510638297872341e-08,
"loss": 1.6567,
"step": 8
},
{
"epoch": 0.0023936170212765957,
"grad_norm": 11.46570110321045,
"learning_rate": 9.574468085106384e-08,
"loss": 1.4166,
"step": 9
},
{
"epoch": 0.0026595744680851063,
"grad_norm": 12.468977928161621,
"learning_rate": 1.0638297872340426e-07,
"loss": 1.4788,
"step": 10
},
{
"epoch": 0.002925531914893617,
"grad_norm": 10.813947677612305,
"learning_rate": 1.1702127659574468e-07,
"loss": 1.3127,
"step": 11
},
{
"epoch": 0.0031914893617021275,
"grad_norm": 12.833952903747559,
"learning_rate": 1.276595744680851e-07,
"loss": 1.5291,
"step": 12
},
{
"epoch": 0.003457446808510638,
"grad_norm": 13.475564956665039,
"learning_rate": 1.3829787234042553e-07,
"loss": 1.4629,
"step": 13
},
{
"epoch": 0.003723404255319149,
"grad_norm": 11.995802879333496,
"learning_rate": 1.4893617021276598e-07,
"loss": 1.5887,
"step": 14
},
{
"epoch": 0.003989361702127659,
"grad_norm": 14.704851150512695,
"learning_rate": 1.5957446808510638e-07,
"loss": 1.4533,
"step": 15
},
{
"epoch": 0.00425531914893617,
"grad_norm": 11.153929710388184,
"learning_rate": 1.7021276595744683e-07,
"loss": 1.4027,
"step": 16
},
{
"epoch": 0.0045212765957446804,
"grad_norm": 14.091814994812012,
"learning_rate": 1.8085106382978722e-07,
"loss": 1.6199,
"step": 17
},
{
"epoch": 0.0047872340425531915,
"grad_norm": 13.533143997192383,
"learning_rate": 1.9148936170212767e-07,
"loss": 1.4809,
"step": 18
},
{
"epoch": 0.0050531914893617025,
"grad_norm": 13.076473236083984,
"learning_rate": 2.0212765957446812e-07,
"loss": 1.5374,
"step": 19
},
{
"epoch": 0.005319148936170213,
"grad_norm": 13.062971115112305,
"learning_rate": 2.1276595744680852e-07,
"loss": 1.6008,
"step": 20
},
{
"epoch": 0.005585106382978724,
"grad_norm": 13.033509254455566,
"learning_rate": 2.2340425531914897e-07,
"loss": 1.4679,
"step": 21
},
{
"epoch": 0.005851063829787234,
"grad_norm": 11.98855972290039,
"learning_rate": 2.3404255319148937e-07,
"loss": 1.5049,
"step": 22
},
{
"epoch": 0.006117021276595745,
"grad_norm": 13.161596298217773,
"learning_rate": 2.446808510638298e-07,
"loss": 1.5114,
"step": 23
},
{
"epoch": 0.006382978723404255,
"grad_norm": 12.387269020080566,
"learning_rate": 2.553191489361702e-07,
"loss": 1.3019,
"step": 24
},
{
"epoch": 0.006648936170212766,
"grad_norm": 10.667431831359863,
"learning_rate": 2.6595744680851066e-07,
"loss": 1.3113,
"step": 25
},
{
"epoch": 0.006914893617021276,
"grad_norm": 11.682806015014648,
"learning_rate": 2.7659574468085106e-07,
"loss": 1.627,
"step": 26
},
{
"epoch": 0.007180851063829787,
"grad_norm": 11.338486671447754,
"learning_rate": 2.872340425531915e-07,
"loss": 1.6309,
"step": 27
},
{
"epoch": 0.007446808510638298,
"grad_norm": 12.796504020690918,
"learning_rate": 2.9787234042553196e-07,
"loss": 1.4464,
"step": 28
},
{
"epoch": 0.007712765957446808,
"grad_norm": 12.2352876663208,
"learning_rate": 3.0851063829787236e-07,
"loss": 1.5748,
"step": 29
},
{
"epoch": 0.007978723404255319,
"grad_norm": 10.04947566986084,
"learning_rate": 3.1914893617021275e-07,
"loss": 1.3302,
"step": 30
},
{
"epoch": 0.00824468085106383,
"grad_norm": 11.51389217376709,
"learning_rate": 3.297872340425532e-07,
"loss": 1.3543,
"step": 31
},
{
"epoch": 0.00851063829787234,
"grad_norm": 9.522992134094238,
"learning_rate": 3.4042553191489365e-07,
"loss": 1.4485,
"step": 32
},
{
"epoch": 0.008776595744680852,
"grad_norm": 8.156554222106934,
"learning_rate": 3.510638297872341e-07,
"loss": 1.3791,
"step": 33
},
{
"epoch": 0.009042553191489361,
"grad_norm": 10.546247482299805,
"learning_rate": 3.6170212765957445e-07,
"loss": 1.6197,
"step": 34
},
{
"epoch": 0.009308510638297872,
"grad_norm": 8.094082832336426,
"learning_rate": 3.723404255319149e-07,
"loss": 1.2722,
"step": 35
},
{
"epoch": 0.009574468085106383,
"grad_norm": 7.64621114730835,
"learning_rate": 3.8297872340425535e-07,
"loss": 1.2489,
"step": 36
},
{
"epoch": 0.009840425531914894,
"grad_norm": 7.087127208709717,
"learning_rate": 3.936170212765958e-07,
"loss": 1.3383,
"step": 37
},
{
"epoch": 0.010106382978723405,
"grad_norm": 7.989037990570068,
"learning_rate": 4.0425531914893625e-07,
"loss": 1.2275,
"step": 38
},
{
"epoch": 0.010372340425531914,
"grad_norm": 9.057306289672852,
"learning_rate": 4.148936170212766e-07,
"loss": 1.4094,
"step": 39
},
{
"epoch": 0.010638297872340425,
"grad_norm": 7.628477573394775,
"learning_rate": 4.2553191489361704e-07,
"loss": 1.3137,
"step": 40
},
{
"epoch": 0.010904255319148936,
"grad_norm": 7.493610858917236,
"learning_rate": 4.361702127659575e-07,
"loss": 1.3603,
"step": 41
},
{
"epoch": 0.011170212765957447,
"grad_norm": 6.819916725158691,
"learning_rate": 4.4680851063829794e-07,
"loss": 1.5013,
"step": 42
},
{
"epoch": 0.011436170212765957,
"grad_norm": 7.222757339477539,
"learning_rate": 4.574468085106383e-07,
"loss": 1.4389,
"step": 43
},
{
"epoch": 0.011702127659574468,
"grad_norm": 6.92927885055542,
"learning_rate": 4.6808510638297873e-07,
"loss": 1.386,
"step": 44
},
{
"epoch": 0.011968085106382979,
"grad_norm": 6.100423336029053,
"learning_rate": 4.787234042553192e-07,
"loss": 1.3654,
"step": 45
},
{
"epoch": 0.01223404255319149,
"grad_norm": 6.047520637512207,
"learning_rate": 4.893617021276596e-07,
"loss": 1.2467,
"step": 46
},
{
"epoch": 0.0125,
"grad_norm": 6.429448127746582,
"learning_rate": 5.000000000000001e-07,
"loss": 1.2826,
"step": 47
},
{
"epoch": 0.01276595744680851,
"grad_norm": 6.81625509262085,
"learning_rate": 5.106382978723404e-07,
"loss": 1.4576,
"step": 48
},
{
"epoch": 0.013031914893617021,
"grad_norm": 5.9020609855651855,
"learning_rate": 5.212765957446809e-07,
"loss": 1.2929,
"step": 49
},
{
"epoch": 0.013297872340425532,
"grad_norm": 6.343348979949951,
"learning_rate": 5.319148936170213e-07,
"loss": 1.4692,
"step": 50
},
{
"epoch": 0.013563829787234043,
"grad_norm": 6.274758338928223,
"learning_rate": 5.425531914893618e-07,
"loss": 1.3331,
"step": 51
},
{
"epoch": 0.013829787234042552,
"grad_norm": 6.188233852386475,
"learning_rate": 5.531914893617021e-07,
"loss": 1.4061,
"step": 52
},
{
"epoch": 0.014095744680851063,
"grad_norm": 6.108701705932617,
"learning_rate": 5.638297872340426e-07,
"loss": 1.2786,
"step": 53
},
{
"epoch": 0.014361702127659574,
"grad_norm": 6.032108306884766,
"learning_rate": 5.74468085106383e-07,
"loss": 1.3159,
"step": 54
},
{
"epoch": 0.014627659574468085,
"grad_norm": 6.019993305206299,
"learning_rate": 5.851063829787235e-07,
"loss": 1.3846,
"step": 55
},
{
"epoch": 0.014893617021276596,
"grad_norm": 6.405829906463623,
"learning_rate": 5.957446808510639e-07,
"loss": 1.3691,
"step": 56
},
{
"epoch": 0.015159574468085106,
"grad_norm": 6.517266273498535,
"learning_rate": 6.063829787234043e-07,
"loss": 1.416,
"step": 57
},
{
"epoch": 0.015425531914893617,
"grad_norm": 5.831709861755371,
"learning_rate": 6.170212765957447e-07,
"loss": 1.3022,
"step": 58
},
{
"epoch": 0.015691489361702126,
"grad_norm": 6.413986682891846,
"learning_rate": 6.276595744680851e-07,
"loss": 1.2001,
"step": 59
},
{
"epoch": 0.015957446808510637,
"grad_norm": 5.887234687805176,
"learning_rate": 6.382978723404255e-07,
"loss": 1.301,
"step": 60
},
{
"epoch": 0.016223404255319148,
"grad_norm": 6.500317573547363,
"learning_rate": 6.48936170212766e-07,
"loss": 1.2389,
"step": 61
},
{
"epoch": 0.01648936170212766,
"grad_norm": 5.423646450042725,
"learning_rate": 6.595744680851064e-07,
"loss": 1.1179,
"step": 62
},
{
"epoch": 0.01675531914893617,
"grad_norm": 6.422118663787842,
"learning_rate": 6.702127659574469e-07,
"loss": 1.2685,
"step": 63
},
{
"epoch": 0.01702127659574468,
"grad_norm": 6.100841999053955,
"learning_rate": 6.808510638297873e-07,
"loss": 1.3432,
"step": 64
},
{
"epoch": 0.017287234042553192,
"grad_norm": 6.879647254943848,
"learning_rate": 6.914893617021278e-07,
"loss": 1.4595,
"step": 65
},
{
"epoch": 0.017553191489361703,
"grad_norm": 5.739667892456055,
"learning_rate": 7.021276595744682e-07,
"loss": 1.254,
"step": 66
},
{
"epoch": 0.017819148936170214,
"grad_norm": 5.58401346206665,
"learning_rate": 7.127659574468087e-07,
"loss": 1.275,
"step": 67
},
{
"epoch": 0.018085106382978722,
"grad_norm": 5.75786018371582,
"learning_rate": 7.234042553191489e-07,
"loss": 1.2797,
"step": 68
},
{
"epoch": 0.018351063829787233,
"grad_norm": 5.23975133895874,
"learning_rate": 7.340425531914893e-07,
"loss": 1.2314,
"step": 69
},
{
"epoch": 0.018617021276595744,
"grad_norm": 5.783809661865234,
"learning_rate": 7.446808510638298e-07,
"loss": 1.2621,
"step": 70
},
{
"epoch": 0.018882978723404255,
"grad_norm": 6.303256988525391,
"learning_rate": 7.553191489361702e-07,
"loss": 1.2988,
"step": 71
},
{
"epoch": 0.019148936170212766,
"grad_norm": 6.035338401794434,
"learning_rate": 7.659574468085107e-07,
"loss": 1.3572,
"step": 72
},
{
"epoch": 0.019414893617021277,
"grad_norm": 5.458433628082275,
"learning_rate": 7.765957446808511e-07,
"loss": 1.2515,
"step": 73
},
{
"epoch": 0.019680851063829788,
"grad_norm": 5.706748008728027,
"learning_rate": 7.872340425531916e-07,
"loss": 1.2144,
"step": 74
},
{
"epoch": 0.0199468085106383,
"grad_norm": 5.4996018409729,
"learning_rate": 7.97872340425532e-07,
"loss": 1.2999,
"step": 75
},
{
"epoch": 0.02021276595744681,
"grad_norm": 5.666746139526367,
"learning_rate": 8.085106382978725e-07,
"loss": 1.2947,
"step": 76
},
{
"epoch": 0.020478723404255317,
"grad_norm": 5.446689128875732,
"learning_rate": 8.191489361702127e-07,
"loss": 1.4081,
"step": 77
},
{
"epoch": 0.02074468085106383,
"grad_norm": 5.886783123016357,
"learning_rate": 8.297872340425532e-07,
"loss": 1.5147,
"step": 78
},
{
"epoch": 0.02101063829787234,
"grad_norm": 5.839478969573975,
"learning_rate": 8.404255319148936e-07,
"loss": 1.3047,
"step": 79
},
{
"epoch": 0.02127659574468085,
"grad_norm": 5.6594767570495605,
"learning_rate": 8.510638297872341e-07,
"loss": 1.3499,
"step": 80
},
{
"epoch": 0.02154255319148936,
"grad_norm": 5.712738990783691,
"learning_rate": 8.617021276595745e-07,
"loss": 1.2731,
"step": 81
},
{
"epoch": 0.021808510638297873,
"grad_norm": 5.7129316329956055,
"learning_rate": 8.72340425531915e-07,
"loss": 1.2454,
"step": 82
},
{
"epoch": 0.022074468085106384,
"grad_norm": 5.676748275756836,
"learning_rate": 8.829787234042554e-07,
"loss": 1.4916,
"step": 83
},
{
"epoch": 0.022340425531914895,
"grad_norm": 5.481147289276123,
"learning_rate": 8.936170212765959e-07,
"loss": 1.3493,
"step": 84
},
{
"epoch": 0.022606382978723406,
"grad_norm": 5.774475574493408,
"learning_rate": 9.042553191489363e-07,
"loss": 1.2583,
"step": 85
},
{
"epoch": 0.022872340425531913,
"grad_norm": 6.059263229370117,
"learning_rate": 9.148936170212766e-07,
"loss": 1.2257,
"step": 86
},
{
"epoch": 0.023138297872340424,
"grad_norm": 5.5594258308410645,
"learning_rate": 9.25531914893617e-07,
"loss": 1.3313,
"step": 87
},
{
"epoch": 0.023404255319148935,
"grad_norm": 5.335761070251465,
"learning_rate": 9.361702127659575e-07,
"loss": 1.221,
"step": 88
},
{
"epoch": 0.023670212765957446,
"grad_norm": 5.275820255279541,
"learning_rate": 9.468085106382979e-07,
"loss": 1.315,
"step": 89
},
{
"epoch": 0.023936170212765957,
"grad_norm": 5.96125602722168,
"learning_rate": 9.574468085106384e-07,
"loss": 1.2792,
"step": 90
},
{
"epoch": 0.02420212765957447,
"grad_norm": 5.549777984619141,
"learning_rate": 9.680851063829788e-07,
"loss": 1.2194,
"step": 91
},
{
"epoch": 0.02446808510638298,
"grad_norm": 5.814997673034668,
"learning_rate": 9.787234042553193e-07,
"loss": 1.2917,
"step": 92
},
{
"epoch": 0.02473404255319149,
"grad_norm": 5.332813739776611,
"learning_rate": 9.893617021276597e-07,
"loss": 1.2458,
"step": 93
},
{
"epoch": 0.025,
"grad_norm": 5.473198890686035,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.2752,
"step": 94
},
{
"epoch": 0.02526595744680851,
"grad_norm": 5.484592914581299,
"learning_rate": 1.0106382978723404e-06,
"loss": 1.3052,
"step": 95
},
{
"epoch": 0.02553191489361702,
"grad_norm": 6.4860453605651855,
"learning_rate": 1.0212765957446809e-06,
"loss": 1.4454,
"step": 96
},
{
"epoch": 0.02579787234042553,
"grad_norm": 5.582982540130615,
"learning_rate": 1.0319148936170213e-06,
"loss": 1.2514,
"step": 97
},
{
"epoch": 0.026063829787234042,
"grad_norm": 5.618495464324951,
"learning_rate": 1.0425531914893618e-06,
"loss": 1.4123,
"step": 98
},
{
"epoch": 0.026329787234042553,
"grad_norm": 5.169803619384766,
"learning_rate": 1.0531914893617022e-06,
"loss": 1.3128,
"step": 99
},
{
"epoch": 0.026595744680851064,
"grad_norm": 5.215284824371338,
"learning_rate": 1.0638297872340427e-06,
"loss": 1.4286,
"step": 100
},
{
"epoch": 0.026861702127659575,
"grad_norm": 5.888491153717041,
"learning_rate": 1.074468085106383e-06,
"loss": 1.2953,
"step": 101
},
{
"epoch": 0.027127659574468086,
"grad_norm": 5.597144603729248,
"learning_rate": 1.0851063829787236e-06,
"loss": 1.2401,
"step": 102
},
{
"epoch": 0.027393617021276597,
"grad_norm": 5.215080261230469,
"learning_rate": 1.095744680851064e-06,
"loss": 1.1961,
"step": 103
},
{
"epoch": 0.027659574468085105,
"grad_norm": 5.162172794342041,
"learning_rate": 1.1063829787234042e-06,
"loss": 1.2641,
"step": 104
},
{
"epoch": 0.027925531914893616,
"grad_norm": 5.490815162658691,
"learning_rate": 1.1170212765957447e-06,
"loss": 1.1788,
"step": 105
},
{
"epoch": 0.028191489361702127,
"grad_norm": 5.236513137817383,
"learning_rate": 1.1276595744680851e-06,
"loss": 1.3241,
"step": 106
},
{
"epoch": 0.028457446808510638,
"grad_norm": 5.335816860198975,
"learning_rate": 1.1382978723404256e-06,
"loss": 1.299,
"step": 107
},
{
"epoch": 0.02872340425531915,
"grad_norm": 5.176724910736084,
"learning_rate": 1.148936170212766e-06,
"loss": 1.3305,
"step": 108
},
{
"epoch": 0.02898936170212766,
"grad_norm": 6.114458084106445,
"learning_rate": 1.1595744680851065e-06,
"loss": 1.3005,
"step": 109
},
{
"epoch": 0.02925531914893617,
"grad_norm": 5.407876491546631,
"learning_rate": 1.170212765957447e-06,
"loss": 1.2806,
"step": 110
},
{
"epoch": 0.029521276595744682,
"grad_norm": 4.949467658996582,
"learning_rate": 1.1808510638297874e-06,
"loss": 1.2961,
"step": 111
},
{
"epoch": 0.029787234042553193,
"grad_norm": 6.091759204864502,
"learning_rate": 1.1914893617021278e-06,
"loss": 1.3533,
"step": 112
},
{
"epoch": 0.0300531914893617,
"grad_norm": 6.605318069458008,
"learning_rate": 1.202127659574468e-06,
"loss": 1.3292,
"step": 113
},
{
"epoch": 0.03031914893617021,
"grad_norm": 5.556684494018555,
"learning_rate": 1.2127659574468085e-06,
"loss": 1.2438,
"step": 114
},
{
"epoch": 0.030585106382978722,
"grad_norm": 5.465230941772461,
"learning_rate": 1.223404255319149e-06,
"loss": 1.2679,
"step": 115
},
{
"epoch": 0.030851063829787233,
"grad_norm": 5.770520210266113,
"learning_rate": 1.2340425531914894e-06,
"loss": 1.355,
"step": 116
},
{
"epoch": 0.031117021276595744,
"grad_norm": 5.495830535888672,
"learning_rate": 1.2446808510638299e-06,
"loss": 1.2153,
"step": 117
},
{
"epoch": 0.03138297872340425,
"grad_norm": 5.549342632293701,
"learning_rate": 1.2553191489361701e-06,
"loss": 1.3283,
"step": 118
},
{
"epoch": 0.03164893617021276,
"grad_norm": 5.871270656585693,
"learning_rate": 1.2659574468085106e-06,
"loss": 1.2485,
"step": 119
},
{
"epoch": 0.031914893617021274,
"grad_norm": 5.074721813201904,
"learning_rate": 1.276595744680851e-06,
"loss": 1.2725,
"step": 120
},
{
"epoch": 0.032180851063829785,
"grad_norm": 5.2500715255737305,
"learning_rate": 1.2872340425531915e-06,
"loss": 1.1767,
"step": 121
},
{
"epoch": 0.032446808510638296,
"grad_norm": 5.220420837402344,
"learning_rate": 1.297872340425532e-06,
"loss": 1.2566,
"step": 122
},
{
"epoch": 0.03271276595744681,
"grad_norm": 5.691092014312744,
"learning_rate": 1.3085106382978724e-06,
"loss": 1.1828,
"step": 123
},
{
"epoch": 0.03297872340425532,
"grad_norm": 5.540714740753174,
"learning_rate": 1.3191489361702128e-06,
"loss": 1.4373,
"step": 124
},
{
"epoch": 0.03324468085106383,
"grad_norm": 5.538027286529541,
"learning_rate": 1.3297872340425533e-06,
"loss": 1.2955,
"step": 125
},
{
"epoch": 0.03351063829787234,
"grad_norm": 5.601515769958496,
"learning_rate": 1.3404255319148937e-06,
"loss": 1.4246,
"step": 126
},
{
"epoch": 0.03377659574468085,
"grad_norm": 5.398896217346191,
"learning_rate": 1.3510638297872342e-06,
"loss": 1.2479,
"step": 127
},
{
"epoch": 0.03404255319148936,
"grad_norm": 5.281778335571289,
"learning_rate": 1.3617021276595746e-06,
"loss": 1.4188,
"step": 128
},
{
"epoch": 0.03430851063829787,
"grad_norm": 5.898463249206543,
"learning_rate": 1.372340425531915e-06,
"loss": 1.2214,
"step": 129
},
{
"epoch": 0.034574468085106384,
"grad_norm": 5.390676975250244,
"learning_rate": 1.3829787234042555e-06,
"loss": 1.2872,
"step": 130
},
{
"epoch": 0.034840425531914895,
"grad_norm": 5.157502174377441,
"learning_rate": 1.393617021276596e-06,
"loss": 1.2954,
"step": 131
},
{
"epoch": 0.035106382978723406,
"grad_norm": 5.678062438964844,
"learning_rate": 1.4042553191489364e-06,
"loss": 1.2732,
"step": 132
},
{
"epoch": 0.03537234042553192,
"grad_norm": 5.359380722045898,
"learning_rate": 1.4148936170212769e-06,
"loss": 1.2858,
"step": 133
},
{
"epoch": 0.03563829787234043,
"grad_norm": 6.153907775878906,
"learning_rate": 1.4255319148936173e-06,
"loss": 1.3225,
"step": 134
},
{
"epoch": 0.03590425531914894,
"grad_norm": 5.03823709487915,
"learning_rate": 1.4361702127659578e-06,
"loss": 1.196,
"step": 135
},
{
"epoch": 0.036170212765957444,
"grad_norm": 5.12296199798584,
"learning_rate": 1.4468085106382978e-06,
"loss": 1.1534,
"step": 136
},
{
"epoch": 0.036436170212765955,
"grad_norm": 5.526867866516113,
"learning_rate": 1.4574468085106382e-06,
"loss": 1.3099,
"step": 137
},
{
"epoch": 0.036702127659574466,
"grad_norm": 5.23512601852417,
"learning_rate": 1.4680851063829787e-06,
"loss": 1.167,
"step": 138
},
{
"epoch": 0.03696808510638298,
"grad_norm": 5.28326940536499,
"learning_rate": 1.4787234042553191e-06,
"loss": 1.2882,
"step": 139
},
{
"epoch": 0.03723404255319149,
"grad_norm": 6.0062336921691895,
"learning_rate": 1.4893617021276596e-06,
"loss": 1.2937,
"step": 140
},
{
"epoch": 0.0375,
"grad_norm": 5.471292495727539,
"learning_rate": 1.5e-06,
"loss": 1.2783,
"step": 141
},
{
"epoch": 0.03776595744680851,
"grad_norm": 4.784001350402832,
"learning_rate": 1.5106382978723405e-06,
"loss": 1.1493,
"step": 142
},
{
"epoch": 0.03803191489361702,
"grad_norm": 5.167656898498535,
"learning_rate": 1.521276595744681e-06,
"loss": 1.2872,
"step": 143
},
{
"epoch": 0.03829787234042553,
"grad_norm": 5.2528276443481445,
"learning_rate": 1.5319148936170214e-06,
"loss": 1.2876,
"step": 144
},
{
"epoch": 0.03856382978723404,
"grad_norm": 5.4960784912109375,
"learning_rate": 1.5425531914893618e-06,
"loss": 1.2364,
"step": 145
},
{
"epoch": 0.038829787234042554,
"grad_norm": 5.419551372528076,
"learning_rate": 1.5531914893617023e-06,
"loss": 1.3695,
"step": 146
},
{
"epoch": 0.039095744680851065,
"grad_norm": 5.1890974044799805,
"learning_rate": 1.5638297872340427e-06,
"loss": 1.2263,
"step": 147
},
{
"epoch": 0.039361702127659576,
"grad_norm": 5.578823566436768,
"learning_rate": 1.5744680851063832e-06,
"loss": 1.2531,
"step": 148
},
{
"epoch": 0.03962765957446809,
"grad_norm": 5.37275505065918,
"learning_rate": 1.5851063829787236e-06,
"loss": 1.2201,
"step": 149
},
{
"epoch": 0.0398936170212766,
"grad_norm": 5.344025135040283,
"learning_rate": 1.595744680851064e-06,
"loss": 1.1419,
"step": 150
},
{
"epoch": 0.04015957446808511,
"grad_norm": 5.697562217712402,
"learning_rate": 1.6063829787234045e-06,
"loss": 1.3923,
"step": 151
},
{
"epoch": 0.04042553191489362,
"grad_norm": 5.420823097229004,
"learning_rate": 1.617021276595745e-06,
"loss": 1.2936,
"step": 152
},
{
"epoch": 0.04069148936170213,
"grad_norm": 5.53727912902832,
"learning_rate": 1.6276595744680854e-06,
"loss": 1.2047,
"step": 153
},
{
"epoch": 0.040957446808510635,
"grad_norm": 5.577879428863525,
"learning_rate": 1.6382978723404255e-06,
"loss": 1.2495,
"step": 154
},
{
"epoch": 0.041223404255319146,
"grad_norm": 5.115095138549805,
"learning_rate": 1.648936170212766e-06,
"loss": 1.3324,
"step": 155
},
{
"epoch": 0.04148936170212766,
"grad_norm": 5.6801862716674805,
"learning_rate": 1.6595744680851064e-06,
"loss": 1.3554,
"step": 156
},
{
"epoch": 0.04175531914893617,
"grad_norm": 5.293743133544922,
"learning_rate": 1.6702127659574468e-06,
"loss": 1.2226,
"step": 157
},
{
"epoch": 0.04202127659574468,
"grad_norm": 5.129601955413818,
"learning_rate": 1.6808510638297873e-06,
"loss": 1.3393,
"step": 158
},
{
"epoch": 0.04228723404255319,
"grad_norm": 5.572645664215088,
"learning_rate": 1.6914893617021277e-06,
"loss": 1.2734,
"step": 159
},
{
"epoch": 0.0425531914893617,
"grad_norm": 4.944756507873535,
"learning_rate": 1.7021276595744682e-06,
"loss": 1.3417,
"step": 160
},
{
"epoch": 0.04281914893617021,
"grad_norm": 4.982651710510254,
"learning_rate": 1.7127659574468086e-06,
"loss": 1.2622,
"step": 161
},
{
"epoch": 0.04308510638297872,
"grad_norm": 5.134377479553223,
"learning_rate": 1.723404255319149e-06,
"loss": 1.1741,
"step": 162
},
{
"epoch": 0.043351063829787234,
"grad_norm": 4.829857349395752,
"learning_rate": 1.7340425531914895e-06,
"loss": 1.2298,
"step": 163
},
{
"epoch": 0.043617021276595745,
"grad_norm": 5.052809715270996,
"learning_rate": 1.74468085106383e-06,
"loss": 1.1607,
"step": 164
},
{
"epoch": 0.043882978723404256,
"grad_norm": 5.3465776443481445,
"learning_rate": 1.7553191489361704e-06,
"loss": 1.3924,
"step": 165
},
{
"epoch": 0.04414893617021277,
"grad_norm": 5.502316951751709,
"learning_rate": 1.7659574468085109e-06,
"loss": 1.1488,
"step": 166
},
{
"epoch": 0.04441489361702128,
"grad_norm": 5.253002643585205,
"learning_rate": 1.7765957446808513e-06,
"loss": 1.2004,
"step": 167
},
{
"epoch": 0.04468085106382979,
"grad_norm": 5.437882900238037,
"learning_rate": 1.7872340425531918e-06,
"loss": 1.3885,
"step": 168
},
{
"epoch": 0.0449468085106383,
"grad_norm": 5.526264190673828,
"learning_rate": 1.7978723404255322e-06,
"loss": 1.2351,
"step": 169
},
{
"epoch": 0.04521276595744681,
"grad_norm": 5.078868389129639,
"learning_rate": 1.8085106382978727e-06,
"loss": 1.1479,
"step": 170
},
{
"epoch": 0.04547872340425532,
"grad_norm": 5.379688739776611,
"learning_rate": 1.8191489361702131e-06,
"loss": 1.246,
"step": 171
},
{
"epoch": 0.045744680851063826,
"grad_norm": 4.756881237030029,
"learning_rate": 1.8297872340425531e-06,
"loss": 1.3602,
"step": 172
},
{
"epoch": 0.04601063829787234,
"grad_norm": 5.651166915893555,
"learning_rate": 1.8404255319148936e-06,
"loss": 1.1183,
"step": 173
},
{
"epoch": 0.04627659574468085,
"grad_norm": 5.725973129272461,
"learning_rate": 1.851063829787234e-06,
"loss": 1.2474,
"step": 174
},
{
"epoch": 0.04654255319148936,
"grad_norm": 4.994713306427002,
"learning_rate": 1.8617021276595745e-06,
"loss": 1.1945,
"step": 175
},
{
"epoch": 0.04680851063829787,
"grad_norm": 4.701328277587891,
"learning_rate": 1.872340425531915e-06,
"loss": 1.2735,
"step": 176
},
{
"epoch": 0.04707446808510638,
"grad_norm": 5.917819023132324,
"learning_rate": 1.8829787234042554e-06,
"loss": 1.2192,
"step": 177
},
{
"epoch": 0.04734042553191489,
"grad_norm": 5.055963039398193,
"learning_rate": 1.8936170212765958e-06,
"loss": 1.4119,
"step": 178
},
{
"epoch": 0.047606382978723404,
"grad_norm": 5.516870021820068,
"learning_rate": 1.9042553191489363e-06,
"loss": 1.2739,
"step": 179
},
{
"epoch": 0.047872340425531915,
"grad_norm": 5.217896461486816,
"learning_rate": 1.9148936170212767e-06,
"loss": 1.0916,
"step": 180
},
{
"epoch": 0.048138297872340426,
"grad_norm": 5.3772807121276855,
"learning_rate": 1.925531914893617e-06,
"loss": 1.2636,
"step": 181
},
{
"epoch": 0.04840425531914894,
"grad_norm": 5.261349678039551,
"learning_rate": 1.9361702127659576e-06,
"loss": 1.1872,
"step": 182
},
{
"epoch": 0.04867021276595745,
"grad_norm": 5.209681510925293,
"learning_rate": 1.946808510638298e-06,
"loss": 1.1946,
"step": 183
},
{
"epoch": 0.04893617021276596,
"grad_norm": 6.393560886383057,
"learning_rate": 1.9574468085106385e-06,
"loss": 1.4354,
"step": 184
},
{
"epoch": 0.04920212765957447,
"grad_norm": 5.200966835021973,
"learning_rate": 1.968085106382979e-06,
"loss": 1.264,
"step": 185
},
{
"epoch": 0.04946808510638298,
"grad_norm": 4.81060791015625,
"learning_rate": 1.9787234042553194e-06,
"loss": 1.345,
"step": 186
},
{
"epoch": 0.04973404255319149,
"grad_norm": 5.786832332611084,
"learning_rate": 1.98936170212766e-06,
"loss": 1.2897,
"step": 187
},
{
"epoch": 0.05,
"grad_norm": 5.332983493804932,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.3621,
"step": 188
},
{
"epoch": 0.050265957446808514,
"grad_norm": 5.093095779418945,
"learning_rate": 2.0106382978723408e-06,
"loss": 1.3366,
"step": 189
},
{
"epoch": 0.05053191489361702,
"grad_norm": 5.604922771453857,
"learning_rate": 2.021276595744681e-06,
"loss": 1.2009,
"step": 190
},
{
"epoch": 0.05079787234042553,
"grad_norm": 5.312707901000977,
"learning_rate": 2.0319148936170213e-06,
"loss": 1.1604,
"step": 191
},
{
"epoch": 0.05106382978723404,
"grad_norm": 5.330122470855713,
"learning_rate": 2.0425531914893617e-06,
"loss": 1.2102,
"step": 192
},
{
"epoch": 0.05132978723404255,
"grad_norm": 5.350152015686035,
"learning_rate": 2.053191489361702e-06,
"loss": 1.3483,
"step": 193
},
{
"epoch": 0.05159574468085106,
"grad_norm": 5.540630340576172,
"learning_rate": 2.0638297872340426e-06,
"loss": 1.437,
"step": 194
},
{
"epoch": 0.05186170212765957,
"grad_norm": 4.698929309844971,
"learning_rate": 2.074468085106383e-06,
"loss": 1.2083,
"step": 195
},
{
"epoch": 0.052127659574468084,
"grad_norm": 5.128317356109619,
"learning_rate": 2.0851063829787235e-06,
"loss": 1.1502,
"step": 196
},
{
"epoch": 0.052393617021276595,
"grad_norm": 5.425604343414307,
"learning_rate": 2.095744680851064e-06,
"loss": 1.2919,
"step": 197
},
{
"epoch": 0.052659574468085106,
"grad_norm": 5.3685712814331055,
"learning_rate": 2.1063829787234044e-06,
"loss": 1.2305,
"step": 198
},
{
"epoch": 0.05292553191489362,
"grad_norm": 6.010136127471924,
"learning_rate": 2.117021276595745e-06,
"loss": 1.0582,
"step": 199
},
{
"epoch": 0.05319148936170213,
"grad_norm": 5.427469253540039,
"learning_rate": 2.1276595744680853e-06,
"loss": 1.2515,
"step": 200
},
{
"epoch": 0.05345744680851064,
"grad_norm": 5.31635856628418,
"learning_rate": 2.1382978723404258e-06,
"loss": 1.2157,
"step": 201
},
{
"epoch": 0.05372340425531915,
"grad_norm": 5.334502220153809,
"learning_rate": 2.148936170212766e-06,
"loss": 1.271,
"step": 202
},
{
"epoch": 0.05398936170212766,
"grad_norm": 4.88215970993042,
"learning_rate": 2.1595744680851067e-06,
"loss": 1.2777,
"step": 203
},
{
"epoch": 0.05425531914893617,
"grad_norm": 5.919299602508545,
"learning_rate": 2.170212765957447e-06,
"loss": 1.3336,
"step": 204
},
{
"epoch": 0.05452127659574468,
"grad_norm": 5.037824630737305,
"learning_rate": 2.1808510638297876e-06,
"loss": 1.316,
"step": 205
},
{
"epoch": 0.054787234042553194,
"grad_norm": 5.16343879699707,
"learning_rate": 2.191489361702128e-06,
"loss": 1.2724,
"step": 206
},
{
"epoch": 0.055053191489361705,
"grad_norm": 5.36834192276001,
"learning_rate": 2.2021276595744685e-06,
"loss": 1.1693,
"step": 207
},
{
"epoch": 0.05531914893617021,
"grad_norm": 4.99350118637085,
"learning_rate": 2.2127659574468085e-06,
"loss": 1.225,
"step": 208
},
{
"epoch": 0.05558510638297872,
"grad_norm": 5.564612865447998,
"learning_rate": 2.223404255319149e-06,
"loss": 1.2125,
"step": 209
},
{
"epoch": 0.05585106382978723,
"grad_norm": 5.21875,
"learning_rate": 2.2340425531914894e-06,
"loss": 1.3788,
"step": 210
},
{
"epoch": 0.05611702127659574,
"grad_norm": 5.006836891174316,
"learning_rate": 2.24468085106383e-06,
"loss": 1.2095,
"step": 211
},
{
"epoch": 0.05638297872340425,
"grad_norm": 5.6003546714782715,
"learning_rate": 2.2553191489361703e-06,
"loss": 1.3872,
"step": 212
},
{
"epoch": 0.056648936170212764,
"grad_norm": 4.7773613929748535,
"learning_rate": 2.2659574468085107e-06,
"loss": 1.1979,
"step": 213
},
{
"epoch": 0.056914893617021275,
"grad_norm": 4.554566860198975,
"learning_rate": 2.276595744680851e-06,
"loss": 1.1656,
"step": 214
},
{
"epoch": 0.057180851063829786,
"grad_norm": 5.66951322555542,
"learning_rate": 2.2872340425531916e-06,
"loss": 1.3728,
"step": 215
},
{
"epoch": 0.0574468085106383,
"grad_norm": 5.2931013107299805,
"learning_rate": 2.297872340425532e-06,
"loss": 1.2003,
"step": 216
},
{
"epoch": 0.05771276595744681,
"grad_norm": 5.449213981628418,
"learning_rate": 2.3085106382978725e-06,
"loss": 1.2337,
"step": 217
},
{
"epoch": 0.05797872340425532,
"grad_norm": 5.684970378875732,
"learning_rate": 2.319148936170213e-06,
"loss": 1.2196,
"step": 218
},
{
"epoch": 0.05824468085106383,
"grad_norm": 5.038141250610352,
"learning_rate": 2.3297872340425534e-06,
"loss": 1.0954,
"step": 219
},
{
"epoch": 0.05851063829787234,
"grad_norm": 5.255678176879883,
"learning_rate": 2.340425531914894e-06,
"loss": 1.3141,
"step": 220
},
{
"epoch": 0.05877659574468085,
"grad_norm": 5.490760326385498,
"learning_rate": 2.3510638297872343e-06,
"loss": 1.1469,
"step": 221
},
{
"epoch": 0.059042553191489364,
"grad_norm": 5.482240676879883,
"learning_rate": 2.3617021276595748e-06,
"loss": 1.2831,
"step": 222
},
{
"epoch": 0.059308510638297875,
"grad_norm": 6.045271873474121,
"learning_rate": 2.3723404255319152e-06,
"loss": 1.1601,
"step": 223
},
{
"epoch": 0.059574468085106386,
"grad_norm": 5.145684719085693,
"learning_rate": 2.3829787234042557e-06,
"loss": 1.1432,
"step": 224
},
{
"epoch": 0.0598404255319149,
"grad_norm": 4.948934555053711,
"learning_rate": 2.393617021276596e-06,
"loss": 1.1199,
"step": 225
},
{
"epoch": 0.0601063829787234,
"grad_norm": 5.273087978363037,
"learning_rate": 2.404255319148936e-06,
"loss": 1.3225,
"step": 226
},
{
"epoch": 0.06037234042553191,
"grad_norm": 5.76677131652832,
"learning_rate": 2.4148936170212766e-06,
"loss": 1.3144,
"step": 227
},
{
"epoch": 0.06063829787234042,
"grad_norm": 5.51316499710083,
"learning_rate": 2.425531914893617e-06,
"loss": 1.2931,
"step": 228
},
{
"epoch": 0.060904255319148934,
"grad_norm": 5.077220916748047,
"learning_rate": 2.4361702127659575e-06,
"loss": 1.1972,
"step": 229
},
{
"epoch": 0.061170212765957445,
"grad_norm": 5.733246803283691,
"learning_rate": 2.446808510638298e-06,
"loss": 1.2773,
"step": 230
},
{
"epoch": 0.061436170212765956,
"grad_norm": 4.702721118927002,
"learning_rate": 2.4574468085106384e-06,
"loss": 1.2654,
"step": 231
},
{
"epoch": 0.06170212765957447,
"grad_norm": 5.210516452789307,
"learning_rate": 2.468085106382979e-06,
"loss": 1.3222,
"step": 232
},
{
"epoch": 0.06196808510638298,
"grad_norm": 5.6721720695495605,
"learning_rate": 2.4787234042553193e-06,
"loss": 1.1756,
"step": 233
},
{
"epoch": 0.06223404255319149,
"grad_norm": 4.598169326782227,
"learning_rate": 2.4893617021276598e-06,
"loss": 1.2613,
"step": 234
},
{
"epoch": 0.0625,
"grad_norm": 5.069137096405029,
"learning_rate": 2.5e-06,
"loss": 1.2629,
"step": 235
},
{
"epoch": 0.0627659574468085,
"grad_norm": 4.875532627105713,
"learning_rate": 2.5106382978723402e-06,
"loss": 1.1515,
"step": 236
},
{
"epoch": 0.06303191489361702,
"grad_norm": 5.547458171844482,
"learning_rate": 2.521276595744681e-06,
"loss": 1.4157,
"step": 237
},
{
"epoch": 0.06329787234042553,
"grad_norm": 5.377124786376953,
"learning_rate": 2.531914893617021e-06,
"loss": 1.3036,
"step": 238
},
{
"epoch": 0.06356382978723404,
"grad_norm": 5.135563850402832,
"learning_rate": 2.542553191489362e-06,
"loss": 1.1638,
"step": 239
},
{
"epoch": 0.06382978723404255,
"grad_norm": 5.6008172035217285,
"learning_rate": 2.553191489361702e-06,
"loss": 1.2787,
"step": 240
},
{
"epoch": 0.06409574468085107,
"grad_norm": 5.453914165496826,
"learning_rate": 2.563829787234043e-06,
"loss": 1.3239,
"step": 241
},
{
"epoch": 0.06436170212765957,
"grad_norm": 5.219985485076904,
"learning_rate": 2.574468085106383e-06,
"loss": 1.0942,
"step": 242
},
{
"epoch": 0.06462765957446809,
"grad_norm": 5.180700778961182,
"learning_rate": 2.585106382978724e-06,
"loss": 1.1501,
"step": 243
},
{
"epoch": 0.06489361702127659,
"grad_norm": 5.2240071296691895,
"learning_rate": 2.595744680851064e-06,
"loss": 1.2269,
"step": 244
},
{
"epoch": 0.06515957446808511,
"grad_norm": 6.328047275543213,
"learning_rate": 2.6063829787234047e-06,
"loss": 1.405,
"step": 245
},
{
"epoch": 0.06542553191489361,
"grad_norm": 5.10886287689209,
"learning_rate": 2.6170212765957447e-06,
"loss": 1.2698,
"step": 246
},
{
"epoch": 0.06569148936170213,
"grad_norm": 5.45538330078125,
"learning_rate": 2.6276595744680856e-06,
"loss": 1.33,
"step": 247
},
{
"epoch": 0.06595744680851064,
"grad_norm": 5.294386386871338,
"learning_rate": 2.6382978723404256e-06,
"loss": 1.2895,
"step": 248
},
{
"epoch": 0.06622340425531915,
"grad_norm": 4.7668776512146,
"learning_rate": 2.6489361702127665e-06,
"loss": 1.1176,
"step": 249
},
{
"epoch": 0.06648936170212766,
"grad_norm": 4.915814399719238,
"learning_rate": 2.6595744680851065e-06,
"loss": 1.2469,
"step": 250
},
{
"epoch": 0.06675531914893618,
"grad_norm": 5.320147514343262,
"learning_rate": 2.6702127659574474e-06,
"loss": 1.4904,
"step": 251
},
{
"epoch": 0.06702127659574468,
"grad_norm": 5.417577266693115,
"learning_rate": 2.6808510638297874e-06,
"loss": 1.3166,
"step": 252
},
{
"epoch": 0.0672872340425532,
"grad_norm": 4.704782485961914,
"learning_rate": 2.6914893617021283e-06,
"loss": 1.2362,
"step": 253
},
{
"epoch": 0.0675531914893617,
"grad_norm": 5.100544452667236,
"learning_rate": 2.7021276595744683e-06,
"loss": 1.2969,
"step": 254
},
{
"epoch": 0.0678191489361702,
"grad_norm": 6.336488723754883,
"learning_rate": 2.7127659574468084e-06,
"loss": 1.2708,
"step": 255
},
{
"epoch": 0.06808510638297872,
"grad_norm": 5.281217098236084,
"learning_rate": 2.7234042553191492e-06,
"loss": 1.3103,
"step": 256
},
{
"epoch": 0.06835106382978723,
"grad_norm": 5.127480983734131,
"learning_rate": 2.7340425531914893e-06,
"loss": 1.2957,
"step": 257
},
{
"epoch": 0.06861702127659575,
"grad_norm": 5.289313316345215,
"learning_rate": 2.74468085106383e-06,
"loss": 1.2658,
"step": 258
},
{
"epoch": 0.06888297872340425,
"grad_norm": 5.088155746459961,
"learning_rate": 2.75531914893617e-06,
"loss": 1.1359,
"step": 259
},
{
"epoch": 0.06914893617021277,
"grad_norm": 5.367323875427246,
"learning_rate": 2.765957446808511e-06,
"loss": 1.2408,
"step": 260
},
{
"epoch": 0.06941489361702127,
"grad_norm": 5.337047576904297,
"learning_rate": 2.776595744680851e-06,
"loss": 1.2908,
"step": 261
},
{
"epoch": 0.06968085106382979,
"grad_norm": 5.167153358459473,
"learning_rate": 2.787234042553192e-06,
"loss": 1.3217,
"step": 262
},
{
"epoch": 0.0699468085106383,
"grad_norm": 5.522439956665039,
"learning_rate": 2.797872340425532e-06,
"loss": 1.2799,
"step": 263
},
{
"epoch": 0.07021276595744681,
"grad_norm": 4.691408157348633,
"learning_rate": 2.808510638297873e-06,
"loss": 1.096,
"step": 264
},
{
"epoch": 0.07047872340425532,
"grad_norm": 5.208773612976074,
"learning_rate": 2.819148936170213e-06,
"loss": 1.3215,
"step": 265
},
{
"epoch": 0.07074468085106383,
"grad_norm": 5.4790496826171875,
"learning_rate": 2.8297872340425537e-06,
"loss": 1.4218,
"step": 266
},
{
"epoch": 0.07101063829787234,
"grad_norm": 5.256765842437744,
"learning_rate": 2.8404255319148938e-06,
"loss": 1.4242,
"step": 267
},
{
"epoch": 0.07127659574468086,
"grad_norm": 4.874395370483398,
"learning_rate": 2.8510638297872346e-06,
"loss": 1.2518,
"step": 268
},
{
"epoch": 0.07154255319148936,
"grad_norm": 5.108527183532715,
"learning_rate": 2.8617021276595747e-06,
"loss": 1.2919,
"step": 269
},
{
"epoch": 0.07180851063829788,
"grad_norm": 5.333227157592773,
"learning_rate": 2.8723404255319155e-06,
"loss": 1.459,
"step": 270
},
{
"epoch": 0.07207446808510638,
"grad_norm": 5.232532501220703,
"learning_rate": 2.8829787234042556e-06,
"loss": 1.1832,
"step": 271
},
{
"epoch": 0.07234042553191489,
"grad_norm": 5.147657871246338,
"learning_rate": 2.8936170212765956e-06,
"loss": 1.3219,
"step": 272
},
{
"epoch": 0.0726063829787234,
"grad_norm": 5.002472400665283,
"learning_rate": 2.9042553191489365e-06,
"loss": 1.2989,
"step": 273
},
{
"epoch": 0.07287234042553191,
"grad_norm": 4.903095722198486,
"learning_rate": 2.9148936170212765e-06,
"loss": 1.1621,
"step": 274
},
{
"epoch": 0.07313829787234043,
"grad_norm": 5.269963264465332,
"learning_rate": 2.9255319148936174e-06,
"loss": 1.2966,
"step": 275
},
{
"epoch": 0.07340425531914893,
"grad_norm": 5.356837749481201,
"learning_rate": 2.9361702127659574e-06,
"loss": 1.2455,
"step": 276
},
{
"epoch": 0.07367021276595745,
"grad_norm": 5.510587215423584,
"learning_rate": 2.9468085106382983e-06,
"loss": 1.2386,
"step": 277
},
{
"epoch": 0.07393617021276595,
"grad_norm": 5.7554755210876465,
"learning_rate": 2.9574468085106383e-06,
"loss": 1.3096,
"step": 278
},
{
"epoch": 0.07420212765957447,
"grad_norm": 5.236169815063477,
"learning_rate": 2.968085106382979e-06,
"loss": 1.2496,
"step": 279
},
{
"epoch": 0.07446808510638298,
"grad_norm": 4.870725631713867,
"learning_rate": 2.978723404255319e-06,
"loss": 1.083,
"step": 280
},
{
"epoch": 0.0747340425531915,
"grad_norm": 5.181726455688477,
"learning_rate": 2.98936170212766e-06,
"loss": 1.223,
"step": 281
},
{
"epoch": 0.075,
"grad_norm": 4.924530506134033,
"learning_rate": 3e-06,
"loss": 1.2855,
"step": 282
},
{
"epoch": 0.07526595744680852,
"grad_norm": 5.177605628967285,
"learning_rate": 3.010638297872341e-06,
"loss": 1.2215,
"step": 283
},
{
"epoch": 0.07553191489361702,
"grad_norm": 4.895737648010254,
"learning_rate": 3.021276595744681e-06,
"loss": 1.2451,
"step": 284
},
{
"epoch": 0.07579787234042554,
"grad_norm": 5.425995349884033,
"learning_rate": 3.031914893617022e-06,
"loss": 1.6053,
"step": 285
},
{
"epoch": 0.07606382978723404,
"grad_norm": 5.228978157043457,
"learning_rate": 3.042553191489362e-06,
"loss": 1.1846,
"step": 286
},
{
"epoch": 0.07632978723404256,
"grad_norm": 4.825231552124023,
"learning_rate": 3.0531914893617027e-06,
"loss": 1.1355,
"step": 287
},
{
"epoch": 0.07659574468085106,
"grad_norm": 6.309840679168701,
"learning_rate": 3.0638297872340428e-06,
"loss": 1.1388,
"step": 288
},
{
"epoch": 0.07686170212765958,
"grad_norm": 5.012725830078125,
"learning_rate": 3.0744680851063836e-06,
"loss": 0.9926,
"step": 289
},
{
"epoch": 0.07712765957446809,
"grad_norm": 5.028249263763428,
"learning_rate": 3.0851063829787237e-06,
"loss": 1.2024,
"step": 290
},
{
"epoch": 0.07739361702127659,
"grad_norm": 5.77925968170166,
"learning_rate": 3.0957446808510637e-06,
"loss": 1.5436,
"step": 291
},
{
"epoch": 0.07765957446808511,
"grad_norm": 5.277095794677734,
"learning_rate": 3.1063829787234046e-06,
"loss": 1.2018,
"step": 292
},
{
"epoch": 0.07792553191489361,
"grad_norm": 5.4600958824157715,
"learning_rate": 3.1170212765957446e-06,
"loss": 1.072,
"step": 293
},
{
"epoch": 0.07819148936170213,
"grad_norm": 5.168891906738281,
"learning_rate": 3.1276595744680855e-06,
"loss": 1.3841,
"step": 294
},
{
"epoch": 0.07845744680851063,
"grad_norm": 4.869060516357422,
"learning_rate": 3.1382978723404255e-06,
"loss": 1.1663,
"step": 295
},
{
"epoch": 0.07872340425531915,
"grad_norm": 5.289313316345215,
"learning_rate": 3.1489361702127664e-06,
"loss": 1.0781,
"step": 296
},
{
"epoch": 0.07898936170212766,
"grad_norm": 5.145017147064209,
"learning_rate": 3.1595744680851064e-06,
"loss": 1.1087,
"step": 297
},
{
"epoch": 0.07925531914893617,
"grad_norm": 5.634250640869141,
"learning_rate": 3.1702127659574473e-06,
"loss": 1.3936,
"step": 298
},
{
"epoch": 0.07952127659574468,
"grad_norm": 5.201961040496826,
"learning_rate": 3.1808510638297873e-06,
"loss": 1.3752,
"step": 299
},
{
"epoch": 0.0797872340425532,
"grad_norm": 5.372065544128418,
"learning_rate": 3.191489361702128e-06,
"loss": 1.1715,
"step": 300
},
{
"epoch": 0.0800531914893617,
"grad_norm": 6.010387420654297,
"learning_rate": 3.202127659574468e-06,
"loss": 1.2187,
"step": 301
},
{
"epoch": 0.08031914893617022,
"grad_norm": 5.143375396728516,
"learning_rate": 3.212765957446809e-06,
"loss": 1.2051,
"step": 302
},
{
"epoch": 0.08058510638297872,
"grad_norm": 5.376684665679932,
"learning_rate": 3.223404255319149e-06,
"loss": 1.2319,
"step": 303
},
{
"epoch": 0.08085106382978724,
"grad_norm": 4.905093193054199,
"learning_rate": 3.23404255319149e-06,
"loss": 1.2187,
"step": 304
},
{
"epoch": 0.08111702127659574,
"grad_norm": 5.650513648986816,
"learning_rate": 3.24468085106383e-06,
"loss": 1.1528,
"step": 305
},
{
"epoch": 0.08138297872340426,
"grad_norm": 5.2889227867126465,
"learning_rate": 3.255319148936171e-06,
"loss": 1.0795,
"step": 306
},
{
"epoch": 0.08164893617021277,
"grad_norm": 5.284914970397949,
"learning_rate": 3.265957446808511e-06,
"loss": 1.2885,
"step": 307
},
{
"epoch": 0.08191489361702127,
"grad_norm": 5.4190449714660645,
"learning_rate": 3.276595744680851e-06,
"loss": 1.4991,
"step": 308
},
{
"epoch": 0.08218085106382979,
"grad_norm": 4.965026378631592,
"learning_rate": 3.287234042553192e-06,
"loss": 1.2674,
"step": 309
},
{
"epoch": 0.08244680851063829,
"grad_norm": 5.040426254272461,
"learning_rate": 3.297872340425532e-06,
"loss": 1.2347,
"step": 310
},
{
"epoch": 0.08271276595744681,
"grad_norm": 5.759904384613037,
"learning_rate": 3.3085106382978727e-06,
"loss": 1.2976,
"step": 311
},
{
"epoch": 0.08297872340425531,
"grad_norm": 4.893044471740723,
"learning_rate": 3.3191489361702127e-06,
"loss": 1.213,
"step": 312
},
{
"epoch": 0.08324468085106383,
"grad_norm": 4.674813270568848,
"learning_rate": 3.3297872340425536e-06,
"loss": 1.2795,
"step": 313
},
{
"epoch": 0.08351063829787234,
"grad_norm": 5.59810209274292,
"learning_rate": 3.3404255319148936e-06,
"loss": 1.2338,
"step": 314
},
{
"epoch": 0.08377659574468085,
"grad_norm": 4.63198709487915,
"learning_rate": 3.3510638297872345e-06,
"loss": 1.2026,
"step": 315
},
{
"epoch": 0.08404255319148936,
"grad_norm": 5.4756245613098145,
"learning_rate": 3.3617021276595745e-06,
"loss": 1.2838,
"step": 316
},
{
"epoch": 0.08430851063829788,
"grad_norm": 5.258046627044678,
"learning_rate": 3.3723404255319154e-06,
"loss": 1.1449,
"step": 317
},
{
"epoch": 0.08457446808510638,
"grad_norm": 5.205422878265381,
"learning_rate": 3.3829787234042554e-06,
"loss": 1.223,
"step": 318
},
{
"epoch": 0.0848404255319149,
"grad_norm": 5.365026473999023,
"learning_rate": 3.3936170212765963e-06,
"loss": 1.191,
"step": 319
},
{
"epoch": 0.0851063829787234,
"grad_norm": 5.367187023162842,
"learning_rate": 3.4042553191489363e-06,
"loss": 1.2246,
"step": 320
},
{
"epoch": 0.08537234042553192,
"grad_norm": 5.512171745300293,
"learning_rate": 3.414893617021277e-06,
"loss": 1.2601,
"step": 321
},
{
"epoch": 0.08563829787234042,
"grad_norm": 5.804540157318115,
"learning_rate": 3.4255319148936172e-06,
"loss": 1.1537,
"step": 322
},
{
"epoch": 0.08590425531914894,
"grad_norm": 5.474178791046143,
"learning_rate": 3.436170212765958e-06,
"loss": 1.3175,
"step": 323
},
{
"epoch": 0.08617021276595745,
"grad_norm": 5.454108715057373,
"learning_rate": 3.446808510638298e-06,
"loss": 1.1764,
"step": 324
},
{
"epoch": 0.08643617021276596,
"grad_norm": 5.368601322174072,
"learning_rate": 3.457446808510639e-06,
"loss": 1.2001,
"step": 325
},
{
"epoch": 0.08670212765957447,
"grad_norm": 5.19401741027832,
"learning_rate": 3.468085106382979e-06,
"loss": 1.2673,
"step": 326
},
{
"epoch": 0.08696808510638297,
"grad_norm": 4.70231294631958,
"learning_rate": 3.478723404255319e-06,
"loss": 1.1736,
"step": 327
},
{
"epoch": 0.08723404255319149,
"grad_norm": 5.607789039611816,
"learning_rate": 3.48936170212766e-06,
"loss": 1.1986,
"step": 328
},
{
"epoch": 0.0875,
"grad_norm": 5.1046013832092285,
"learning_rate": 3.5e-06,
"loss": 1.2426,
"step": 329
},
{
"epoch": 0.08776595744680851,
"grad_norm": 5.214546203613281,
"learning_rate": 3.510638297872341e-06,
"loss": 1.1211,
"step": 330
},
{
"epoch": 0.08803191489361702,
"grad_norm": 4.989225387573242,
"learning_rate": 3.521276595744681e-06,
"loss": 1.3025,
"step": 331
},
{
"epoch": 0.08829787234042553,
"grad_norm": 4.886022567749023,
"learning_rate": 3.5319148936170217e-06,
"loss": 1.2109,
"step": 332
},
{
"epoch": 0.08856382978723404,
"grad_norm": 5.30552339553833,
"learning_rate": 3.5425531914893617e-06,
"loss": 1.1811,
"step": 333
},
{
"epoch": 0.08882978723404256,
"grad_norm": 4.81152868270874,
"learning_rate": 3.5531914893617026e-06,
"loss": 1.1677,
"step": 334
},
{
"epoch": 0.08909574468085106,
"grad_norm": 5.06434440612793,
"learning_rate": 3.5638297872340426e-06,
"loss": 1.2425,
"step": 335
},
{
"epoch": 0.08936170212765958,
"grad_norm": 7.036694526672363,
"learning_rate": 3.5744680851063835e-06,
"loss": 1.2682,
"step": 336
},
{
"epoch": 0.08962765957446808,
"grad_norm": 5.208419322967529,
"learning_rate": 3.5851063829787235e-06,
"loss": 1.2394,
"step": 337
},
{
"epoch": 0.0898936170212766,
"grad_norm": 4.592006206512451,
"learning_rate": 3.5957446808510644e-06,
"loss": 1.2083,
"step": 338
},
{
"epoch": 0.0901595744680851,
"grad_norm": 5.002110481262207,
"learning_rate": 3.6063829787234044e-06,
"loss": 1.2284,
"step": 339
},
{
"epoch": 0.09042553191489362,
"grad_norm": 4.708452224731445,
"learning_rate": 3.6170212765957453e-06,
"loss": 1.1616,
"step": 340
},
{
"epoch": 0.09069148936170213,
"grad_norm": 4.872410297393799,
"learning_rate": 3.6276595744680853e-06,
"loss": 1.181,
"step": 341
},
{
"epoch": 0.09095744680851064,
"grad_norm": 5.24644136428833,
"learning_rate": 3.6382978723404262e-06,
"loss": 1.285,
"step": 342
},
{
"epoch": 0.09122340425531915,
"grad_norm": 5.019744396209717,
"learning_rate": 3.6489361702127662e-06,
"loss": 1.2677,
"step": 343
},
{
"epoch": 0.09148936170212765,
"grad_norm": 6.380999565124512,
"learning_rate": 3.6595744680851063e-06,
"loss": 1.1268,
"step": 344
},
{
"epoch": 0.09175531914893617,
"grad_norm": 5.100999355316162,
"learning_rate": 3.670212765957447e-06,
"loss": 1.2023,
"step": 345
},
{
"epoch": 0.09202127659574467,
"grad_norm": 5.221463203430176,
"learning_rate": 3.680851063829787e-06,
"loss": 1.2482,
"step": 346
},
{
"epoch": 0.09228723404255319,
"grad_norm": 4.895312309265137,
"learning_rate": 3.691489361702128e-06,
"loss": 1.2515,
"step": 347
},
{
"epoch": 0.0925531914893617,
"grad_norm": 4.988393306732178,
"learning_rate": 3.702127659574468e-06,
"loss": 1.1969,
"step": 348
},
{
"epoch": 0.09281914893617021,
"grad_norm": 5.19982385635376,
"learning_rate": 3.712765957446809e-06,
"loss": 1.2488,
"step": 349
},
{
"epoch": 0.09308510638297872,
"grad_norm": 5.010618686676025,
"learning_rate": 3.723404255319149e-06,
"loss": 1.2475,
"step": 350
},
{
"epoch": 0.09335106382978724,
"grad_norm": 4.905212879180908,
"learning_rate": 3.73404255319149e-06,
"loss": 1.3921,
"step": 351
},
{
"epoch": 0.09361702127659574,
"grad_norm": 5.373055458068848,
"learning_rate": 3.74468085106383e-06,
"loss": 1.4741,
"step": 352
},
{
"epoch": 0.09388297872340426,
"grad_norm": 4.804662704467773,
"learning_rate": 3.7553191489361707e-06,
"loss": 1.2208,
"step": 353
},
{
"epoch": 0.09414893617021276,
"grad_norm": 5.451242923736572,
"learning_rate": 3.7659574468085108e-06,
"loss": 1.3764,
"step": 354
},
{
"epoch": 0.09441489361702128,
"grad_norm": 5.5642409324646,
"learning_rate": 3.7765957446808516e-06,
"loss": 1.4001,
"step": 355
},
{
"epoch": 0.09468085106382979,
"grad_norm": 4.492448806762695,
"learning_rate": 3.7872340425531917e-06,
"loss": 1.1094,
"step": 356
},
{
"epoch": 0.0949468085106383,
"grad_norm": 5.439316749572754,
"learning_rate": 3.7978723404255325e-06,
"loss": 1.3348,
"step": 357
},
{
"epoch": 0.09521276595744681,
"grad_norm": 4.795385837554932,
"learning_rate": 3.8085106382978726e-06,
"loss": 1.23,
"step": 358
},
{
"epoch": 0.09547872340425533,
"grad_norm": 5.010631084442139,
"learning_rate": 3.819148936170213e-06,
"loss": 1.1724,
"step": 359
},
{
"epoch": 0.09574468085106383,
"grad_norm": 5.740480422973633,
"learning_rate": 3.8297872340425535e-06,
"loss": 1.3756,
"step": 360
},
{
"epoch": 0.09601063829787235,
"grad_norm": 4.986555099487305,
"learning_rate": 3.840425531914894e-06,
"loss": 1.2722,
"step": 361
},
{
"epoch": 0.09627659574468085,
"grad_norm": 5.041133880615234,
"learning_rate": 3.851063829787234e-06,
"loss": 1.0448,
"step": 362
},
{
"epoch": 0.09654255319148936,
"grad_norm": 5.378165245056152,
"learning_rate": 3.861702127659575e-06,
"loss": 1.2111,
"step": 363
},
{
"epoch": 0.09680851063829787,
"grad_norm": 4.8053059577941895,
"learning_rate": 3.872340425531915e-06,
"loss": 1.1344,
"step": 364
},
{
"epoch": 0.09707446808510638,
"grad_norm": 5.25260066986084,
"learning_rate": 3.882978723404256e-06,
"loss": 1.1288,
"step": 365
},
{
"epoch": 0.0973404255319149,
"grad_norm": 4.839104175567627,
"learning_rate": 3.893617021276596e-06,
"loss": 1.2131,
"step": 366
},
{
"epoch": 0.0976063829787234,
"grad_norm": 5.487301826477051,
"learning_rate": 3.904255319148937e-06,
"loss": 1.1969,
"step": 367
},
{
"epoch": 0.09787234042553192,
"grad_norm": 4.733921051025391,
"learning_rate": 3.914893617021277e-06,
"loss": 1.097,
"step": 368
},
{
"epoch": 0.09813829787234042,
"grad_norm": 5.042628765106201,
"learning_rate": 3.9255319148936175e-06,
"loss": 1.3554,
"step": 369
},
{
"epoch": 0.09840425531914894,
"grad_norm": 6.3879876136779785,
"learning_rate": 3.936170212765958e-06,
"loss": 1.1231,
"step": 370
},
{
"epoch": 0.09867021276595744,
"grad_norm": 4.907758712768555,
"learning_rate": 3.946808510638298e-06,
"loss": 1.4223,
"step": 371
},
{
"epoch": 0.09893617021276596,
"grad_norm": 4.765664577484131,
"learning_rate": 3.957446808510639e-06,
"loss": 1.2346,
"step": 372
},
{
"epoch": 0.09920212765957447,
"grad_norm": 4.949317932128906,
"learning_rate": 3.968085106382979e-06,
"loss": 1.1447,
"step": 373
},
{
"epoch": 0.09946808510638298,
"grad_norm": 5.256651878356934,
"learning_rate": 3.97872340425532e-06,
"loss": 1.25,
"step": 374
},
{
"epoch": 0.09973404255319149,
"grad_norm": 5.307461261749268,
"learning_rate": 3.98936170212766e-06,
"loss": 1.3373,
"step": 375
},
{
"epoch": 0.1,
"grad_norm": 5.324861526489258,
"learning_rate": 4.000000000000001e-06,
"loss": 1.1654,
"step": 376
},
{
"epoch": 0.10026595744680851,
"grad_norm": 5.055593013763428,
"learning_rate": 4.010638297872341e-06,
"loss": 1.1508,
"step": 377
},
{
"epoch": 0.10053191489361703,
"grad_norm": 4.892101287841797,
"learning_rate": 4.0212765957446816e-06,
"loss": 1.2529,
"step": 378
},
{
"epoch": 0.10079787234042553,
"grad_norm": 4.846734523773193,
"learning_rate": 4.031914893617022e-06,
"loss": 1.1536,
"step": 379
},
{
"epoch": 0.10106382978723404,
"grad_norm": 5.4368462562561035,
"learning_rate": 4.042553191489362e-06,
"loss": 1.1512,
"step": 380
},
{
"epoch": 0.10132978723404255,
"grad_norm": 5.102158546447754,
"learning_rate": 4.053191489361702e-06,
"loss": 1.2382,
"step": 381
},
{
"epoch": 0.10159574468085106,
"grad_norm": 5.7933030128479,
"learning_rate": 4.0638297872340425e-06,
"loss": 1.4996,
"step": 382
},
{
"epoch": 0.10186170212765958,
"grad_norm": 4.7221221923828125,
"learning_rate": 4.074468085106383e-06,
"loss": 1.3471,
"step": 383
},
{
"epoch": 0.10212765957446808,
"grad_norm": 4.660311222076416,
"learning_rate": 4.085106382978723e-06,
"loss": 1.103,
"step": 384
},
{
"epoch": 0.1023936170212766,
"grad_norm": 5.399576663970947,
"learning_rate": 4.095744680851064e-06,
"loss": 1.3684,
"step": 385
},
{
"epoch": 0.1026595744680851,
"grad_norm": 4.925390720367432,
"learning_rate": 4.106382978723404e-06,
"loss": 1.2596,
"step": 386
},
{
"epoch": 0.10292553191489362,
"grad_norm": 5.198457717895508,
"learning_rate": 4.117021276595745e-06,
"loss": 1.2224,
"step": 387
},
{
"epoch": 0.10319148936170212,
"grad_norm": 5.053544044494629,
"learning_rate": 4.127659574468085e-06,
"loss": 1.0447,
"step": 388
},
{
"epoch": 0.10345744680851064,
"grad_norm": 5.769658088684082,
"learning_rate": 4.138297872340426e-06,
"loss": 1.4491,
"step": 389
},
{
"epoch": 0.10372340425531915,
"grad_norm": 4.969061851501465,
"learning_rate": 4.148936170212766e-06,
"loss": 1.2964,
"step": 390
},
{
"epoch": 0.10398936170212766,
"grad_norm": 4.825634479522705,
"learning_rate": 4.1595744680851066e-06,
"loss": 1.1521,
"step": 391
},
{
"epoch": 0.10425531914893617,
"grad_norm": 5.240276336669922,
"learning_rate": 4.170212765957447e-06,
"loss": 1.27,
"step": 392
},
{
"epoch": 0.10452127659574469,
"grad_norm": 4.926823139190674,
"learning_rate": 4.1808510638297875e-06,
"loss": 1.1428,
"step": 393
},
{
"epoch": 0.10478723404255319,
"grad_norm": 5.143110275268555,
"learning_rate": 4.191489361702128e-06,
"loss": 1.2502,
"step": 394
},
{
"epoch": 0.10505319148936171,
"grad_norm": 5.7517876625061035,
"learning_rate": 4.202127659574468e-06,
"loss": 1.3353,
"step": 395
},
{
"epoch": 0.10531914893617021,
"grad_norm": 5.096099853515625,
"learning_rate": 4.212765957446809e-06,
"loss": 1.2383,
"step": 396
},
{
"epoch": 0.10558510638297873,
"grad_norm": 5.0476484298706055,
"learning_rate": 4.223404255319149e-06,
"loss": 1.1639,
"step": 397
},
{
"epoch": 0.10585106382978723,
"grad_norm": 5.166505813598633,
"learning_rate": 4.23404255319149e-06,
"loss": 1.327,
"step": 398
},
{
"epoch": 0.10611702127659574,
"grad_norm": 5.315145969390869,
"learning_rate": 4.24468085106383e-06,
"loss": 1.2239,
"step": 399
},
{
"epoch": 0.10638297872340426,
"grad_norm": 5.185245990753174,
"learning_rate": 4.255319148936171e-06,
"loss": 1.3102,
"step": 400
},
{
"epoch": 0.10664893617021276,
"grad_norm": 5.77607536315918,
"learning_rate": 4.265957446808511e-06,
"loss": 1.3943,
"step": 401
},
{
"epoch": 0.10691489361702128,
"grad_norm": 5.244495391845703,
"learning_rate": 4.2765957446808515e-06,
"loss": 1.2495,
"step": 402
},
{
"epoch": 0.10718085106382978,
"grad_norm": 4.943081378936768,
"learning_rate": 4.287234042553192e-06,
"loss": 1.1773,
"step": 403
},
{
"epoch": 0.1074468085106383,
"grad_norm": 4.948064804077148,
"learning_rate": 4.297872340425532e-06,
"loss": 1.2758,
"step": 404
},
{
"epoch": 0.1077127659574468,
"grad_norm": 5.133402347564697,
"learning_rate": 4.308510638297873e-06,
"loss": 1.28,
"step": 405
},
{
"epoch": 0.10797872340425532,
"grad_norm": 5.113506317138672,
"learning_rate": 4.319148936170213e-06,
"loss": 1.3164,
"step": 406
},
{
"epoch": 0.10824468085106383,
"grad_norm": 5.551205635070801,
"learning_rate": 4.329787234042554e-06,
"loss": 1.3766,
"step": 407
},
{
"epoch": 0.10851063829787234,
"grad_norm": 5.358046531677246,
"learning_rate": 4.340425531914894e-06,
"loss": 1.3146,
"step": 408
},
{
"epoch": 0.10877659574468085,
"grad_norm": 4.947327136993408,
"learning_rate": 4.351063829787235e-06,
"loss": 1.2566,
"step": 409
},
{
"epoch": 0.10904255319148937,
"grad_norm": 5.421116828918457,
"learning_rate": 4.361702127659575e-06,
"loss": 1.3041,
"step": 410
},
{
"epoch": 0.10930851063829787,
"grad_norm": 5.073742866516113,
"learning_rate": 4.3723404255319156e-06,
"loss": 1.2297,
"step": 411
},
{
"epoch": 0.10957446808510639,
"grad_norm": 4.688051700592041,
"learning_rate": 4.382978723404256e-06,
"loss": 1.281,
"step": 412
},
{
"epoch": 0.10984042553191489,
"grad_norm": 4.957024097442627,
"learning_rate": 4.3936170212765965e-06,
"loss": 1.2235,
"step": 413
},
{
"epoch": 0.11010638297872341,
"grad_norm": 4.920490741729736,
"learning_rate": 4.404255319148937e-06,
"loss": 1.3369,
"step": 414
},
{
"epoch": 0.11037234042553191,
"grad_norm": 4.797316551208496,
"learning_rate": 4.414893617021277e-06,
"loss": 1.2144,
"step": 415
},
{
"epoch": 0.11063829787234042,
"grad_norm": 5.424980640411377,
"learning_rate": 4.425531914893617e-06,
"loss": 1.3891,
"step": 416
},
{
"epoch": 0.11090425531914894,
"grad_norm": 6.654335021972656,
"learning_rate": 4.436170212765957e-06,
"loss": 1.2438,
"step": 417
},
{
"epoch": 0.11117021276595744,
"grad_norm": 4.950499057769775,
"learning_rate": 4.446808510638298e-06,
"loss": 1.1873,
"step": 418
},
{
"epoch": 0.11143617021276596,
"grad_norm": 4.553642272949219,
"learning_rate": 4.457446808510638e-06,
"loss": 1.1059,
"step": 419
},
{
"epoch": 0.11170212765957446,
"grad_norm": 5.221842288970947,
"learning_rate": 4.468085106382979e-06,
"loss": 1.2645,
"step": 420
},
{
"epoch": 0.11196808510638298,
"grad_norm": 5.45412015914917,
"learning_rate": 4.478723404255319e-06,
"loss": 1.234,
"step": 421
},
{
"epoch": 0.11223404255319148,
"grad_norm": 5.6037750244140625,
"learning_rate": 4.48936170212766e-06,
"loss": 1.2393,
"step": 422
},
{
"epoch": 0.1125,
"grad_norm": 6.701963901519775,
"learning_rate": 4.5e-06,
"loss": 1.2275,
"step": 423
},
{
"epoch": 0.1127659574468085,
"grad_norm": 5.183774471282959,
"learning_rate": 4.5106382978723406e-06,
"loss": 1.345,
"step": 424
},
{
"epoch": 0.11303191489361702,
"grad_norm": 5.005707263946533,
"learning_rate": 4.521276595744681e-06,
"loss": 1.2778,
"step": 425
},
{
"epoch": 0.11329787234042553,
"grad_norm": 4.887904644012451,
"learning_rate": 4.5319148936170215e-06,
"loss": 1.2156,
"step": 426
},
{
"epoch": 0.11356382978723405,
"grad_norm": 5.077915191650391,
"learning_rate": 4.542553191489362e-06,
"loss": 1.3213,
"step": 427
},
{
"epoch": 0.11382978723404255,
"grad_norm": 5.669859409332275,
"learning_rate": 4.553191489361702e-06,
"loss": 1.2028,
"step": 428
},
{
"epoch": 0.11409574468085107,
"grad_norm": 4.871664047241211,
"learning_rate": 4.563829787234043e-06,
"loss": 1.2471,
"step": 429
},
{
"epoch": 0.11436170212765957,
"grad_norm": 6.208220958709717,
"learning_rate": 4.574468085106383e-06,
"loss": 1.3042,
"step": 430
},
{
"epoch": 0.11462765957446809,
"grad_norm": 5.47734260559082,
"learning_rate": 4.585106382978724e-06,
"loss": 1.1327,
"step": 431
},
{
"epoch": 0.1148936170212766,
"grad_norm": 4.876042366027832,
"learning_rate": 4.595744680851064e-06,
"loss": 1.2484,
"step": 432
},
{
"epoch": 0.11515957446808511,
"grad_norm": 4.497283458709717,
"learning_rate": 4.606382978723405e-06,
"loss": 1.0734,
"step": 433
},
{
"epoch": 0.11542553191489362,
"grad_norm": 5.2405314445495605,
"learning_rate": 4.617021276595745e-06,
"loss": 1.3122,
"step": 434
},
{
"epoch": 0.11569148936170212,
"grad_norm": 5.948802947998047,
"learning_rate": 4.6276595744680855e-06,
"loss": 1.2006,
"step": 435
},
{
"epoch": 0.11595744680851064,
"grad_norm": 5.318106174468994,
"learning_rate": 4.638297872340426e-06,
"loss": 1.2712,
"step": 436
},
{
"epoch": 0.11622340425531914,
"grad_norm": 5.686134338378906,
"learning_rate": 4.648936170212766e-06,
"loss": 1.3471,
"step": 437
},
{
"epoch": 0.11648936170212766,
"grad_norm": 5.246779441833496,
"learning_rate": 4.659574468085107e-06,
"loss": 1.2967,
"step": 438
},
{
"epoch": 0.11675531914893617,
"grad_norm": 4.675699710845947,
"learning_rate": 4.670212765957447e-06,
"loss": 1.2304,
"step": 439
},
{
"epoch": 0.11702127659574468,
"grad_norm": 5.018355846405029,
"learning_rate": 4.680851063829788e-06,
"loss": 1.3061,
"step": 440
},
{
"epoch": 0.11728723404255319,
"grad_norm": 5.387866497039795,
"learning_rate": 4.691489361702128e-06,
"loss": 1.3658,
"step": 441
},
{
"epoch": 0.1175531914893617,
"grad_norm": 4.927948951721191,
"learning_rate": 4.702127659574469e-06,
"loss": 1.3331,
"step": 442
},
{
"epoch": 0.11781914893617021,
"grad_norm": 5.1225738525390625,
"learning_rate": 4.712765957446809e-06,
"loss": 1.1334,
"step": 443
},
{
"epoch": 0.11808510638297873,
"grad_norm": 4.9314751625061035,
"learning_rate": 4.7234042553191496e-06,
"loss": 1.2384,
"step": 444
},
{
"epoch": 0.11835106382978723,
"grad_norm": 5.148207664489746,
"learning_rate": 4.73404255319149e-06,
"loss": 1.2677,
"step": 445
},
{
"epoch": 0.11861702127659575,
"grad_norm": 4.629826068878174,
"learning_rate": 4.7446808510638305e-06,
"loss": 1.2096,
"step": 446
},
{
"epoch": 0.11888297872340425,
"grad_norm": 4.850092887878418,
"learning_rate": 4.755319148936171e-06,
"loss": 1.2004,
"step": 447
},
{
"epoch": 0.11914893617021277,
"grad_norm": 5.228341102600098,
"learning_rate": 4.765957446808511e-06,
"loss": 1.1828,
"step": 448
},
{
"epoch": 0.11941489361702128,
"grad_norm": 4.738990306854248,
"learning_rate": 4.776595744680852e-06,
"loss": 1.2557,
"step": 449
},
{
"epoch": 0.1196808510638298,
"grad_norm": 4.737931251525879,
"learning_rate": 4.787234042553192e-06,
"loss": 1.1705,
"step": 450
},
{
"epoch": 0.1199468085106383,
"grad_norm": 4.852109432220459,
"learning_rate": 4.797872340425533e-06,
"loss": 1.175,
"step": 451
},
{
"epoch": 0.1202127659574468,
"grad_norm": 4.808513641357422,
"learning_rate": 4.808510638297872e-06,
"loss": 1.3285,
"step": 452
},
{
"epoch": 0.12047872340425532,
"grad_norm": 5.352870464324951,
"learning_rate": 4.819148936170213e-06,
"loss": 1.2471,
"step": 453
},
{
"epoch": 0.12074468085106382,
"grad_norm": 4.533960819244385,
"learning_rate": 4.829787234042553e-06,
"loss": 1.2059,
"step": 454
},
{
"epoch": 0.12101063829787234,
"grad_norm": 4.770225524902344,
"learning_rate": 4.840425531914894e-06,
"loss": 1.2049,
"step": 455
},
{
"epoch": 0.12127659574468085,
"grad_norm": 5.0733418464660645,
"learning_rate": 4.851063829787234e-06,
"loss": 1.2758,
"step": 456
},
{
"epoch": 0.12154255319148936,
"grad_norm": 4.347215175628662,
"learning_rate": 4.8617021276595746e-06,
"loss": 1.1401,
"step": 457
},
{
"epoch": 0.12180851063829787,
"grad_norm": 5.329954147338867,
"learning_rate": 4.872340425531915e-06,
"loss": 1.276,
"step": 458
},
{
"epoch": 0.12207446808510639,
"grad_norm": 5.255573272705078,
"learning_rate": 4.8829787234042555e-06,
"loss": 1.234,
"step": 459
},
{
"epoch": 0.12234042553191489,
"grad_norm": 5.189822196960449,
"learning_rate": 4.893617021276596e-06,
"loss": 1.3676,
"step": 460
},
{
"epoch": 0.12260638297872341,
"grad_norm": 5.039921283721924,
"learning_rate": 4.904255319148936e-06,
"loss": 1.3342,
"step": 461
},
{
"epoch": 0.12287234042553191,
"grad_norm": 4.65778923034668,
"learning_rate": 4.914893617021277e-06,
"loss": 1.1117,
"step": 462
},
{
"epoch": 0.12313829787234043,
"grad_norm": 5.006718635559082,
"learning_rate": 4.925531914893617e-06,
"loss": 1.2543,
"step": 463
},
{
"epoch": 0.12340425531914893,
"grad_norm": 5.547107219696045,
"learning_rate": 4.936170212765958e-06,
"loss": 1.2113,
"step": 464
},
{
"epoch": 0.12367021276595745,
"grad_norm": 6.148080348968506,
"learning_rate": 4.946808510638298e-06,
"loss": 1.1889,
"step": 465
},
{
"epoch": 0.12393617021276596,
"grad_norm": 5.120206832885742,
"learning_rate": 4.957446808510639e-06,
"loss": 1.2198,
"step": 466
},
{
"epoch": 0.12420212765957447,
"grad_norm": 5.487342834472656,
"learning_rate": 4.968085106382979e-06,
"loss": 1.2786,
"step": 467
},
{
"epoch": 0.12446808510638298,
"grad_norm": 8.382891654968262,
"learning_rate": 4.9787234042553195e-06,
"loss": 1.3757,
"step": 468
},
{
"epoch": 0.1247340425531915,
"grad_norm": 5.241554260253906,
"learning_rate": 4.98936170212766e-06,
"loss": 1.3302,
"step": 469
},
{
"epoch": 0.125,
"grad_norm": 5.201963901519775,
"learning_rate": 5e-06,
"loss": 1.2948,
"step": 470
},
{
"epoch": 0.12526595744680852,
"grad_norm": 5.143476486206055,
"learning_rate": 5.010638297872341e-06,
"loss": 1.2364,
"step": 471
},
{
"epoch": 0.125531914893617,
"grad_norm": 4.847978115081787,
"learning_rate": 5.0212765957446805e-06,
"loss": 1.1692,
"step": 472
},
{
"epoch": 0.12579787234042553,
"grad_norm": 7.869311809539795,
"learning_rate": 5.031914893617022e-06,
"loss": 1.3719,
"step": 473
},
{
"epoch": 0.12606382978723404,
"grad_norm": 5.498979091644287,
"learning_rate": 5.042553191489362e-06,
"loss": 1.3422,
"step": 474
},
{
"epoch": 0.12632978723404256,
"grad_norm": 6.362303256988525,
"learning_rate": 5.053191489361703e-06,
"loss": 1.4323,
"step": 475
},
{
"epoch": 0.12659574468085105,
"grad_norm": 5.051971435546875,
"learning_rate": 5.063829787234042e-06,
"loss": 1.1821,
"step": 476
},
{
"epoch": 0.12686170212765957,
"grad_norm": 4.8123250007629395,
"learning_rate": 5.0744680851063836e-06,
"loss": 1.2988,
"step": 477
},
{
"epoch": 0.1271276595744681,
"grad_norm": 5.487412452697754,
"learning_rate": 5.085106382978724e-06,
"loss": 1.3167,
"step": 478
},
{
"epoch": 0.1273936170212766,
"grad_norm": 8.315117835998535,
"learning_rate": 5.0957446808510645e-06,
"loss": 1.192,
"step": 479
},
{
"epoch": 0.1276595744680851,
"grad_norm": 5.151649475097656,
"learning_rate": 5.106382978723404e-06,
"loss": 1.2499,
"step": 480
},
{
"epoch": 0.12792553191489361,
"grad_norm": 5.335565567016602,
"learning_rate": 5.117021276595745e-06,
"loss": 1.2643,
"step": 481
},
{
"epoch": 0.12819148936170213,
"grad_norm": 4.590991020202637,
"learning_rate": 5.127659574468086e-06,
"loss": 1.218,
"step": 482
},
{
"epoch": 0.12845744680851065,
"grad_norm": 4.4650750160217285,
"learning_rate": 5.138297872340426e-06,
"loss": 1.1962,
"step": 483
},
{
"epoch": 0.12872340425531914,
"grad_norm": 4.609473705291748,
"learning_rate": 5.148936170212766e-06,
"loss": 1.476,
"step": 484
},
{
"epoch": 0.12898936170212766,
"grad_norm": 4.7010087966918945,
"learning_rate": 5.159574468085107e-06,
"loss": 1.1609,
"step": 485
},
{
"epoch": 0.12925531914893618,
"grad_norm": 4.8034257888793945,
"learning_rate": 5.170212765957448e-06,
"loss": 1.3393,
"step": 486
},
{
"epoch": 0.1295212765957447,
"grad_norm": 5.149427890777588,
"learning_rate": 5.180851063829788e-06,
"loss": 1.2883,
"step": 487
},
{
"epoch": 0.12978723404255318,
"grad_norm": 5.017268657684326,
"learning_rate": 5.191489361702128e-06,
"loss": 1.1178,
"step": 488
},
{
"epoch": 0.1300531914893617,
"grad_norm": 4.924554347991943,
"learning_rate": 5.202127659574468e-06,
"loss": 1.3381,
"step": 489
},
{
"epoch": 0.13031914893617022,
"grad_norm": 4.674248218536377,
"learning_rate": 5.212765957446809e-06,
"loss": 1.0916,
"step": 490
},
{
"epoch": 0.1305851063829787,
"grad_norm": 4.853366851806641,
"learning_rate": 5.223404255319149e-06,
"loss": 1.2784,
"step": 491
},
{
"epoch": 0.13085106382978723,
"grad_norm": 5.032970428466797,
"learning_rate": 5.2340425531914895e-06,
"loss": 1.2575,
"step": 492
},
{
"epoch": 0.13111702127659575,
"grad_norm": 4.911726474761963,
"learning_rate": 5.24468085106383e-06,
"loss": 1.2049,
"step": 493
},
{
"epoch": 0.13138297872340426,
"grad_norm": 5.197798252105713,
"learning_rate": 5.255319148936171e-06,
"loss": 1.3461,
"step": 494
},
{
"epoch": 0.13164893617021275,
"grad_norm": 4.873477458953857,
"learning_rate": 5.265957446808511e-06,
"loss": 1.2681,
"step": 495
},
{
"epoch": 0.13191489361702127,
"grad_norm": 4.855223178863525,
"learning_rate": 5.276595744680851e-06,
"loss": 1.1849,
"step": 496
},
{
"epoch": 0.1321808510638298,
"grad_norm": 5.735394477844238,
"learning_rate": 5.287234042553192e-06,
"loss": 1.2821,
"step": 497
},
{
"epoch": 0.1324468085106383,
"grad_norm": 4.7265305519104,
"learning_rate": 5.297872340425533e-06,
"loss": 1.1253,
"step": 498
},
{
"epoch": 0.1327127659574468,
"grad_norm": 5.138075351715088,
"learning_rate": 5.308510638297873e-06,
"loss": 1.1951,
"step": 499
},
{
"epoch": 0.13297872340425532,
"grad_norm": 4.761940002441406,
"learning_rate": 5.319148936170213e-06,
"loss": 1.4573,
"step": 500
},
{
"epoch": 0.13297872340425532,
"eval_loss": 1.276181697845459,
"eval_runtime": 12.4372,
"eval_samples_per_second": 32.162,
"eval_steps_per_second": 4.02,
"step": 500
},
{
"epoch": 0.13324468085106383,
"grad_norm": 5.0954132080078125,
"learning_rate": 5.3297872340425535e-06,
"loss": 1.43,
"step": 501
},
{
"epoch": 0.13351063829787235,
"grad_norm": 5.592034816741943,
"learning_rate": 5.340425531914895e-06,
"loss": 1.3052,
"step": 502
},
{
"epoch": 0.13377659574468084,
"grad_norm": 5.18677282333374,
"learning_rate": 5.351063829787234e-06,
"loss": 1.3141,
"step": 503
},
{
"epoch": 0.13404255319148936,
"grad_norm": 5.0918707847595215,
"learning_rate": 5.361702127659575e-06,
"loss": 1.3649,
"step": 504
},
{
"epoch": 0.13430851063829788,
"grad_norm": 4.749475002288818,
"learning_rate": 5.372340425531915e-06,
"loss": 1.1692,
"step": 505
},
{
"epoch": 0.1345744680851064,
"grad_norm": 4.383024215698242,
"learning_rate": 5.382978723404257e-06,
"loss": 1.3438,
"step": 506
},
{
"epoch": 0.1348404255319149,
"grad_norm": 4.863028049468994,
"learning_rate": 5.393617021276596e-06,
"loss": 1.3332,
"step": 507
},
{
"epoch": 0.1351063829787234,
"grad_norm": 4.633965492248535,
"learning_rate": 5.404255319148937e-06,
"loss": 1.2012,
"step": 508
},
{
"epoch": 0.13537234042553192,
"grad_norm": 5.257637023925781,
"learning_rate": 5.414893617021277e-06,
"loss": 1.3595,
"step": 509
},
{
"epoch": 0.1356382978723404,
"grad_norm": 4.795042037963867,
"learning_rate": 5.425531914893617e-06,
"loss": 1.3843,
"step": 510
},
{
"epoch": 0.13590425531914893,
"grad_norm": 5.261885643005371,
"learning_rate": 5.436170212765958e-06,
"loss": 1.2708,
"step": 511
},
{
"epoch": 0.13617021276595745,
"grad_norm": 4.95104455947876,
"learning_rate": 5.4468085106382985e-06,
"loss": 1.2268,
"step": 512
},
{
"epoch": 0.13643617021276597,
"grad_norm": 5.171029567718506,
"learning_rate": 5.457446808510639e-06,
"loss": 1.38,
"step": 513
},
{
"epoch": 0.13670212765957446,
"grad_norm": 4.671914577484131,
"learning_rate": 5.4680851063829785e-06,
"loss": 1.1485,
"step": 514
},
{
"epoch": 0.13696808510638298,
"grad_norm": 4.562173843383789,
"learning_rate": 5.47872340425532e-06,
"loss": 1.3282,
"step": 515
},
{
"epoch": 0.1372340425531915,
"grad_norm": 4.870545387268066,
"learning_rate": 5.48936170212766e-06,
"loss": 1.1943,
"step": 516
},
{
"epoch": 0.1375,
"grad_norm": 5.231775760650635,
"learning_rate": 5.500000000000001e-06,
"loss": 1.2763,
"step": 517
},
{
"epoch": 0.1377659574468085,
"grad_norm": 5.05985689163208,
"learning_rate": 5.51063829787234e-06,
"loss": 1.2018,
"step": 518
},
{
"epoch": 0.13803191489361702,
"grad_norm": 4.818659782409668,
"learning_rate": 5.521276595744682e-06,
"loss": 1.2307,
"step": 519
},
{
"epoch": 0.13829787234042554,
"grad_norm": 4.803600311279297,
"learning_rate": 5.531914893617022e-06,
"loss": 1.3586,
"step": 520
},
{
"epoch": 0.13856382978723406,
"grad_norm": 4.65132999420166,
"learning_rate": 5.5425531914893625e-06,
"loss": 1.2147,
"step": 521
},
{
"epoch": 0.13882978723404255,
"grad_norm": 4.503746032714844,
"learning_rate": 5.553191489361702e-06,
"loss": 1.2307,
"step": 522
},
{
"epoch": 0.13909574468085106,
"grad_norm": 4.557102203369141,
"learning_rate": 5.563829787234043e-06,
"loss": 1.1906,
"step": 523
},
{
"epoch": 0.13936170212765958,
"grad_norm": 4.347774028778076,
"learning_rate": 5.574468085106384e-06,
"loss": 1.1632,
"step": 524
},
{
"epoch": 0.13962765957446807,
"grad_norm": 4.431983947753906,
"learning_rate": 5.5851063829787235e-06,
"loss": 1.2617,
"step": 525
},
{
"epoch": 0.1398936170212766,
"grad_norm": 4.971803665161133,
"learning_rate": 5.595744680851064e-06,
"loss": 1.2581,
"step": 526
},
{
"epoch": 0.1401595744680851,
"grad_norm": 4.5451979637146,
"learning_rate": 5.606382978723404e-06,
"loss": 1.3048,
"step": 527
},
{
"epoch": 0.14042553191489363,
"grad_norm": 4.687234878540039,
"learning_rate": 5.617021276595746e-06,
"loss": 1.2556,
"step": 528
},
{
"epoch": 0.14069148936170212,
"grad_norm": 4.7519378662109375,
"learning_rate": 5.627659574468085e-06,
"loss": 1.2017,
"step": 529
},
{
"epoch": 0.14095744680851063,
"grad_norm": 5.454826354980469,
"learning_rate": 5.638297872340426e-06,
"loss": 1.137,
"step": 530
},
{
"epoch": 0.14122340425531915,
"grad_norm": 5.442596435546875,
"learning_rate": 5.648936170212766e-06,
"loss": 1.3776,
"step": 531
},
{
"epoch": 0.14148936170212767,
"grad_norm": 5.057155132293701,
"learning_rate": 5.6595744680851075e-06,
"loss": 1.4229,
"step": 532
},
{
"epoch": 0.14175531914893616,
"grad_norm": 4.806349277496338,
"learning_rate": 5.670212765957447e-06,
"loss": 1.2874,
"step": 533
},
{
"epoch": 0.14202127659574468,
"grad_norm": 4.934086322784424,
"learning_rate": 5.6808510638297875e-06,
"loss": 1.3149,
"step": 534
},
{
"epoch": 0.1422872340425532,
"grad_norm": 4.371129035949707,
"learning_rate": 5.691489361702128e-06,
"loss": 1.2567,
"step": 535
},
{
"epoch": 0.1425531914893617,
"grad_norm": 5.498307228088379,
"learning_rate": 5.702127659574469e-06,
"loss": 1.166,
"step": 536
},
{
"epoch": 0.1428191489361702,
"grad_norm": 4.467796802520752,
"learning_rate": 5.712765957446809e-06,
"loss": 1.1359,
"step": 537
},
{
"epoch": 0.14308510638297872,
"grad_norm": 4.92448091506958,
"learning_rate": 5.723404255319149e-06,
"loss": 1.2873,
"step": 538
},
{
"epoch": 0.14335106382978724,
"grad_norm": 4.561826705932617,
"learning_rate": 5.73404255319149e-06,
"loss": 1.0615,
"step": 539
},
{
"epoch": 0.14361702127659576,
"grad_norm": 4.773728370666504,
"learning_rate": 5.744680851063831e-06,
"loss": 1.1718,
"step": 540
},
{
"epoch": 0.14388297872340425,
"grad_norm": 4.3747639656066895,
"learning_rate": 5.755319148936171e-06,
"loss": 1.165,
"step": 541
},
{
"epoch": 0.14414893617021277,
"grad_norm": 5.261002063751221,
"learning_rate": 5.765957446808511e-06,
"loss": 1.3091,
"step": 542
},
{
"epoch": 0.14441489361702128,
"grad_norm": 5.58752965927124,
"learning_rate": 5.7765957446808516e-06,
"loss": 1.2045,
"step": 543
},
{
"epoch": 0.14468085106382977,
"grad_norm": 4.371783256530762,
"learning_rate": 5.787234042553191e-06,
"loss": 1.1548,
"step": 544
},
{
"epoch": 0.1449468085106383,
"grad_norm": 4.958721160888672,
"learning_rate": 5.7978723404255325e-06,
"loss": 1.4517,
"step": 545
},
{
"epoch": 0.1452127659574468,
"grad_norm": 4.846461296081543,
"learning_rate": 5.808510638297873e-06,
"loss": 1.3224,
"step": 546
},
{
"epoch": 0.14547872340425533,
"grad_norm": 5.132719039916992,
"learning_rate": 5.819148936170213e-06,
"loss": 1.1865,
"step": 547
},
{
"epoch": 0.14574468085106382,
"grad_norm": 4.791563987731934,
"learning_rate": 5.829787234042553e-06,
"loss": 1.2571,
"step": 548
},
{
"epoch": 0.14601063829787234,
"grad_norm": 5.137845039367676,
"learning_rate": 5.840425531914894e-06,
"loss": 1.3008,
"step": 549
},
{
"epoch": 0.14627659574468085,
"grad_norm": 4.80680513381958,
"learning_rate": 5.851063829787235e-06,
"loss": 1.243,
"step": 550
},
{
"epoch": 0.14654255319148937,
"grad_norm": 4.938924312591553,
"learning_rate": 5.861702127659575e-06,
"loss": 1.3482,
"step": 551
},
{
"epoch": 0.14680851063829786,
"grad_norm": 5.239283561706543,
"learning_rate": 5.872340425531915e-06,
"loss": 1.1938,
"step": 552
},
{
"epoch": 0.14707446808510638,
"grad_norm": 4.885773658752441,
"learning_rate": 5.882978723404256e-06,
"loss": 1.1257,
"step": 553
},
{
"epoch": 0.1473404255319149,
"grad_norm": 5.183603763580322,
"learning_rate": 5.8936170212765965e-06,
"loss": 1.3353,
"step": 554
},
{
"epoch": 0.14760638297872342,
"grad_norm": 4.765013694763184,
"learning_rate": 5.904255319148937e-06,
"loss": 1.2058,
"step": 555
},
{
"epoch": 0.1478723404255319,
"grad_norm": 5.2760419845581055,
"learning_rate": 5.9148936170212766e-06,
"loss": 1.2109,
"step": 556
},
{
"epoch": 0.14813829787234042,
"grad_norm": 5.04670524597168,
"learning_rate": 5.925531914893618e-06,
"loss": 1.3347,
"step": 557
},
{
"epoch": 0.14840425531914894,
"grad_norm": 4.968268394470215,
"learning_rate": 5.936170212765958e-06,
"loss": 1.3295,
"step": 558
},
{
"epoch": 0.14867021276595746,
"grad_norm": 4.791049480438232,
"learning_rate": 5.946808510638299e-06,
"loss": 1.2116,
"step": 559
},
{
"epoch": 0.14893617021276595,
"grad_norm": 4.980474948883057,
"learning_rate": 5.957446808510638e-06,
"loss": 1.4063,
"step": 560
},
{
"epoch": 0.14920212765957447,
"grad_norm": 4.56986141204834,
"learning_rate": 5.968085106382979e-06,
"loss": 1.2442,
"step": 561
},
{
"epoch": 0.149468085106383,
"grad_norm": 4.691464424133301,
"learning_rate": 5.97872340425532e-06,
"loss": 1.2784,
"step": 562
},
{
"epoch": 0.14973404255319148,
"grad_norm": 5.040019512176514,
"learning_rate": 5.98936170212766e-06,
"loss": 1.2195,
"step": 563
},
{
"epoch": 0.15,
"grad_norm": 5.160355091094971,
"learning_rate": 6e-06,
"loss": 1.4814,
"step": 564
},
{
"epoch": 0.1502659574468085,
"grad_norm": 4.696538925170898,
"learning_rate": 6.010638297872341e-06,
"loss": 1.2542,
"step": 565
},
{
"epoch": 0.15053191489361703,
"grad_norm": 4.901849269866943,
"learning_rate": 6.021276595744682e-06,
"loss": 1.2633,
"step": 566
},
{
"epoch": 0.15079787234042552,
"grad_norm": 4.936095237731934,
"learning_rate": 6.0319148936170215e-06,
"loss": 1.2812,
"step": 567
},
{
"epoch": 0.15106382978723404,
"grad_norm": 4.6663055419921875,
"learning_rate": 6.042553191489362e-06,
"loss": 1.3449,
"step": 568
},
{
"epoch": 0.15132978723404256,
"grad_norm": 4.95345401763916,
"learning_rate": 6.053191489361702e-06,
"loss": 1.1968,
"step": 569
},
{
"epoch": 0.15159574468085107,
"grad_norm": 4.66139030456543,
"learning_rate": 6.063829787234044e-06,
"loss": 1.1773,
"step": 570
},
{
"epoch": 0.15186170212765956,
"grad_norm": 5.310500144958496,
"learning_rate": 6.074468085106383e-06,
"loss": 1.2606,
"step": 571
},
{
"epoch": 0.15212765957446808,
"grad_norm": 5.423430442810059,
"learning_rate": 6.085106382978724e-06,
"loss": 1.4334,
"step": 572
},
{
"epoch": 0.1523936170212766,
"grad_norm": 5.189186096191406,
"learning_rate": 6.095744680851064e-06,
"loss": 1.2955,
"step": 573
},
{
"epoch": 0.15265957446808512,
"grad_norm": 5.515524864196777,
"learning_rate": 6.1063829787234055e-06,
"loss": 1.2777,
"step": 574
},
{
"epoch": 0.1529255319148936,
"grad_norm": 4.615379810333252,
"learning_rate": 6.117021276595745e-06,
"loss": 1.2492,
"step": 575
},
{
"epoch": 0.15319148936170213,
"grad_norm": 4.674113750457764,
"learning_rate": 6.1276595744680855e-06,
"loss": 1.2807,
"step": 576
},
{
"epoch": 0.15345744680851064,
"grad_norm": 4.907557487487793,
"learning_rate": 6.138297872340426e-06,
"loss": 1.4288,
"step": 577
},
{
"epoch": 0.15372340425531916,
"grad_norm": 4.517690658569336,
"learning_rate": 6.148936170212767e-06,
"loss": 1.2274,
"step": 578
},
{
"epoch": 0.15398936170212765,
"grad_norm": 4.350996971130371,
"learning_rate": 6.159574468085107e-06,
"loss": 1.284,
"step": 579
},
{
"epoch": 0.15425531914893617,
"grad_norm": 4.552090644836426,
"learning_rate": 6.170212765957447e-06,
"loss": 1.193,
"step": 580
},
{
"epoch": 0.1545212765957447,
"grad_norm": 5.3864827156066895,
"learning_rate": 6.180851063829788e-06,
"loss": 1.2869,
"step": 581
},
{
"epoch": 0.15478723404255318,
"grad_norm": 4.946741104125977,
"learning_rate": 6.191489361702127e-06,
"loss": 1.1894,
"step": 582
},
{
"epoch": 0.1550531914893617,
"grad_norm": 4.652212619781494,
"learning_rate": 6.202127659574469e-06,
"loss": 1.3841,
"step": 583
},
{
"epoch": 0.15531914893617021,
"grad_norm": 4.876087188720703,
"learning_rate": 6.212765957446809e-06,
"loss": 1.4244,
"step": 584
},
{
"epoch": 0.15558510638297873,
"grad_norm": 4.947083473205566,
"learning_rate": 6.22340425531915e-06,
"loss": 1.3616,
"step": 585
},
{
"epoch": 0.15585106382978722,
"grad_norm": 4.663647174835205,
"learning_rate": 6.234042553191489e-06,
"loss": 1.2258,
"step": 586
},
{
"epoch": 0.15611702127659574,
"grad_norm": 4.758052825927734,
"learning_rate": 6.2446808510638305e-06,
"loss": 1.1514,
"step": 587
},
{
"epoch": 0.15638297872340426,
"grad_norm": 4.887540340423584,
"learning_rate": 6.255319148936171e-06,
"loss": 1.1887,
"step": 588
},
{
"epoch": 0.15664893617021278,
"grad_norm": 4.9997477531433105,
"learning_rate": 6.265957446808511e-06,
"loss": 1.2235,
"step": 589
},
{
"epoch": 0.15691489361702127,
"grad_norm": 5.29210090637207,
"learning_rate": 6.276595744680851e-06,
"loss": 1.3761,
"step": 590
},
{
"epoch": 0.15718085106382979,
"grad_norm": 4.92548942565918,
"learning_rate": 6.287234042553192e-06,
"loss": 1.3848,
"step": 591
},
{
"epoch": 0.1574468085106383,
"grad_norm": 5.194962978363037,
"learning_rate": 6.297872340425533e-06,
"loss": 1.4225,
"step": 592
},
{
"epoch": 0.15771276595744682,
"grad_norm": 4.7201080322265625,
"learning_rate": 6.308510638297873e-06,
"loss": 1.142,
"step": 593
},
{
"epoch": 0.1579787234042553,
"grad_norm": 4.397183895111084,
"learning_rate": 6.319148936170213e-06,
"loss": 1.0353,
"step": 594
},
{
"epoch": 0.15824468085106383,
"grad_norm": 4.910755157470703,
"learning_rate": 6.329787234042554e-06,
"loss": 1.3927,
"step": 595
},
{
"epoch": 0.15851063829787235,
"grad_norm": 4.846840858459473,
"learning_rate": 6.3404255319148945e-06,
"loss": 1.3298,
"step": 596
},
{
"epoch": 0.15877659574468084,
"grad_norm": 4.725717067718506,
"learning_rate": 6.351063829787234e-06,
"loss": 1.319,
"step": 597
},
{
"epoch": 0.15904255319148936,
"grad_norm": 4.561202049255371,
"learning_rate": 6.361702127659575e-06,
"loss": 1.3586,
"step": 598
},
{
"epoch": 0.15930851063829787,
"grad_norm": 5.391122817993164,
"learning_rate": 6.372340425531915e-06,
"loss": 1.2876,
"step": 599
},
{
"epoch": 0.1595744680851064,
"grad_norm": 4.996328830718994,
"learning_rate": 6.382978723404256e-06,
"loss": 1.5125,
"step": 600
},
{
"epoch": 0.15984042553191488,
"grad_norm": 5.271803855895996,
"learning_rate": 6.393617021276596e-06,
"loss": 1.3858,
"step": 601
},
{
"epoch": 0.1601063829787234,
"grad_norm": 4.3907318115234375,
"learning_rate": 6.404255319148936e-06,
"loss": 1.1134,
"step": 602
},
{
"epoch": 0.16037234042553192,
"grad_norm": 5.224330902099609,
"learning_rate": 6.414893617021277e-06,
"loss": 1.572,
"step": 603
},
{
"epoch": 0.16063829787234044,
"grad_norm": 5.044121742248535,
"learning_rate": 6.425531914893618e-06,
"loss": 1.4531,
"step": 604
},
{
"epoch": 0.16090425531914893,
"grad_norm": 4.903571128845215,
"learning_rate": 6.436170212765958e-06,
"loss": 1.2779,
"step": 605
},
{
"epoch": 0.16117021276595744,
"grad_norm": 4.621399402618408,
"learning_rate": 6.446808510638298e-06,
"loss": 1.1709,
"step": 606
},
{
"epoch": 0.16143617021276596,
"grad_norm": 4.697232723236084,
"learning_rate": 6.457446808510639e-06,
"loss": 1.1601,
"step": 607
},
{
"epoch": 0.16170212765957448,
"grad_norm": 5.482996940612793,
"learning_rate": 6.46808510638298e-06,
"loss": 1.401,
"step": 608
},
{
"epoch": 0.16196808510638297,
"grad_norm": 4.974328994750977,
"learning_rate": 6.4787234042553195e-06,
"loss": 1.288,
"step": 609
},
{
"epoch": 0.1622340425531915,
"grad_norm": 4.7073140144348145,
"learning_rate": 6.48936170212766e-06,
"loss": 1.331,
"step": 610
},
{
"epoch": 0.1625,
"grad_norm": 4.540210247039795,
"learning_rate": 6.5000000000000004e-06,
"loss": 1.217,
"step": 611
},
{
"epoch": 0.16276595744680852,
"grad_norm": 4.792731285095215,
"learning_rate": 6.510638297872342e-06,
"loss": 1.2696,
"step": 612
},
{
"epoch": 0.163031914893617,
"grad_norm": 4.365908622741699,
"learning_rate": 6.521276595744681e-06,
"loss": 1.1104,
"step": 613
},
{
"epoch": 0.16329787234042553,
"grad_norm": 4.6623101234436035,
"learning_rate": 6.531914893617022e-06,
"loss": 1.0165,
"step": 614
},
{
"epoch": 0.16356382978723405,
"grad_norm": 4.874281883239746,
"learning_rate": 6.542553191489362e-06,
"loss": 1.3418,
"step": 615
},
{
"epoch": 0.16382978723404254,
"grad_norm": 5.30225133895874,
"learning_rate": 6.553191489361702e-06,
"loss": 1.2965,
"step": 616
},
{
"epoch": 0.16409574468085106,
"grad_norm": 5.1621880531311035,
"learning_rate": 6.563829787234043e-06,
"loss": 1.411,
"step": 617
},
{
"epoch": 0.16436170212765958,
"grad_norm": 5.011656761169434,
"learning_rate": 6.574468085106384e-06,
"loss": 1.2324,
"step": 618
},
{
"epoch": 0.1646276595744681,
"grad_norm": 4.633167743682861,
"learning_rate": 6.585106382978724e-06,
"loss": 1.2498,
"step": 619
},
{
"epoch": 0.16489361702127658,
"grad_norm": 4.762227535247803,
"learning_rate": 6.595744680851064e-06,
"loss": 1.3774,
"step": 620
},
{
"epoch": 0.1651595744680851,
"grad_norm": 4.581019401550293,
"learning_rate": 6.606382978723405e-06,
"loss": 1.2745,
"step": 621
},
{
"epoch": 0.16542553191489362,
"grad_norm": 4.845024585723877,
"learning_rate": 6.617021276595745e-06,
"loss": 1.2003,
"step": 622
},
{
"epoch": 0.16569148936170214,
"grad_norm": 4.555243015289307,
"learning_rate": 6.627659574468086e-06,
"loss": 1.265,
"step": 623
},
{
"epoch": 0.16595744680851063,
"grad_norm": 4.3719987869262695,
"learning_rate": 6.6382978723404254e-06,
"loss": 1.2131,
"step": 624
},
{
"epoch": 0.16622340425531915,
"grad_norm": 4.629434108734131,
"learning_rate": 6.648936170212767e-06,
"loss": 1.3491,
"step": 625
},
{
"epoch": 0.16648936170212766,
"grad_norm": 5.0472540855407715,
"learning_rate": 6.659574468085107e-06,
"loss": 1.4119,
"step": 626
},
{
"epoch": 0.16675531914893618,
"grad_norm": 4.784181594848633,
"learning_rate": 6.670212765957448e-06,
"loss": 1.3079,
"step": 627
},
{
"epoch": 0.16702127659574467,
"grad_norm": 5.000133514404297,
"learning_rate": 6.680851063829787e-06,
"loss": 1.2378,
"step": 628
},
{
"epoch": 0.1672872340425532,
"grad_norm": 4.911679267883301,
"learning_rate": 6.6914893617021285e-06,
"loss": 1.1824,
"step": 629
},
{
"epoch": 0.1675531914893617,
"grad_norm": 4.674395561218262,
"learning_rate": 6.702127659574469e-06,
"loss": 1.1836,
"step": 630
},
{
"epoch": 0.16781914893617023,
"grad_norm": 4.964152812957764,
"learning_rate": 6.7127659574468094e-06,
"loss": 1.2419,
"step": 631
},
{
"epoch": 0.16808510638297872,
"grad_norm": 4.766603946685791,
"learning_rate": 6.723404255319149e-06,
"loss": 1.2885,
"step": 632
},
{
"epoch": 0.16835106382978723,
"grad_norm": 4.679075241088867,
"learning_rate": 6.7340425531914895e-06,
"loss": 1.279,
"step": 633
},
{
"epoch": 0.16861702127659575,
"grad_norm": 4.590879440307617,
"learning_rate": 6.744680851063831e-06,
"loss": 1.2808,
"step": 634
},
{
"epoch": 0.16888297872340424,
"grad_norm": 4.539956092834473,
"learning_rate": 6.75531914893617e-06,
"loss": 1.3353,
"step": 635
},
{
"epoch": 0.16914893617021276,
"grad_norm": 4.546907424926758,
"learning_rate": 6.765957446808511e-06,
"loss": 1.2691,
"step": 636
},
{
"epoch": 0.16941489361702128,
"grad_norm": 4.260477066040039,
"learning_rate": 6.776595744680851e-06,
"loss": 1.313,
"step": 637
},
{
"epoch": 0.1696808510638298,
"grad_norm": 4.697219371795654,
"learning_rate": 6.787234042553193e-06,
"loss": 1.131,
"step": 638
},
{
"epoch": 0.1699468085106383,
"grad_norm": 4.471210479736328,
"learning_rate": 6.797872340425532e-06,
"loss": 1.1466,
"step": 639
},
{
"epoch": 0.1702127659574468,
"grad_norm": 5.731024742126465,
"learning_rate": 6.808510638297873e-06,
"loss": 1.1923,
"step": 640
},
{
"epoch": 0.17047872340425532,
"grad_norm": 4.853487491607666,
"learning_rate": 6.819148936170213e-06,
"loss": 1.3019,
"step": 641
},
{
"epoch": 0.17074468085106384,
"grad_norm": 4.857687950134277,
"learning_rate": 6.829787234042554e-06,
"loss": 1.382,
"step": 642
},
{
"epoch": 0.17101063829787233,
"grad_norm": 5.497145652770996,
"learning_rate": 6.840425531914894e-06,
"loss": 1.2611,
"step": 643
},
{
"epoch": 0.17127659574468085,
"grad_norm": 4.852382659912109,
"learning_rate": 6.8510638297872344e-06,
"loss": 1.3002,
"step": 644
},
{
"epoch": 0.17154255319148937,
"grad_norm": 4.891834259033203,
"learning_rate": 6.861702127659575e-06,
"loss": 1.3009,
"step": 645
},
{
"epoch": 0.17180851063829788,
"grad_norm": 5.264189720153809,
"learning_rate": 6.872340425531916e-06,
"loss": 1.2047,
"step": 646
},
{
"epoch": 0.17207446808510637,
"grad_norm": 4.408929347991943,
"learning_rate": 6.882978723404256e-06,
"loss": 1.4105,
"step": 647
},
{
"epoch": 0.1723404255319149,
"grad_norm": 4.550996780395508,
"learning_rate": 6.893617021276596e-06,
"loss": 1.4495,
"step": 648
},
{
"epoch": 0.1726063829787234,
"grad_norm": 4.704092025756836,
"learning_rate": 6.904255319148937e-06,
"loss": 1.2031,
"step": 649
},
{
"epoch": 0.17287234042553193,
"grad_norm": 4.802618026733398,
"learning_rate": 6.914893617021278e-06,
"loss": 1.2879,
"step": 650
},
{
"epoch": 0.17313829787234042,
"grad_norm": 4.637843608856201,
"learning_rate": 6.925531914893618e-06,
"loss": 1.2621,
"step": 651
},
{
"epoch": 0.17340425531914894,
"grad_norm": 4.558661937713623,
"learning_rate": 6.936170212765958e-06,
"loss": 1.1671,
"step": 652
},
{
"epoch": 0.17367021276595745,
"grad_norm": 4.981627464294434,
"learning_rate": 6.9468085106382985e-06,
"loss": 1.2137,
"step": 653
},
{
"epoch": 0.17393617021276594,
"grad_norm": 4.708109378814697,
"learning_rate": 6.957446808510638e-06,
"loss": 1.1408,
"step": 654
},
{
"epoch": 0.17420212765957446,
"grad_norm": 5.328996658325195,
"learning_rate": 6.968085106382979e-06,
"loss": 1.1697,
"step": 655
},
{
"epoch": 0.17446808510638298,
"grad_norm": 4.988645553588867,
"learning_rate": 6.97872340425532e-06,
"loss": 1.2962,
"step": 656
},
{
"epoch": 0.1747340425531915,
"grad_norm": 5.570682048797607,
"learning_rate": 6.98936170212766e-06,
"loss": 1.4083,
"step": 657
},
{
"epoch": 0.175,
"grad_norm": 5.141003608703613,
"learning_rate": 7e-06,
"loss": 1.2558,
"step": 658
},
{
"epoch": 0.1752659574468085,
"grad_norm": 4.548361778259277,
"learning_rate": 7.010638297872341e-06,
"loss": 1.2556,
"step": 659
},
{
"epoch": 0.17553191489361702,
"grad_norm": 4.381852149963379,
"learning_rate": 7.021276595744682e-06,
"loss": 1.3609,
"step": 660
},
{
"epoch": 0.17579787234042554,
"grad_norm": 4.388241767883301,
"learning_rate": 7.031914893617022e-06,
"loss": 1.2165,
"step": 661
},
{
"epoch": 0.17606382978723403,
"grad_norm": 4.472124099731445,
"learning_rate": 7.042553191489362e-06,
"loss": 1.3372,
"step": 662
},
{
"epoch": 0.17632978723404255,
"grad_norm": 4.284490585327148,
"learning_rate": 7.053191489361703e-06,
"loss": 1.1206,
"step": 663
},
{
"epoch": 0.17659574468085107,
"grad_norm": 4.448127269744873,
"learning_rate": 7.0638297872340434e-06,
"loss": 1.3206,
"step": 664
},
{
"epoch": 0.1768617021276596,
"grad_norm": 4.701923847198486,
"learning_rate": 7.074468085106384e-06,
"loss": 1.1289,
"step": 665
},
{
"epoch": 0.17712765957446808,
"grad_norm": 4.249335289001465,
"learning_rate": 7.0851063829787235e-06,
"loss": 1.136,
"step": 666
},
{
"epoch": 0.1773936170212766,
"grad_norm": 4.292792320251465,
"learning_rate": 7.095744680851065e-06,
"loss": 1.1827,
"step": 667
},
{
"epoch": 0.1776595744680851,
"grad_norm": 4.595381736755371,
"learning_rate": 7.106382978723405e-06,
"loss": 1.1449,
"step": 668
},
{
"epoch": 0.1779255319148936,
"grad_norm": 4.856510162353516,
"learning_rate": 7.117021276595745e-06,
"loss": 1.2378,
"step": 669
},
{
"epoch": 0.17819148936170212,
"grad_norm": 4.735593318939209,
"learning_rate": 7.127659574468085e-06,
"loss": 1.1641,
"step": 670
},
{
"epoch": 0.17845744680851064,
"grad_norm": 4.771074295043945,
"learning_rate": 7.138297872340426e-06,
"loss": 1.33,
"step": 671
},
{
"epoch": 0.17872340425531916,
"grad_norm": 4.873645782470703,
"learning_rate": 7.148936170212767e-06,
"loss": 1.3388,
"step": 672
},
{
"epoch": 0.17898936170212765,
"grad_norm": 4.672497749328613,
"learning_rate": 7.159574468085107e-06,
"loss": 1.3479,
"step": 673
},
{
"epoch": 0.17925531914893617,
"grad_norm": 4.454950332641602,
"learning_rate": 7.170212765957447e-06,
"loss": 1.3631,
"step": 674
},
{
"epoch": 0.17952127659574468,
"grad_norm": 5.085921764373779,
"learning_rate": 7.1808510638297875e-06,
"loss": 1.4711,
"step": 675
},
{
"epoch": 0.1797872340425532,
"grad_norm": 4.528400421142578,
"learning_rate": 7.191489361702129e-06,
"loss": 1.1868,
"step": 676
},
{
"epoch": 0.1800531914893617,
"grad_norm": 4.722430229187012,
"learning_rate": 7.2021276595744684e-06,
"loss": 1.3842,
"step": 677
},
{
"epoch": 0.1803191489361702,
"grad_norm": 4.894054889678955,
"learning_rate": 7.212765957446809e-06,
"loss": 1.4365,
"step": 678
},
{
"epoch": 0.18058510638297873,
"grad_norm": 4.8365559577941895,
"learning_rate": 7.223404255319149e-06,
"loss": 1.4409,
"step": 679
},
{
"epoch": 0.18085106382978725,
"grad_norm": 5.0071916580200195,
"learning_rate": 7.234042553191491e-06,
"loss": 1.214,
"step": 680
},
{
"epoch": 0.18111702127659574,
"grad_norm": 4.514876365661621,
"learning_rate": 7.24468085106383e-06,
"loss": 1.1646,
"step": 681
},
{
"epoch": 0.18138297872340425,
"grad_norm": 4.465925693511963,
"learning_rate": 7.255319148936171e-06,
"loss": 1.2662,
"step": 682
},
{
"epoch": 0.18164893617021277,
"grad_norm": 4.698017120361328,
"learning_rate": 7.265957446808511e-06,
"loss": 1.3683,
"step": 683
},
{
"epoch": 0.1819148936170213,
"grad_norm": 4.704659461975098,
"learning_rate": 7.2765957446808524e-06,
"loss": 1.2236,
"step": 684
},
{
"epoch": 0.18218085106382978,
"grad_norm": 4.9184675216674805,
"learning_rate": 7.287234042553192e-06,
"loss": 1.1904,
"step": 685
},
{
"epoch": 0.1824468085106383,
"grad_norm": 4.5409088134765625,
"learning_rate": 7.2978723404255325e-06,
"loss": 1.2257,
"step": 686
},
{
"epoch": 0.18271276595744682,
"grad_norm": 4.9037556648254395,
"learning_rate": 7.308510638297873e-06,
"loss": 1.31,
"step": 687
},
{
"epoch": 0.1829787234042553,
"grad_norm": 4.719064235687256,
"learning_rate": 7.3191489361702125e-06,
"loss": 1.2651,
"step": 688
},
{
"epoch": 0.18324468085106382,
"grad_norm": 4.5164971351623535,
"learning_rate": 7.329787234042554e-06,
"loss": 1.306,
"step": 689
},
{
"epoch": 0.18351063829787234,
"grad_norm": 4.281124591827393,
"learning_rate": 7.340425531914894e-06,
"loss": 1.1963,
"step": 690
},
{
"epoch": 0.18377659574468086,
"grad_norm": 4.6168951988220215,
"learning_rate": 7.351063829787235e-06,
"loss": 1.2118,
"step": 691
},
{
"epoch": 0.18404255319148935,
"grad_norm": 4.85908842086792,
"learning_rate": 7.361702127659574e-06,
"loss": 1.2587,
"step": 692
},
{
"epoch": 0.18430851063829787,
"grad_norm": 4.3025336265563965,
"learning_rate": 7.372340425531916e-06,
"loss": 1.1239,
"step": 693
},
{
"epoch": 0.18457446808510639,
"grad_norm": 4.3702311515808105,
"learning_rate": 7.382978723404256e-06,
"loss": 1.0654,
"step": 694
},
{
"epoch": 0.1848404255319149,
"grad_norm": 4.243852615356445,
"learning_rate": 7.3936170212765965e-06,
"loss": 1.2725,
"step": 695
},
{
"epoch": 0.1851063829787234,
"grad_norm": 4.241601467132568,
"learning_rate": 7.404255319148936e-06,
"loss": 1.1379,
"step": 696
},
{
"epoch": 0.1853723404255319,
"grad_norm": 4.863661766052246,
"learning_rate": 7.4148936170212774e-06,
"loss": 1.2644,
"step": 697
},
{
"epoch": 0.18563829787234043,
"grad_norm": 4.637073040008545,
"learning_rate": 7.425531914893618e-06,
"loss": 1.3296,
"step": 698
},
{
"epoch": 0.18590425531914895,
"grad_norm": 4.703394889831543,
"learning_rate": 7.436170212765958e-06,
"loss": 1.3016,
"step": 699
},
{
"epoch": 0.18617021276595744,
"grad_norm": 4.478874206542969,
"learning_rate": 7.446808510638298e-06,
"loss": 1.3163,
"step": 700
},
{
"epoch": 0.18643617021276596,
"grad_norm": 4.600717067718506,
"learning_rate": 7.457446808510639e-06,
"loss": 1.3648,
"step": 701
},
{
"epoch": 0.18670212765957447,
"grad_norm": 4.729065418243408,
"learning_rate": 7.46808510638298e-06,
"loss": 1.3604,
"step": 702
},
{
"epoch": 0.186968085106383,
"grad_norm": 4.127298831939697,
"learning_rate": 7.47872340425532e-06,
"loss": 1.153,
"step": 703
},
{
"epoch": 0.18723404255319148,
"grad_norm": 4.612214088439941,
"learning_rate": 7.48936170212766e-06,
"loss": 1.2951,
"step": 704
},
{
"epoch": 0.1875,
"grad_norm": 5.011428356170654,
"learning_rate": 7.500000000000001e-06,
"loss": 1.4121,
"step": 705
},
{
"epoch": 0.18776595744680852,
"grad_norm": 4.605989933013916,
"learning_rate": 7.5106382978723415e-06,
"loss": 1.262,
"step": 706
},
{
"epoch": 0.188031914893617,
"grad_norm": 5.028648853302002,
"learning_rate": 7.521276595744681e-06,
"loss": 1.4181,
"step": 707
},
{
"epoch": 0.18829787234042553,
"grad_norm": 4.571159839630127,
"learning_rate": 7.5319148936170215e-06,
"loss": 1.2364,
"step": 708
},
{
"epoch": 0.18856382978723404,
"grad_norm": 4.608417510986328,
"learning_rate": 7.542553191489362e-06,
"loss": 1.3094,
"step": 709
},
{
"epoch": 0.18882978723404256,
"grad_norm": 4.881725311279297,
"learning_rate": 7.553191489361703e-06,
"loss": 1.313,
"step": 710
},
{
"epoch": 0.18909574468085105,
"grad_norm": 4.912058353424072,
"learning_rate": 7.563829787234043e-06,
"loss": 1.392,
"step": 711
},
{
"epoch": 0.18936170212765957,
"grad_norm": 4.419525623321533,
"learning_rate": 7.574468085106383e-06,
"loss": 1.2366,
"step": 712
},
{
"epoch": 0.1896276595744681,
"grad_norm": 4.507438659667969,
"learning_rate": 7.585106382978724e-06,
"loss": 1.2404,
"step": 713
},
{
"epoch": 0.1898936170212766,
"grad_norm": 4.561898708343506,
"learning_rate": 7.595744680851065e-06,
"loss": 1.3596,
"step": 714
},
{
"epoch": 0.1901595744680851,
"grad_norm": 4.635844707489014,
"learning_rate": 7.606382978723405e-06,
"loss": 1.2898,
"step": 715
},
{
"epoch": 0.19042553191489361,
"grad_norm": 5.374488353729248,
"learning_rate": 7.617021276595745e-06,
"loss": 1.3445,
"step": 716
},
{
"epoch": 0.19069148936170213,
"grad_norm": 4.574670314788818,
"learning_rate": 7.627659574468086e-06,
"loss": 1.2414,
"step": 717
},
{
"epoch": 0.19095744680851065,
"grad_norm": 4.509703159332275,
"learning_rate": 7.638297872340426e-06,
"loss": 1.1649,
"step": 718
},
{
"epoch": 0.19122340425531914,
"grad_norm": 4.2057929039001465,
"learning_rate": 7.648936170212766e-06,
"loss": 1.3734,
"step": 719
},
{
"epoch": 0.19148936170212766,
"grad_norm": 4.571545124053955,
"learning_rate": 7.659574468085107e-06,
"loss": 1.2722,
"step": 720
},
{
"epoch": 0.19175531914893618,
"grad_norm": 4.561543941497803,
"learning_rate": 7.670212765957448e-06,
"loss": 1.4057,
"step": 721
},
{
"epoch": 0.1920212765957447,
"grad_norm": 4.365459442138672,
"learning_rate": 7.680851063829788e-06,
"loss": 1.2348,
"step": 722
},
{
"epoch": 0.19228723404255318,
"grad_norm": 4.416993141174316,
"learning_rate": 7.691489361702127e-06,
"loss": 1.3065,
"step": 723
},
{
"epoch": 0.1925531914893617,
"grad_norm": 4.762002944946289,
"learning_rate": 7.702127659574469e-06,
"loss": 1.3231,
"step": 724
},
{
"epoch": 0.19281914893617022,
"grad_norm": 5.0312604904174805,
"learning_rate": 7.71276595744681e-06,
"loss": 1.3851,
"step": 725
},
{
"epoch": 0.1930851063829787,
"grad_norm": 4.8303046226501465,
"learning_rate": 7.72340425531915e-06,
"loss": 1.3391,
"step": 726
},
{
"epoch": 0.19335106382978723,
"grad_norm": 5.312425136566162,
"learning_rate": 7.73404255319149e-06,
"loss": 1.3422,
"step": 727
},
{
"epoch": 0.19361702127659575,
"grad_norm": 4.574582576751709,
"learning_rate": 7.74468085106383e-06,
"loss": 1.2543,
"step": 728
},
{
"epoch": 0.19388297872340426,
"grad_norm": 4.735869884490967,
"learning_rate": 7.755319148936172e-06,
"loss": 1.427,
"step": 729
},
{
"epoch": 0.19414893617021275,
"grad_norm": 4.317601203918457,
"learning_rate": 7.765957446808511e-06,
"loss": 1.221,
"step": 730
},
{
"epoch": 0.19441489361702127,
"grad_norm": 4.69275426864624,
"learning_rate": 7.776595744680851e-06,
"loss": 1.2186,
"step": 731
},
{
"epoch": 0.1946808510638298,
"grad_norm": 4.865464210510254,
"learning_rate": 7.787234042553192e-06,
"loss": 1.3243,
"step": 732
},
{
"epoch": 0.1949468085106383,
"grad_norm": 4.288273811340332,
"learning_rate": 7.797872340425534e-06,
"loss": 1.2224,
"step": 733
},
{
"epoch": 0.1952127659574468,
"grad_norm": 4.230968475341797,
"learning_rate": 7.808510638297873e-06,
"loss": 1.1869,
"step": 734
},
{
"epoch": 0.19547872340425532,
"grad_norm": 5.056215286254883,
"learning_rate": 7.819148936170213e-06,
"loss": 1.2755,
"step": 735
},
{
"epoch": 0.19574468085106383,
"grad_norm": 4.373525142669678,
"learning_rate": 7.829787234042554e-06,
"loss": 1.2649,
"step": 736
},
{
"epoch": 0.19601063829787235,
"grad_norm": 4.4216179847717285,
"learning_rate": 7.840425531914895e-06,
"loss": 1.2578,
"step": 737
},
{
"epoch": 0.19627659574468084,
"grad_norm": 4.517039775848389,
"learning_rate": 7.851063829787235e-06,
"loss": 1.1759,
"step": 738
},
{
"epoch": 0.19654255319148936,
"grad_norm": 4.973018169403076,
"learning_rate": 7.861702127659575e-06,
"loss": 1.2073,
"step": 739
},
{
"epoch": 0.19680851063829788,
"grad_norm": 4.714282035827637,
"learning_rate": 7.872340425531916e-06,
"loss": 1.3551,
"step": 740
},
{
"epoch": 0.1970744680851064,
"grad_norm": 4.824267387390137,
"learning_rate": 7.882978723404257e-06,
"loss": 1.287,
"step": 741
},
{
"epoch": 0.1973404255319149,
"grad_norm": 4.343824863433838,
"learning_rate": 7.893617021276597e-06,
"loss": 1.1736,
"step": 742
},
{
"epoch": 0.1976063829787234,
"grad_norm": 5.130711555480957,
"learning_rate": 7.904255319148936e-06,
"loss": 1.3622,
"step": 743
},
{
"epoch": 0.19787234042553192,
"grad_norm": 4.943610191345215,
"learning_rate": 7.914893617021278e-06,
"loss": 1.2538,
"step": 744
},
{
"epoch": 0.1981382978723404,
"grad_norm": 4.978169918060303,
"learning_rate": 7.925531914893617e-06,
"loss": 1.2547,
"step": 745
},
{
"epoch": 0.19840425531914893,
"grad_norm": 4.933815956115723,
"learning_rate": 7.936170212765959e-06,
"loss": 1.3827,
"step": 746
},
{
"epoch": 0.19867021276595745,
"grad_norm": 4.288017272949219,
"learning_rate": 7.946808510638298e-06,
"loss": 1.2695,
"step": 747
},
{
"epoch": 0.19893617021276597,
"grad_norm": 4.4305267333984375,
"learning_rate": 7.95744680851064e-06,
"loss": 1.1459,
"step": 748
},
{
"epoch": 0.19920212765957446,
"grad_norm": 4.959934711456299,
"learning_rate": 7.968085106382979e-06,
"loss": 1.1793,
"step": 749
},
{
"epoch": 0.19946808510638298,
"grad_norm": 4.623016834259033,
"learning_rate": 7.97872340425532e-06,
"loss": 1.2508,
"step": 750
},
{
"epoch": 0.1997340425531915,
"grad_norm": 4.426565170288086,
"learning_rate": 7.98936170212766e-06,
"loss": 1.2464,
"step": 751
},
{
"epoch": 0.2,
"grad_norm": 4.914389610290527,
"learning_rate": 8.000000000000001e-06,
"loss": 1.2941,
"step": 752
},
{
"epoch": 0.2002659574468085,
"grad_norm": 4.474592685699463,
"learning_rate": 8.010638297872341e-06,
"loss": 1.2285,
"step": 753
},
{
"epoch": 0.20053191489361702,
"grad_norm": 4.237037181854248,
"learning_rate": 8.021276595744682e-06,
"loss": 1.3422,
"step": 754
},
{
"epoch": 0.20079787234042554,
"grad_norm": 4.545922756195068,
"learning_rate": 8.031914893617022e-06,
"loss": 1.2456,
"step": 755
},
{
"epoch": 0.20106382978723406,
"grad_norm": 4.951487064361572,
"learning_rate": 8.042553191489363e-06,
"loss": 1.3001,
"step": 756
},
{
"epoch": 0.20132978723404255,
"grad_norm": 5.056552886962891,
"learning_rate": 8.053191489361703e-06,
"loss": 1.3875,
"step": 757
},
{
"epoch": 0.20159574468085106,
"grad_norm": 4.5373101234436035,
"learning_rate": 8.063829787234044e-06,
"loss": 1.2855,
"step": 758
},
{
"epoch": 0.20186170212765958,
"grad_norm": 4.698331832885742,
"learning_rate": 8.074468085106384e-06,
"loss": 1.1841,
"step": 759
},
{
"epoch": 0.20212765957446807,
"grad_norm": 4.885603904724121,
"learning_rate": 8.085106382978723e-06,
"loss": 1.2843,
"step": 760
},
{
"epoch": 0.2023936170212766,
"grad_norm": 4.819825172424316,
"learning_rate": 8.095744680851065e-06,
"loss": 1.2908,
"step": 761
},
{
"epoch": 0.2026595744680851,
"grad_norm": 4.332822799682617,
"learning_rate": 8.106382978723404e-06,
"loss": 1.1986,
"step": 762
},
{
"epoch": 0.20292553191489363,
"grad_norm": 4.102404594421387,
"learning_rate": 8.117021276595745e-06,
"loss": 1.3478,
"step": 763
},
{
"epoch": 0.20319148936170212,
"grad_norm": 4.496637344360352,
"learning_rate": 8.127659574468085e-06,
"loss": 1.265,
"step": 764
},
{
"epoch": 0.20345744680851063,
"grad_norm": 4.544750690460205,
"learning_rate": 8.138297872340426e-06,
"loss": 1.2299,
"step": 765
},
{
"epoch": 0.20372340425531915,
"grad_norm": 4.774095058441162,
"learning_rate": 8.148936170212766e-06,
"loss": 1.3596,
"step": 766
},
{
"epoch": 0.20398936170212767,
"grad_norm": 4.508190155029297,
"learning_rate": 8.159574468085107e-06,
"loss": 1.3143,
"step": 767
},
{
"epoch": 0.20425531914893616,
"grad_norm": 4.832380771636963,
"learning_rate": 8.170212765957447e-06,
"loss": 1.2449,
"step": 768
},
{
"epoch": 0.20452127659574468,
"grad_norm": 4.282026290893555,
"learning_rate": 8.180851063829788e-06,
"loss": 1.199,
"step": 769
},
{
"epoch": 0.2047872340425532,
"grad_norm": 4.594806671142578,
"learning_rate": 8.191489361702128e-06,
"loss": 1.2466,
"step": 770
},
{
"epoch": 0.2050531914893617,
"grad_norm": 4.925674915313721,
"learning_rate": 8.202127659574469e-06,
"loss": 1.2771,
"step": 771
},
{
"epoch": 0.2053191489361702,
"grad_norm": 4.634965419769287,
"learning_rate": 8.212765957446809e-06,
"loss": 1.2511,
"step": 772
},
{
"epoch": 0.20558510638297872,
"grad_norm": 4.774378776550293,
"learning_rate": 8.22340425531915e-06,
"loss": 1.1902,
"step": 773
},
{
"epoch": 0.20585106382978724,
"grad_norm": 4.943484783172607,
"learning_rate": 8.23404255319149e-06,
"loss": 1.454,
"step": 774
},
{
"epoch": 0.20611702127659576,
"grad_norm": 4.800187587738037,
"learning_rate": 8.24468085106383e-06,
"loss": 1.3709,
"step": 775
},
{
"epoch": 0.20638297872340425,
"grad_norm": 5.566744327545166,
"learning_rate": 8.25531914893617e-06,
"loss": 1.3158,
"step": 776
},
{
"epoch": 0.20664893617021277,
"grad_norm": 4.241647720336914,
"learning_rate": 8.265957446808512e-06,
"loss": 1.3173,
"step": 777
},
{
"epoch": 0.20691489361702128,
"grad_norm": 4.561349868774414,
"learning_rate": 8.276595744680851e-06,
"loss": 1.1971,
"step": 778
},
{
"epoch": 0.20718085106382977,
"grad_norm": 4.4153828620910645,
"learning_rate": 8.287234042553191e-06,
"loss": 1.2479,
"step": 779
},
{
"epoch": 0.2074468085106383,
"grad_norm": 4.6610107421875,
"learning_rate": 8.297872340425532e-06,
"loss": 1.5759,
"step": 780
},
{
"epoch": 0.2077127659574468,
"grad_norm": 5.142064094543457,
"learning_rate": 8.308510638297874e-06,
"loss": 1.3802,
"step": 781
},
{
"epoch": 0.20797872340425533,
"grad_norm": 4.54619026184082,
"learning_rate": 8.319148936170213e-06,
"loss": 1.3185,
"step": 782
},
{
"epoch": 0.20824468085106382,
"grad_norm": 4.640912055969238,
"learning_rate": 8.329787234042553e-06,
"loss": 1.2491,
"step": 783
},
{
"epoch": 0.20851063829787234,
"grad_norm": 4.866705894470215,
"learning_rate": 8.340425531914894e-06,
"loss": 1.28,
"step": 784
},
{
"epoch": 0.20877659574468085,
"grad_norm": 4.362489700317383,
"learning_rate": 8.351063829787235e-06,
"loss": 1.3603,
"step": 785
},
{
"epoch": 0.20904255319148937,
"grad_norm": 4.756308078765869,
"learning_rate": 8.361702127659575e-06,
"loss": 1.4108,
"step": 786
},
{
"epoch": 0.20930851063829786,
"grad_norm": 4.564047813415527,
"learning_rate": 8.372340425531915e-06,
"loss": 1.3404,
"step": 787
},
{
"epoch": 0.20957446808510638,
"grad_norm": 4.4327921867370605,
"learning_rate": 8.382978723404256e-06,
"loss": 1.2675,
"step": 788
},
{
"epoch": 0.2098404255319149,
"grad_norm": 4.656761646270752,
"learning_rate": 8.393617021276597e-06,
"loss": 1.2601,
"step": 789
},
{
"epoch": 0.21010638297872342,
"grad_norm": 4.353705883026123,
"learning_rate": 8.404255319148937e-06,
"loss": 1.2144,
"step": 790
},
{
"epoch": 0.2103723404255319,
"grad_norm": 4.420286655426025,
"learning_rate": 8.414893617021276e-06,
"loss": 1.249,
"step": 791
},
{
"epoch": 0.21063829787234042,
"grad_norm": 4.781008243560791,
"learning_rate": 8.425531914893618e-06,
"loss": 1.3132,
"step": 792
},
{
"epoch": 0.21090425531914894,
"grad_norm": 5.137455463409424,
"learning_rate": 8.436170212765959e-06,
"loss": 1.2915,
"step": 793
},
{
"epoch": 0.21117021276595746,
"grad_norm": 4.893155097961426,
"learning_rate": 8.446808510638299e-06,
"loss": 1.3679,
"step": 794
},
{
"epoch": 0.21143617021276595,
"grad_norm": 4.635669708251953,
"learning_rate": 8.457446808510638e-06,
"loss": 1.3222,
"step": 795
},
{
"epoch": 0.21170212765957447,
"grad_norm": 4.853140354156494,
"learning_rate": 8.46808510638298e-06,
"loss": 1.2849,
"step": 796
},
{
"epoch": 0.211968085106383,
"grad_norm": 4.836693286895752,
"learning_rate": 8.47872340425532e-06,
"loss": 1.395,
"step": 797
},
{
"epoch": 0.21223404255319148,
"grad_norm": 4.493725299835205,
"learning_rate": 8.48936170212766e-06,
"loss": 1.3197,
"step": 798
},
{
"epoch": 0.2125,
"grad_norm": 5.088167190551758,
"learning_rate": 8.5e-06,
"loss": 1.4093,
"step": 799
},
{
"epoch": 0.2127659574468085,
"grad_norm": 4.372249603271484,
"learning_rate": 8.510638297872341e-06,
"loss": 1.3612,
"step": 800
},
{
"epoch": 0.21303191489361703,
"grad_norm": 4.2862420082092285,
"learning_rate": 8.521276595744683e-06,
"loss": 1.2227,
"step": 801
},
{
"epoch": 0.21329787234042552,
"grad_norm": 4.741192817687988,
"learning_rate": 8.531914893617022e-06,
"loss": 1.2799,
"step": 802
},
{
"epoch": 0.21356382978723404,
"grad_norm": 5.022809982299805,
"learning_rate": 8.542553191489362e-06,
"loss": 1.407,
"step": 803
},
{
"epoch": 0.21382978723404256,
"grad_norm": 4.443842887878418,
"learning_rate": 8.553191489361703e-06,
"loss": 1.3346,
"step": 804
},
{
"epoch": 0.21409574468085107,
"grad_norm": 4.133638858795166,
"learning_rate": 8.563829787234044e-06,
"loss": 1.2443,
"step": 805
},
{
"epoch": 0.21436170212765956,
"grad_norm": 4.916075706481934,
"learning_rate": 8.574468085106384e-06,
"loss": 1.3503,
"step": 806
},
{
"epoch": 0.21462765957446808,
"grad_norm": 4.634794235229492,
"learning_rate": 8.585106382978724e-06,
"loss": 1.4072,
"step": 807
},
{
"epoch": 0.2148936170212766,
"grad_norm": 4.912757396697998,
"learning_rate": 8.595744680851065e-06,
"loss": 1.3311,
"step": 808
},
{
"epoch": 0.21515957446808512,
"grad_norm": 5.202310085296631,
"learning_rate": 8.606382978723406e-06,
"loss": 1.3224,
"step": 809
},
{
"epoch": 0.2154255319148936,
"grad_norm": 4.477729320526123,
"learning_rate": 8.617021276595746e-06,
"loss": 1.2806,
"step": 810
},
{
"epoch": 0.21569148936170213,
"grad_norm": 4.493345260620117,
"learning_rate": 8.627659574468085e-06,
"loss": 1.0227,
"step": 811
},
{
"epoch": 0.21595744680851064,
"grad_norm": 5.053197383880615,
"learning_rate": 8.638297872340427e-06,
"loss": 1.2941,
"step": 812
},
{
"epoch": 0.21622340425531916,
"grad_norm": 4.492358684539795,
"learning_rate": 8.648936170212768e-06,
"loss": 1.2651,
"step": 813
},
{
"epoch": 0.21648936170212765,
"grad_norm": 4.270611763000488,
"learning_rate": 8.659574468085108e-06,
"loss": 1.2417,
"step": 814
},
{
"epoch": 0.21675531914893617,
"grad_norm": 4.236185073852539,
"learning_rate": 8.670212765957447e-06,
"loss": 1.1717,
"step": 815
},
{
"epoch": 0.2170212765957447,
"grad_norm": 4.765509128570557,
"learning_rate": 8.680851063829788e-06,
"loss": 1.3134,
"step": 816
},
{
"epoch": 0.21728723404255318,
"grad_norm": 5.146259784698486,
"learning_rate": 8.691489361702128e-06,
"loss": 1.4561,
"step": 817
},
{
"epoch": 0.2175531914893617,
"grad_norm": 4.461063385009766,
"learning_rate": 8.70212765957447e-06,
"loss": 1.2138,
"step": 818
},
{
"epoch": 0.21781914893617021,
"grad_norm": 4.676782608032227,
"learning_rate": 8.712765957446809e-06,
"loss": 1.2614,
"step": 819
},
{
"epoch": 0.21808510638297873,
"grad_norm": 4.411204814910889,
"learning_rate": 8.72340425531915e-06,
"loss": 1.3142,
"step": 820
},
{
"epoch": 0.21835106382978722,
"grad_norm": 4.208769798278809,
"learning_rate": 8.73404255319149e-06,
"loss": 1.4278,
"step": 821
},
{
"epoch": 0.21861702127659574,
"grad_norm": 4.132145404815674,
"learning_rate": 8.744680851063831e-06,
"loss": 1.214,
"step": 822
},
{
"epoch": 0.21888297872340426,
"grad_norm": 4.246182441711426,
"learning_rate": 8.75531914893617e-06,
"loss": 1.4079,
"step": 823
},
{
"epoch": 0.21914893617021278,
"grad_norm": 4.859819412231445,
"learning_rate": 8.765957446808512e-06,
"loss": 1.2343,
"step": 824
},
{
"epoch": 0.21941489361702127,
"grad_norm": 4.722071170806885,
"learning_rate": 8.776595744680852e-06,
"loss": 1.276,
"step": 825
},
{
"epoch": 0.21968085106382979,
"grad_norm": 4.489323139190674,
"learning_rate": 8.787234042553193e-06,
"loss": 1.2388,
"step": 826
},
{
"epoch": 0.2199468085106383,
"grad_norm": 4.459937572479248,
"learning_rate": 8.797872340425533e-06,
"loss": 1.1911,
"step": 827
},
{
"epoch": 0.22021276595744682,
"grad_norm": 4.6483988761901855,
"learning_rate": 8.808510638297874e-06,
"loss": 1.5344,
"step": 828
},
{
"epoch": 0.2204787234042553,
"grad_norm": 4.822110176086426,
"learning_rate": 8.819148936170213e-06,
"loss": 1.2885,
"step": 829
},
{
"epoch": 0.22074468085106383,
"grad_norm": 4.722024917602539,
"learning_rate": 8.829787234042555e-06,
"loss": 1.2496,
"step": 830
},
{
"epoch": 0.22101063829787235,
"grad_norm": 5.146275520324707,
"learning_rate": 8.840425531914894e-06,
"loss": 1.3017,
"step": 831
},
{
"epoch": 0.22127659574468084,
"grad_norm": 4.489665508270264,
"learning_rate": 8.851063829787234e-06,
"loss": 1.1933,
"step": 832
},
{
"epoch": 0.22154255319148936,
"grad_norm": 4.318885803222656,
"learning_rate": 8.861702127659575e-06,
"loss": 1.1849,
"step": 833
},
{
"epoch": 0.22180851063829787,
"grad_norm": 4.603454113006592,
"learning_rate": 8.872340425531915e-06,
"loss": 1.3538,
"step": 834
},
{
"epoch": 0.2220744680851064,
"grad_norm": 4.531906604766846,
"learning_rate": 8.882978723404256e-06,
"loss": 1.3913,
"step": 835
},
{
"epoch": 0.22234042553191488,
"grad_norm": 4.391329288482666,
"learning_rate": 8.893617021276596e-06,
"loss": 1.289,
"step": 836
},
{
"epoch": 0.2226063829787234,
"grad_norm": 5.546546459197998,
"learning_rate": 8.904255319148937e-06,
"loss": 1.2507,
"step": 837
},
{
"epoch": 0.22287234042553192,
"grad_norm": 4.61740779876709,
"learning_rate": 8.914893617021277e-06,
"loss": 1.3726,
"step": 838
},
{
"epoch": 0.22313829787234044,
"grad_norm": 4.953794479370117,
"learning_rate": 8.925531914893618e-06,
"loss": 1.2434,
"step": 839
},
{
"epoch": 0.22340425531914893,
"grad_norm": 4.278190612792969,
"learning_rate": 8.936170212765958e-06,
"loss": 1.2559,
"step": 840
},
{
"epoch": 0.22367021276595744,
"grad_norm": 4.941532135009766,
"learning_rate": 8.946808510638299e-06,
"loss": 1.3278,
"step": 841
},
{
"epoch": 0.22393617021276596,
"grad_norm": 4.883002758026123,
"learning_rate": 8.957446808510638e-06,
"loss": 1.2537,
"step": 842
},
{
"epoch": 0.22420212765957448,
"grad_norm": 4.7191619873046875,
"learning_rate": 8.96808510638298e-06,
"loss": 1.2726,
"step": 843
},
{
"epoch": 0.22446808510638297,
"grad_norm": 4.509050369262695,
"learning_rate": 8.97872340425532e-06,
"loss": 1.2025,
"step": 844
},
{
"epoch": 0.2247340425531915,
"grad_norm": 3.9332523345947266,
"learning_rate": 8.98936170212766e-06,
"loss": 1.1207,
"step": 845
},
{
"epoch": 0.225,
"grad_norm": 4.3128204345703125,
"learning_rate": 9e-06,
"loss": 1.2433,
"step": 846
},
{
"epoch": 0.22526595744680852,
"grad_norm": 4.253404140472412,
"learning_rate": 9.010638297872342e-06,
"loss": 1.2193,
"step": 847
},
{
"epoch": 0.225531914893617,
"grad_norm": 4.779951572418213,
"learning_rate": 9.021276595744681e-06,
"loss": 1.2158,
"step": 848
},
{
"epoch": 0.22579787234042553,
"grad_norm": 4.481555461883545,
"learning_rate": 9.031914893617022e-06,
"loss": 1.4551,
"step": 849
},
{
"epoch": 0.22606382978723405,
"grad_norm": 4.955724239349365,
"learning_rate": 9.042553191489362e-06,
"loss": 1.4291,
"step": 850
},
{
"epoch": 0.22632978723404254,
"grad_norm": 4.106208801269531,
"learning_rate": 9.053191489361702e-06,
"loss": 1.3655,
"step": 851
},
{
"epoch": 0.22659574468085106,
"grad_norm": 4.6892499923706055,
"learning_rate": 9.063829787234043e-06,
"loss": 1.2516,
"step": 852
},
{
"epoch": 0.22686170212765958,
"grad_norm": 4.553836822509766,
"learning_rate": 9.074468085106384e-06,
"loss": 1.2107,
"step": 853
},
{
"epoch": 0.2271276595744681,
"grad_norm": 5.072434902191162,
"learning_rate": 9.085106382978724e-06,
"loss": 1.3445,
"step": 854
},
{
"epoch": 0.22739361702127658,
"grad_norm": 4.725018501281738,
"learning_rate": 9.095744680851063e-06,
"loss": 1.2701,
"step": 855
},
{
"epoch": 0.2276595744680851,
"grad_norm": 4.630471706390381,
"learning_rate": 9.106382978723405e-06,
"loss": 1.3229,
"step": 856
},
{
"epoch": 0.22792553191489362,
"grad_norm": 4.0610880851745605,
"learning_rate": 9.117021276595746e-06,
"loss": 1.0857,
"step": 857
},
{
"epoch": 0.22819148936170214,
"grad_norm": 4.523334503173828,
"learning_rate": 9.127659574468086e-06,
"loss": 1.446,
"step": 858
},
{
"epoch": 0.22845744680851063,
"grad_norm": 5.042343616485596,
"learning_rate": 9.138297872340425e-06,
"loss": 1.3728,
"step": 859
},
{
"epoch": 0.22872340425531915,
"grad_norm": 4.5774664878845215,
"learning_rate": 9.148936170212767e-06,
"loss": 1.3178,
"step": 860
},
{
"epoch": 0.22898936170212766,
"grad_norm": 4.425473213195801,
"learning_rate": 9.159574468085108e-06,
"loss": 1.3412,
"step": 861
},
{
"epoch": 0.22925531914893618,
"grad_norm": 4.738778114318848,
"learning_rate": 9.170212765957447e-06,
"loss": 1.3676,
"step": 862
},
{
"epoch": 0.22952127659574467,
"grad_norm": 4.462982654571533,
"learning_rate": 9.180851063829787e-06,
"loss": 1.2755,
"step": 863
},
{
"epoch": 0.2297872340425532,
"grad_norm": 4.682027816772461,
"learning_rate": 9.191489361702128e-06,
"loss": 1.2625,
"step": 864
},
{
"epoch": 0.2300531914893617,
"grad_norm": 4.37489652633667,
"learning_rate": 9.20212765957447e-06,
"loss": 1.291,
"step": 865
},
{
"epoch": 0.23031914893617023,
"grad_norm": 4.652685642242432,
"learning_rate": 9.21276595744681e-06,
"loss": 1.1782,
"step": 866
},
{
"epoch": 0.23058510638297872,
"grad_norm": 4.401131629943848,
"learning_rate": 9.223404255319149e-06,
"loss": 1.2626,
"step": 867
},
{
"epoch": 0.23085106382978723,
"grad_norm": 4.712587356567383,
"learning_rate": 9.23404255319149e-06,
"loss": 1.2888,
"step": 868
},
{
"epoch": 0.23111702127659575,
"grad_norm": 4.425190448760986,
"learning_rate": 9.244680851063831e-06,
"loss": 1.2566,
"step": 869
},
{
"epoch": 0.23138297872340424,
"grad_norm": 5.040404319763184,
"learning_rate": 9.255319148936171e-06,
"loss": 1.1856,
"step": 870
},
{
"epoch": 0.23164893617021276,
"grad_norm": 4.372191905975342,
"learning_rate": 9.26595744680851e-06,
"loss": 1.3153,
"step": 871
},
{
"epoch": 0.23191489361702128,
"grad_norm": 4.518852233886719,
"learning_rate": 9.276595744680852e-06,
"loss": 1.2652,
"step": 872
},
{
"epoch": 0.2321808510638298,
"grad_norm": 5.675739288330078,
"learning_rate": 9.287234042553193e-06,
"loss": 1.2654,
"step": 873
},
{
"epoch": 0.2324468085106383,
"grad_norm": 4.503605842590332,
"learning_rate": 9.297872340425533e-06,
"loss": 1.2693,
"step": 874
},
{
"epoch": 0.2327127659574468,
"grad_norm": 4.573145866394043,
"learning_rate": 9.308510638297872e-06,
"loss": 1.3126,
"step": 875
},
{
"epoch": 0.23297872340425532,
"grad_norm": 4.833911418914795,
"learning_rate": 9.319148936170214e-06,
"loss": 1.3583,
"step": 876
},
{
"epoch": 0.23324468085106384,
"grad_norm": 4.768589496612549,
"learning_rate": 9.329787234042555e-06,
"loss": 1.273,
"step": 877
},
{
"epoch": 0.23351063829787233,
"grad_norm": 4.1959638595581055,
"learning_rate": 9.340425531914895e-06,
"loss": 1.1774,
"step": 878
},
{
"epoch": 0.23377659574468085,
"grad_norm": 4.231587886810303,
"learning_rate": 9.351063829787234e-06,
"loss": 1.3215,
"step": 879
},
{
"epoch": 0.23404255319148937,
"grad_norm": 4.725379943847656,
"learning_rate": 9.361702127659576e-06,
"loss": 1.3458,
"step": 880
},
{
"epoch": 0.23430851063829788,
"grad_norm": 4.831368446350098,
"learning_rate": 9.372340425531917e-06,
"loss": 1.3499,
"step": 881
},
{
"epoch": 0.23457446808510637,
"grad_norm": 4.571084499359131,
"learning_rate": 9.382978723404256e-06,
"loss": 1.2071,
"step": 882
},
{
"epoch": 0.2348404255319149,
"grad_norm": 4.676523208618164,
"learning_rate": 9.393617021276596e-06,
"loss": 1.3009,
"step": 883
},
{
"epoch": 0.2351063829787234,
"grad_norm": 4.406195640563965,
"learning_rate": 9.404255319148937e-06,
"loss": 1.3127,
"step": 884
},
{
"epoch": 0.23537234042553193,
"grad_norm": 4.958892822265625,
"learning_rate": 9.414893617021279e-06,
"loss": 1.3724,
"step": 885
},
{
"epoch": 0.23563829787234042,
"grad_norm": 4.296865463256836,
"learning_rate": 9.425531914893618e-06,
"loss": 1.2535,
"step": 886
},
{
"epoch": 0.23590425531914894,
"grad_norm": 4.650951862335205,
"learning_rate": 9.436170212765958e-06,
"loss": 1.2432,
"step": 887
},
{
"epoch": 0.23617021276595745,
"grad_norm": 4.3874831199646,
"learning_rate": 9.446808510638299e-06,
"loss": 1.4075,
"step": 888
},
{
"epoch": 0.23643617021276594,
"grad_norm": 4.246219158172607,
"learning_rate": 9.457446808510639e-06,
"loss": 1.2787,
"step": 889
},
{
"epoch": 0.23670212765957446,
"grad_norm": 4.379426956176758,
"learning_rate": 9.46808510638298e-06,
"loss": 1.2586,
"step": 890
},
{
"epoch": 0.23696808510638298,
"grad_norm": 4.164050102233887,
"learning_rate": 9.47872340425532e-06,
"loss": 1.3071,
"step": 891
},
{
"epoch": 0.2372340425531915,
"grad_norm": 4.572608947753906,
"learning_rate": 9.489361702127661e-06,
"loss": 1.3735,
"step": 892
},
{
"epoch": 0.2375,
"grad_norm": 4.812750339508057,
"learning_rate": 9.5e-06,
"loss": 1.3627,
"step": 893
},
{
"epoch": 0.2377659574468085,
"grad_norm": 4.5463056564331055,
"learning_rate": 9.510638297872342e-06,
"loss": 1.2688,
"step": 894
},
{
"epoch": 0.23803191489361702,
"grad_norm": 4.700718402862549,
"learning_rate": 9.521276595744681e-06,
"loss": 1.3242,
"step": 895
},
{
"epoch": 0.23829787234042554,
"grad_norm": 4.626996040344238,
"learning_rate": 9.531914893617023e-06,
"loss": 1.3346,
"step": 896
},
{
"epoch": 0.23856382978723403,
"grad_norm": 4.4340643882751465,
"learning_rate": 9.542553191489362e-06,
"loss": 1.266,
"step": 897
},
{
"epoch": 0.23882978723404255,
"grad_norm": 4.288296222686768,
"learning_rate": 9.553191489361704e-06,
"loss": 1.3097,
"step": 898
},
{
"epoch": 0.23909574468085107,
"grad_norm": 4.531320571899414,
"learning_rate": 9.563829787234043e-06,
"loss": 1.2607,
"step": 899
},
{
"epoch": 0.2393617021276596,
"grad_norm": 4.4416985511779785,
"learning_rate": 9.574468085106385e-06,
"loss": 1.2443,
"step": 900
},
{
"epoch": 0.23962765957446808,
"grad_norm": 4.752575397491455,
"learning_rate": 9.585106382978724e-06,
"loss": 1.263,
"step": 901
},
{
"epoch": 0.2398936170212766,
"grad_norm": 4.418696403503418,
"learning_rate": 9.595744680851065e-06,
"loss": 1.4263,
"step": 902
},
{
"epoch": 0.2401595744680851,
"grad_norm": 4.149245262145996,
"learning_rate": 9.606382978723405e-06,
"loss": 1.2097,
"step": 903
},
{
"epoch": 0.2404255319148936,
"grad_norm": 4.261038303375244,
"learning_rate": 9.617021276595745e-06,
"loss": 1.284,
"step": 904
},
{
"epoch": 0.24069148936170212,
"grad_norm": 4.526815414428711,
"learning_rate": 9.627659574468086e-06,
"loss": 1.2036,
"step": 905
},
{
"epoch": 0.24095744680851064,
"grad_norm": 4.194947719573975,
"learning_rate": 9.638297872340426e-06,
"loss": 1.3215,
"step": 906
},
{
"epoch": 0.24122340425531916,
"grad_norm": 4.903501987457275,
"learning_rate": 9.648936170212767e-06,
"loss": 1.2824,
"step": 907
},
{
"epoch": 0.24148936170212765,
"grad_norm": 4.600060939788818,
"learning_rate": 9.659574468085106e-06,
"loss": 1.3283,
"step": 908
},
{
"epoch": 0.24175531914893617,
"grad_norm": 4.43640661239624,
"learning_rate": 9.670212765957448e-06,
"loss": 1.2952,
"step": 909
},
{
"epoch": 0.24202127659574468,
"grad_norm": 4.518085479736328,
"learning_rate": 9.680851063829787e-06,
"loss": 1.2436,
"step": 910
},
{
"epoch": 0.2422872340425532,
"grad_norm": 4.508195877075195,
"learning_rate": 9.691489361702129e-06,
"loss": 1.448,
"step": 911
},
{
"epoch": 0.2425531914893617,
"grad_norm": 4.132392406463623,
"learning_rate": 9.702127659574468e-06,
"loss": 1.2467,
"step": 912
},
{
"epoch": 0.2428191489361702,
"grad_norm": 4.272422790527344,
"learning_rate": 9.71276595744681e-06,
"loss": 1.1718,
"step": 913
},
{
"epoch": 0.24308510638297873,
"grad_norm": 3.7474145889282227,
"learning_rate": 9.723404255319149e-06,
"loss": 1.2312,
"step": 914
},
{
"epoch": 0.24335106382978725,
"grad_norm": 4.318002700805664,
"learning_rate": 9.73404255319149e-06,
"loss": 1.2954,
"step": 915
},
{
"epoch": 0.24361702127659574,
"grad_norm": 4.300724506378174,
"learning_rate": 9.74468085106383e-06,
"loss": 1.324,
"step": 916
},
{
"epoch": 0.24388297872340425,
"grad_norm": 4.362585067749023,
"learning_rate": 9.755319148936171e-06,
"loss": 1.2939,
"step": 917
},
{
"epoch": 0.24414893617021277,
"grad_norm": 4.705591678619385,
"learning_rate": 9.765957446808511e-06,
"loss": 1.3472,
"step": 918
},
{
"epoch": 0.2444148936170213,
"grad_norm": 4.612809658050537,
"learning_rate": 9.776595744680852e-06,
"loss": 1.323,
"step": 919
},
{
"epoch": 0.24468085106382978,
"grad_norm": 4.289991855621338,
"learning_rate": 9.787234042553192e-06,
"loss": 1.3352,
"step": 920
},
{
"epoch": 0.2449468085106383,
"grad_norm": 4.43556022644043,
"learning_rate": 9.797872340425533e-06,
"loss": 1.2358,
"step": 921
},
{
"epoch": 0.24521276595744682,
"grad_norm": 4.365429878234863,
"learning_rate": 9.808510638297873e-06,
"loss": 1.3711,
"step": 922
},
{
"epoch": 0.2454787234042553,
"grad_norm": 4.680497646331787,
"learning_rate": 9.819148936170212e-06,
"loss": 1.3057,
"step": 923
},
{
"epoch": 0.24574468085106382,
"grad_norm": 4.54257869720459,
"learning_rate": 9.829787234042554e-06,
"loss": 1.4173,
"step": 924
},
{
"epoch": 0.24601063829787234,
"grad_norm": 4.676888465881348,
"learning_rate": 9.840425531914895e-06,
"loss": 1.386,
"step": 925
},
{
"epoch": 0.24627659574468086,
"grad_norm": 4.417918682098389,
"learning_rate": 9.851063829787235e-06,
"loss": 1.4044,
"step": 926
},
{
"epoch": 0.24654255319148935,
"grad_norm": 4.195037841796875,
"learning_rate": 9.861702127659574e-06,
"loss": 1.2735,
"step": 927
},
{
"epoch": 0.24680851063829787,
"grad_norm": 4.587873935699463,
"learning_rate": 9.872340425531915e-06,
"loss": 1.2647,
"step": 928
},
{
"epoch": 0.24707446808510639,
"grad_norm": 4.467301845550537,
"learning_rate": 9.882978723404257e-06,
"loss": 1.387,
"step": 929
},
{
"epoch": 0.2473404255319149,
"grad_norm": 4.606912136077881,
"learning_rate": 9.893617021276596e-06,
"loss": 1.3188,
"step": 930
},
{
"epoch": 0.2476063829787234,
"grad_norm": 4.470932483673096,
"learning_rate": 9.904255319148936e-06,
"loss": 1.3166,
"step": 931
},
{
"epoch": 0.2478723404255319,
"grad_norm": 4.317614555358887,
"learning_rate": 9.914893617021277e-06,
"loss": 1.3514,
"step": 932
},
{
"epoch": 0.24813829787234043,
"grad_norm": 4.443989276885986,
"learning_rate": 9.925531914893619e-06,
"loss": 1.2636,
"step": 933
},
{
"epoch": 0.24840425531914895,
"grad_norm": 4.796088218688965,
"learning_rate": 9.936170212765958e-06,
"loss": 1.2652,
"step": 934
},
{
"epoch": 0.24867021276595744,
"grad_norm": 4.967231750488281,
"learning_rate": 9.946808510638298e-06,
"loss": 1.4264,
"step": 935
},
{
"epoch": 0.24893617021276596,
"grad_norm": 4.075037002563477,
"learning_rate": 9.957446808510639e-06,
"loss": 1.1912,
"step": 936
},
{
"epoch": 0.24920212765957447,
"grad_norm": 4.505919933319092,
"learning_rate": 9.96808510638298e-06,
"loss": 1.3069,
"step": 937
},
{
"epoch": 0.249468085106383,
"grad_norm": 4.194151878356934,
"learning_rate": 9.97872340425532e-06,
"loss": 1.3177,
"step": 938
},
{
"epoch": 0.24973404255319148,
"grad_norm": 4.591639518737793,
"learning_rate": 9.98936170212766e-06,
"loss": 1.3742,
"step": 939
},
{
"epoch": 0.25,
"grad_norm": 4.259275913238525,
"learning_rate": 1e-05,
"loss": 1.2802,
"step": 940
},
{
"epoch": 0.2502659574468085,
"grad_norm": 5.042564392089844,
"learning_rate": 9.999999922647056e-06,
"loss": 1.3329,
"step": 941
},
{
"epoch": 0.25053191489361704,
"grad_norm": 4.728914737701416,
"learning_rate": 9.999999690588228e-06,
"loss": 1.2498,
"step": 942
},
{
"epoch": 0.25079787234042555,
"grad_norm": 4.191166877746582,
"learning_rate": 9.999999303823525e-06,
"loss": 1.3322,
"step": 943
},
{
"epoch": 0.251063829787234,
"grad_norm": 4.627315044403076,
"learning_rate": 9.999998762352953e-06,
"loss": 1.4223,
"step": 944
},
{
"epoch": 0.25132978723404253,
"grad_norm": 4.210728168487549,
"learning_rate": 9.999998066176536e-06,
"loss": 1.2534,
"step": 945
},
{
"epoch": 0.25159574468085105,
"grad_norm": 4.210343837738037,
"learning_rate": 9.99999721529429e-06,
"loss": 1.2587,
"step": 946
},
{
"epoch": 0.25186170212765957,
"grad_norm": 4.43513298034668,
"learning_rate": 9.999996209706243e-06,
"loss": 1.2222,
"step": 947
},
{
"epoch": 0.2521276595744681,
"grad_norm": 4.577609539031982,
"learning_rate": 9.999995049412428e-06,
"loss": 1.3063,
"step": 948
},
{
"epoch": 0.2523936170212766,
"grad_norm": 4.520708084106445,
"learning_rate": 9.99999373441288e-06,
"loss": 1.2357,
"step": 949
},
{
"epoch": 0.2526595744680851,
"grad_norm": 4.051931858062744,
"learning_rate": 9.999992264707636e-06,
"loss": 1.265,
"step": 950
},
{
"epoch": 0.25292553191489364,
"grad_norm": 4.30267333984375,
"learning_rate": 9.999990640296747e-06,
"loss": 1.1791,
"step": 951
},
{
"epoch": 0.2531914893617021,
"grad_norm": 4.397022724151611,
"learning_rate": 9.99998886118026e-06,
"loss": 1.2239,
"step": 952
},
{
"epoch": 0.2534574468085106,
"grad_norm": 4.552164077758789,
"learning_rate": 9.999986927358231e-06,
"loss": 1.3983,
"step": 953
},
{
"epoch": 0.25372340425531914,
"grad_norm": 4.569587707519531,
"learning_rate": 9.999984838830721e-06,
"loss": 1.3307,
"step": 954
},
{
"epoch": 0.25398936170212766,
"grad_norm": 4.352025985717773,
"learning_rate": 9.999982595597793e-06,
"loss": 1.3996,
"step": 955
},
{
"epoch": 0.2542553191489362,
"grad_norm": 4.358248710632324,
"learning_rate": 9.999980197659515e-06,
"loss": 1.4166,
"step": 956
},
{
"epoch": 0.2545212765957447,
"grad_norm": 4.449854373931885,
"learning_rate": 9.999977645015963e-06,
"loss": 1.2414,
"step": 957
},
{
"epoch": 0.2547872340425532,
"grad_norm": 4.66248083114624,
"learning_rate": 9.999974937667217e-06,
"loss": 1.2852,
"step": 958
},
{
"epoch": 0.2550531914893617,
"grad_norm": 4.217624187469482,
"learning_rate": 9.99997207561336e-06,
"loss": 1.2624,
"step": 959
},
{
"epoch": 0.2553191489361702,
"grad_norm": 4.449913501739502,
"learning_rate": 9.99996905885448e-06,
"loss": 1.2733,
"step": 960
},
{
"epoch": 0.2555851063829787,
"grad_norm": 3.9325287342071533,
"learning_rate": 9.99996588739067e-06,
"loss": 1.2253,
"step": 961
},
{
"epoch": 0.25585106382978723,
"grad_norm": 4.425497531890869,
"learning_rate": 9.99996256122203e-06,
"loss": 1.1233,
"step": 962
},
{
"epoch": 0.25611702127659575,
"grad_norm": 3.946796178817749,
"learning_rate": 9.99995908034866e-06,
"loss": 1.2961,
"step": 963
},
{
"epoch": 0.25638297872340426,
"grad_norm": 4.145402431488037,
"learning_rate": 9.999955444770671e-06,
"loss": 1.3856,
"step": 964
},
{
"epoch": 0.2566489361702128,
"grad_norm": 4.4032206535339355,
"learning_rate": 9.99995165448817e-06,
"loss": 1.3649,
"step": 965
},
{
"epoch": 0.2569148936170213,
"grad_norm": 4.492345333099365,
"learning_rate": 9.999947709501282e-06,
"loss": 1.2992,
"step": 966
},
{
"epoch": 0.25718085106382976,
"grad_norm": 4.298032760620117,
"learning_rate": 9.999943609810125e-06,
"loss": 1.3756,
"step": 967
},
{
"epoch": 0.2574468085106383,
"grad_norm": 3.9896862506866455,
"learning_rate": 9.999939355414825e-06,
"loss": 1.2034,
"step": 968
},
{
"epoch": 0.2577127659574468,
"grad_norm": 4.537227630615234,
"learning_rate": 9.999934946315516e-06,
"loss": 1.2959,
"step": 969
},
{
"epoch": 0.2579787234042553,
"grad_norm": 4.087522029876709,
"learning_rate": 9.999930382512331e-06,
"loss": 1.2928,
"step": 970
},
{
"epoch": 0.25824468085106383,
"grad_norm": 4.388976573944092,
"learning_rate": 9.999925664005415e-06,
"loss": 1.2452,
"step": 971
},
{
"epoch": 0.25851063829787235,
"grad_norm": 4.264836311340332,
"learning_rate": 9.99992079079491e-06,
"loss": 1.3477,
"step": 972
},
{
"epoch": 0.25877659574468087,
"grad_norm": 4.548455715179443,
"learning_rate": 9.999915762880971e-06,
"loss": 1.2818,
"step": 973
},
{
"epoch": 0.2590425531914894,
"grad_norm": 4.096053600311279,
"learning_rate": 9.99991058026375e-06,
"loss": 1.1407,
"step": 974
},
{
"epoch": 0.25930851063829785,
"grad_norm": 4.8142571449279785,
"learning_rate": 9.99990524294341e-06,
"loss": 1.5322,
"step": 975
},
{
"epoch": 0.25957446808510637,
"grad_norm": 4.194404602050781,
"learning_rate": 9.999899750920115e-06,
"loss": 1.2874,
"step": 976
},
{
"epoch": 0.2598404255319149,
"grad_norm": 3.905287504196167,
"learning_rate": 9.999894104194037e-06,
"loss": 1.1986,
"step": 977
},
{
"epoch": 0.2601063829787234,
"grad_norm": 4.401111602783203,
"learning_rate": 9.999888302765347e-06,
"loss": 1.2148,
"step": 978
},
{
"epoch": 0.2603723404255319,
"grad_norm": 4.558286666870117,
"learning_rate": 9.999882346634225e-06,
"loss": 1.247,
"step": 979
},
{
"epoch": 0.26063829787234044,
"grad_norm": 3.902086019515991,
"learning_rate": 9.999876235800859e-06,
"loss": 1.3395,
"step": 980
},
{
"epoch": 0.26090425531914896,
"grad_norm": 4.327469825744629,
"learning_rate": 9.999869970265434e-06,
"loss": 1.301,
"step": 981
},
{
"epoch": 0.2611702127659574,
"grad_norm": 4.4269609451293945,
"learning_rate": 9.999863550028147e-06,
"loss": 1.3436,
"step": 982
},
{
"epoch": 0.26143617021276594,
"grad_norm": 4.277595520019531,
"learning_rate": 9.999856975089193e-06,
"loss": 1.3487,
"step": 983
},
{
"epoch": 0.26170212765957446,
"grad_norm": 5.5637311935424805,
"learning_rate": 9.99985024544878e-06,
"loss": 1.3848,
"step": 984
},
{
"epoch": 0.261968085106383,
"grad_norm": 4.938830852508545,
"learning_rate": 9.999843361107111e-06,
"loss": 1.2637,
"step": 985
},
{
"epoch": 0.2622340425531915,
"grad_norm": 4.1854376792907715,
"learning_rate": 9.999836322064404e-06,
"loss": 1.2802,
"step": 986
},
{
"epoch": 0.2625,
"grad_norm": 4.120711803436279,
"learning_rate": 9.999829128320873e-06,
"loss": 1.2468,
"step": 987
},
{
"epoch": 0.26276595744680853,
"grad_norm": 4.207146167755127,
"learning_rate": 9.999821779876744e-06,
"loss": 1.2662,
"step": 988
},
{
"epoch": 0.26303191489361705,
"grad_norm": 4.666594505310059,
"learning_rate": 9.999814276732242e-06,
"loss": 1.3755,
"step": 989
},
{
"epoch": 0.2632978723404255,
"grad_norm": 4.344621181488037,
"learning_rate": 9.9998066188876e-06,
"loss": 1.3096,
"step": 990
},
{
"epoch": 0.263563829787234,
"grad_norm": 4.433095455169678,
"learning_rate": 9.999798806343055e-06,
"loss": 1.3499,
"step": 991
},
{
"epoch": 0.26382978723404255,
"grad_norm": 4.92564058303833,
"learning_rate": 9.999790839098847e-06,
"loss": 1.281,
"step": 992
},
{
"epoch": 0.26409574468085106,
"grad_norm": 4.6375603675842285,
"learning_rate": 9.999782717155225e-06,
"loss": 1.3261,
"step": 993
},
{
"epoch": 0.2643617021276596,
"grad_norm": 4.372560024261475,
"learning_rate": 9.999774440512438e-06,
"loss": 1.186,
"step": 994
},
{
"epoch": 0.2646276595744681,
"grad_norm": 4.910377502441406,
"learning_rate": 9.999766009170743e-06,
"loss": 1.4187,
"step": 995
},
{
"epoch": 0.2648936170212766,
"grad_norm": 4.599401473999023,
"learning_rate": 9.999757423130402e-06,
"loss": 1.4278,
"step": 996
},
{
"epoch": 0.2651595744680851,
"grad_norm": 4.204658508300781,
"learning_rate": 9.999748682391682e-06,
"loss": 1.3376,
"step": 997
},
{
"epoch": 0.2654255319148936,
"grad_norm": 4.476613998413086,
"learning_rate": 9.999739786954849e-06,
"loss": 1.1909,
"step": 998
},
{
"epoch": 0.2656914893617021,
"grad_norm": 4.173623561859131,
"learning_rate": 9.999730736820182e-06,
"loss": 1.2678,
"step": 999
},
{
"epoch": 0.26595744680851063,
"grad_norm": 4.294970989227295,
"learning_rate": 9.999721531987958e-06,
"loss": 1.224,
"step": 1000
},
{
"epoch": 0.26595744680851063,
"eval_loss": 1.3182601928710938,
"eval_runtime": 12.5838,
"eval_samples_per_second": 31.787,
"eval_steps_per_second": 3.973,
"step": 1000
},
{
"epoch": 0.26622340425531915,
"grad_norm": 4.1402411460876465,
"learning_rate": 9.999712172458462e-06,
"loss": 1.1836,
"step": 1001
},
{
"epoch": 0.26648936170212767,
"grad_norm": 5.045607566833496,
"learning_rate": 9.999702658231987e-06,
"loss": 1.2545,
"step": 1002
},
{
"epoch": 0.2667553191489362,
"grad_norm": 4.2975921630859375,
"learning_rate": 9.999692989308827e-06,
"loss": 1.4903,
"step": 1003
},
{
"epoch": 0.2670212765957447,
"grad_norm": 4.366122245788574,
"learning_rate": 9.999683165689277e-06,
"loss": 1.3197,
"step": 1004
},
{
"epoch": 0.26728723404255317,
"grad_norm": 4.20319938659668,
"learning_rate": 9.999673187373644e-06,
"loss": 1.5023,
"step": 1005
},
{
"epoch": 0.2675531914893617,
"grad_norm": 4.779364109039307,
"learning_rate": 9.999663054362236e-06,
"loss": 1.4043,
"step": 1006
},
{
"epoch": 0.2678191489361702,
"grad_norm": 4.18774938583374,
"learning_rate": 9.999652766655367e-06,
"loss": 1.2043,
"step": 1007
},
{
"epoch": 0.2680851063829787,
"grad_norm": 4.277698040008545,
"learning_rate": 9.999642324253357e-06,
"loss": 1.3012,
"step": 1008
},
{
"epoch": 0.26835106382978724,
"grad_norm": 4.673196315765381,
"learning_rate": 9.999631727156523e-06,
"loss": 1.4028,
"step": 1009
},
{
"epoch": 0.26861702127659576,
"grad_norm": 3.9610633850097656,
"learning_rate": 9.9996209753652e-06,
"loss": 1.2564,
"step": 1010
},
{
"epoch": 0.2688829787234043,
"grad_norm": 4.724634170532227,
"learning_rate": 9.999610068879717e-06,
"loss": 1.2371,
"step": 1011
},
{
"epoch": 0.2691489361702128,
"grad_norm": 4.770898342132568,
"learning_rate": 9.999599007700411e-06,
"loss": 1.3291,
"step": 1012
},
{
"epoch": 0.26941489361702126,
"grad_norm": 4.2460551261901855,
"learning_rate": 9.999587791827627e-06,
"loss": 1.321,
"step": 1013
},
{
"epoch": 0.2696808510638298,
"grad_norm": 4.29102897644043,
"learning_rate": 9.99957642126171e-06,
"loss": 1.2469,
"step": 1014
},
{
"epoch": 0.2699468085106383,
"grad_norm": 4.516227722167969,
"learning_rate": 9.999564896003013e-06,
"loss": 1.2158,
"step": 1015
},
{
"epoch": 0.2702127659574468,
"grad_norm": 4.530557632446289,
"learning_rate": 9.999553216051892e-06,
"loss": 1.3454,
"step": 1016
},
{
"epoch": 0.27047872340425533,
"grad_norm": 4.2970290184021,
"learning_rate": 9.999541381408706e-06,
"loss": 1.3784,
"step": 1017
},
{
"epoch": 0.27074468085106385,
"grad_norm": 4.136434078216553,
"learning_rate": 9.999529392073825e-06,
"loss": 1.2268,
"step": 1018
},
{
"epoch": 0.27101063829787236,
"grad_norm": 4.108096122741699,
"learning_rate": 9.999517248047618e-06,
"loss": 1.2798,
"step": 1019
},
{
"epoch": 0.2712765957446808,
"grad_norm": 4.367121696472168,
"learning_rate": 9.99950494933046e-06,
"loss": 1.2629,
"step": 1020
},
{
"epoch": 0.27154255319148934,
"grad_norm": 4.400355815887451,
"learning_rate": 9.999492495922735e-06,
"loss": 1.3386,
"step": 1021
},
{
"epoch": 0.27180851063829786,
"grad_norm": 4.384739875793457,
"learning_rate": 9.999479887824826e-06,
"loss": 1.2904,
"step": 1022
},
{
"epoch": 0.2720744680851064,
"grad_norm": 4.273925304412842,
"learning_rate": 9.999467125037121e-06,
"loss": 1.268,
"step": 1023
},
{
"epoch": 0.2723404255319149,
"grad_norm": 4.222406387329102,
"learning_rate": 9.999454207560019e-06,
"loss": 1.2875,
"step": 1024
},
{
"epoch": 0.2726063829787234,
"grad_norm": 4.79681396484375,
"learning_rate": 9.999441135393917e-06,
"loss": 1.3315,
"step": 1025
},
{
"epoch": 0.27287234042553193,
"grad_norm": 4.473938941955566,
"learning_rate": 9.99942790853922e-06,
"loss": 1.4033,
"step": 1026
},
{
"epoch": 0.27313829787234045,
"grad_norm": 4.128412246704102,
"learning_rate": 9.999414526996337e-06,
"loss": 1.1818,
"step": 1027
},
{
"epoch": 0.2734042553191489,
"grad_norm": 4.2525739669799805,
"learning_rate": 9.999400990765683e-06,
"loss": 1.2004,
"step": 1028
},
{
"epoch": 0.27367021276595743,
"grad_norm": 4.565985202789307,
"learning_rate": 9.999387299847677e-06,
"loss": 1.3035,
"step": 1029
},
{
"epoch": 0.27393617021276595,
"grad_norm": 4.308706283569336,
"learning_rate": 9.99937345424274e-06,
"loss": 1.2976,
"step": 1030
},
{
"epoch": 0.27420212765957447,
"grad_norm": 4.31046724319458,
"learning_rate": 9.999359453951303e-06,
"loss": 1.3213,
"step": 1031
},
{
"epoch": 0.274468085106383,
"grad_norm": 4.618355751037598,
"learning_rate": 9.9993452989738e-06,
"loss": 1.3231,
"step": 1032
},
{
"epoch": 0.2747340425531915,
"grad_norm": 4.580687999725342,
"learning_rate": 9.999330989310665e-06,
"loss": 1.3654,
"step": 1033
},
{
"epoch": 0.275,
"grad_norm": 4.229262351989746,
"learning_rate": 9.999316524962347e-06,
"loss": 1.2944,
"step": 1034
},
{
"epoch": 0.2752659574468085,
"grad_norm": 3.708747148513794,
"learning_rate": 9.999301905929286e-06,
"loss": 1.154,
"step": 1035
},
{
"epoch": 0.275531914893617,
"grad_norm": 4.275104999542236,
"learning_rate": 9.999287132211938e-06,
"loss": 1.2148,
"step": 1036
},
{
"epoch": 0.2757978723404255,
"grad_norm": 4.225863456726074,
"learning_rate": 9.999272203810763e-06,
"loss": 1.4705,
"step": 1037
},
{
"epoch": 0.27606382978723404,
"grad_norm": 4.132633209228516,
"learning_rate": 9.999257120726219e-06,
"loss": 1.2538,
"step": 1038
},
{
"epoch": 0.27632978723404256,
"grad_norm": 5.643379211425781,
"learning_rate": 9.999241882958772e-06,
"loss": 1.2564,
"step": 1039
},
{
"epoch": 0.2765957446808511,
"grad_norm": 4.306319713592529,
"learning_rate": 9.999226490508897e-06,
"loss": 1.4085,
"step": 1040
},
{
"epoch": 0.2768617021276596,
"grad_norm": 4.2022247314453125,
"learning_rate": 9.99921094337707e-06,
"loss": 1.3632,
"step": 1041
},
{
"epoch": 0.2771276595744681,
"grad_norm": 4.866800785064697,
"learning_rate": 9.999195241563768e-06,
"loss": 1.3262,
"step": 1042
},
{
"epoch": 0.2773936170212766,
"grad_norm": 4.111828327178955,
"learning_rate": 9.99917938506948e-06,
"loss": 1.3087,
"step": 1043
},
{
"epoch": 0.2776595744680851,
"grad_norm": 4.37149715423584,
"learning_rate": 9.999163373894696e-06,
"loss": 1.2089,
"step": 1044
},
{
"epoch": 0.2779255319148936,
"grad_norm": 4.524958610534668,
"learning_rate": 9.999147208039912e-06,
"loss": 1.1935,
"step": 1045
},
{
"epoch": 0.2781914893617021,
"grad_norm": 4.5271406173706055,
"learning_rate": 9.999130887505627e-06,
"loss": 1.3111,
"step": 1046
},
{
"epoch": 0.27845744680851064,
"grad_norm": 4.4966301918029785,
"learning_rate": 9.999114412292347e-06,
"loss": 1.3695,
"step": 1047
},
{
"epoch": 0.27872340425531916,
"grad_norm": 4.8100714683532715,
"learning_rate": 9.999097782400582e-06,
"loss": 1.3152,
"step": 1048
},
{
"epoch": 0.2789893617021277,
"grad_norm": 4.238595962524414,
"learning_rate": 9.999080997830845e-06,
"loss": 1.2533,
"step": 1049
},
{
"epoch": 0.27925531914893614,
"grad_norm": 4.036017417907715,
"learning_rate": 9.999064058583657e-06,
"loss": 1.1984,
"step": 1050
},
{
"epoch": 0.27952127659574466,
"grad_norm": 4.587932586669922,
"learning_rate": 9.99904696465954e-06,
"loss": 1.2216,
"step": 1051
},
{
"epoch": 0.2797872340425532,
"grad_norm": 5.027749538421631,
"learning_rate": 9.999029716059026e-06,
"loss": 1.4618,
"step": 1052
},
{
"epoch": 0.2800531914893617,
"grad_norm": 4.331791400909424,
"learning_rate": 9.999012312782645e-06,
"loss": 1.2566,
"step": 1053
},
{
"epoch": 0.2803191489361702,
"grad_norm": 4.737422943115234,
"learning_rate": 9.99899475483094e-06,
"loss": 1.2935,
"step": 1054
},
{
"epoch": 0.28058510638297873,
"grad_norm": 4.8805832862854,
"learning_rate": 9.998977042204449e-06,
"loss": 1.3277,
"step": 1055
},
{
"epoch": 0.28085106382978725,
"grad_norm": 4.296173095703125,
"learning_rate": 9.998959174903725e-06,
"loss": 1.341,
"step": 1056
},
{
"epoch": 0.28111702127659577,
"grad_norm": 4.3713788986206055,
"learning_rate": 9.998941152929316e-06,
"loss": 1.308,
"step": 1057
},
{
"epoch": 0.28138297872340423,
"grad_norm": 4.576108932495117,
"learning_rate": 9.998922976281785e-06,
"loss": 1.2585,
"step": 1058
},
{
"epoch": 0.28164893617021275,
"grad_norm": 4.187806129455566,
"learning_rate": 9.998904644961689e-06,
"loss": 1.393,
"step": 1059
},
{
"epoch": 0.28191489361702127,
"grad_norm": 4.360199928283691,
"learning_rate": 9.9988861589696e-06,
"loss": 1.4,
"step": 1060
},
{
"epoch": 0.2821808510638298,
"grad_norm": 4.283745288848877,
"learning_rate": 9.998867518306087e-06,
"loss": 1.2823,
"step": 1061
},
{
"epoch": 0.2824468085106383,
"grad_norm": 3.8223369121551514,
"learning_rate": 9.998848722971727e-06,
"loss": 1.3144,
"step": 1062
},
{
"epoch": 0.2827127659574468,
"grad_norm": 4.405114650726318,
"learning_rate": 9.998829772967103e-06,
"loss": 1.4051,
"step": 1063
},
{
"epoch": 0.28297872340425534,
"grad_norm": 4.547544479370117,
"learning_rate": 9.9988106682928e-06,
"loss": 1.2622,
"step": 1064
},
{
"epoch": 0.28324468085106386,
"grad_norm": 3.850954055786133,
"learning_rate": 9.998791408949408e-06,
"loss": 1.197,
"step": 1065
},
{
"epoch": 0.2835106382978723,
"grad_norm": 3.994758367538452,
"learning_rate": 9.998771994937528e-06,
"loss": 1.1907,
"step": 1066
},
{
"epoch": 0.28377659574468084,
"grad_norm": 4.24208927154541,
"learning_rate": 9.998752426257754e-06,
"loss": 1.4078,
"step": 1067
},
{
"epoch": 0.28404255319148936,
"grad_norm": 4.435787200927734,
"learning_rate": 9.998732702910697e-06,
"loss": 1.2044,
"step": 1068
},
{
"epoch": 0.2843085106382979,
"grad_norm": 4.169311046600342,
"learning_rate": 9.998712824896963e-06,
"loss": 1.2126,
"step": 1069
},
{
"epoch": 0.2845744680851064,
"grad_norm": 4.478437900543213,
"learning_rate": 9.99869279221717e-06,
"loss": 1.3164,
"step": 1070
},
{
"epoch": 0.2848404255319149,
"grad_norm": 4.775943756103516,
"learning_rate": 9.998672604871936e-06,
"loss": 1.3169,
"step": 1071
},
{
"epoch": 0.2851063829787234,
"grad_norm": 4.637179374694824,
"learning_rate": 9.998652262861888e-06,
"loss": 1.2441,
"step": 1072
},
{
"epoch": 0.2853723404255319,
"grad_norm": 4.511475086212158,
"learning_rate": 9.998631766187651e-06,
"loss": 1.3766,
"step": 1073
},
{
"epoch": 0.2856382978723404,
"grad_norm": 4.503199100494385,
"learning_rate": 9.998611114849866e-06,
"loss": 1.1787,
"step": 1074
},
{
"epoch": 0.2859042553191489,
"grad_norm": 4.549198627471924,
"learning_rate": 9.998590308849164e-06,
"loss": 1.3229,
"step": 1075
},
{
"epoch": 0.28617021276595744,
"grad_norm": 4.182891368865967,
"learning_rate": 9.998569348186194e-06,
"loss": 1.2659,
"step": 1076
},
{
"epoch": 0.28643617021276596,
"grad_norm": 4.964444160461426,
"learning_rate": 9.998548232861604e-06,
"loss": 1.4196,
"step": 1077
},
{
"epoch": 0.2867021276595745,
"grad_norm": 4.905456066131592,
"learning_rate": 9.998526962876047e-06,
"loss": 1.3089,
"step": 1078
},
{
"epoch": 0.286968085106383,
"grad_norm": 4.207391738891602,
"learning_rate": 9.998505538230179e-06,
"loss": 1.3231,
"step": 1079
},
{
"epoch": 0.2872340425531915,
"grad_norm": 4.414906024932861,
"learning_rate": 9.998483958924666e-06,
"loss": 1.229,
"step": 1080
},
{
"epoch": 0.2875,
"grad_norm": 4.2714667320251465,
"learning_rate": 9.998462224960176e-06,
"loss": 1.4204,
"step": 1081
},
{
"epoch": 0.2877659574468085,
"grad_norm": 4.423734188079834,
"learning_rate": 9.998440336337376e-06,
"loss": 1.3774,
"step": 1082
},
{
"epoch": 0.288031914893617,
"grad_norm": 4.450468063354492,
"learning_rate": 9.998418293056949e-06,
"loss": 1.2639,
"step": 1083
},
{
"epoch": 0.28829787234042553,
"grad_norm": 4.328600883483887,
"learning_rate": 9.998396095119575e-06,
"loss": 1.3594,
"step": 1084
},
{
"epoch": 0.28856382978723405,
"grad_norm": 4.951174259185791,
"learning_rate": 9.998373742525941e-06,
"loss": 1.4862,
"step": 1085
},
{
"epoch": 0.28882978723404257,
"grad_norm": 4.484705924987793,
"learning_rate": 9.998351235276738e-06,
"loss": 1.3577,
"step": 1086
},
{
"epoch": 0.2890957446808511,
"grad_norm": 4.428178310394287,
"learning_rate": 9.998328573372664e-06,
"loss": 1.2438,
"step": 1087
},
{
"epoch": 0.28936170212765955,
"grad_norm": 4.682640552520752,
"learning_rate": 9.998305756814419e-06,
"loss": 1.3493,
"step": 1088
},
{
"epoch": 0.28962765957446807,
"grad_norm": 4.30879020690918,
"learning_rate": 9.998282785602709e-06,
"loss": 1.253,
"step": 1089
},
{
"epoch": 0.2898936170212766,
"grad_norm": 4.327608108520508,
"learning_rate": 9.998259659738243e-06,
"loss": 1.3574,
"step": 1090
},
{
"epoch": 0.2901595744680851,
"grad_norm": 3.996189594268799,
"learning_rate": 9.998236379221742e-06,
"loss": 1.1811,
"step": 1091
},
{
"epoch": 0.2904255319148936,
"grad_norm": 4.262546062469482,
"learning_rate": 9.99821294405392e-06,
"loss": 1.1899,
"step": 1092
},
{
"epoch": 0.29069148936170214,
"grad_norm": 3.7779383659362793,
"learning_rate": 9.998189354235506e-06,
"loss": 1.3034,
"step": 1093
},
{
"epoch": 0.29095744680851066,
"grad_norm": 4.748449325561523,
"learning_rate": 9.998165609767228e-06,
"loss": 1.1943,
"step": 1094
},
{
"epoch": 0.2912234042553192,
"grad_norm": 4.325401782989502,
"learning_rate": 9.998141710649822e-06,
"loss": 1.2955,
"step": 1095
},
{
"epoch": 0.29148936170212764,
"grad_norm": 4.276817321777344,
"learning_rate": 9.998117656884025e-06,
"loss": 1.2853,
"step": 1096
},
{
"epoch": 0.29175531914893615,
"grad_norm": 4.66014289855957,
"learning_rate": 9.998093448470585e-06,
"loss": 1.2643,
"step": 1097
},
{
"epoch": 0.29202127659574467,
"grad_norm": 3.963014602661133,
"learning_rate": 9.998069085410249e-06,
"loss": 1.2145,
"step": 1098
},
{
"epoch": 0.2922872340425532,
"grad_norm": 4.040323734283447,
"learning_rate": 9.99804456770377e-06,
"loss": 1.3845,
"step": 1099
},
{
"epoch": 0.2925531914893617,
"grad_norm": 3.8575801849365234,
"learning_rate": 9.99801989535191e-06,
"loss": 1.131,
"step": 1100
},
{
"epoch": 0.2928191489361702,
"grad_norm": 4.067200183868408,
"learning_rate": 9.997995068355428e-06,
"loss": 1.352,
"step": 1101
},
{
"epoch": 0.29308510638297874,
"grad_norm": 4.207942962646484,
"learning_rate": 9.997970086715096e-06,
"loss": 1.2372,
"step": 1102
},
{
"epoch": 0.2933510638297872,
"grad_norm": 4.058019638061523,
"learning_rate": 9.997944950431684e-06,
"loss": 1.203,
"step": 1103
},
{
"epoch": 0.2936170212765957,
"grad_norm": 4.622230052947998,
"learning_rate": 9.99791965950597e-06,
"loss": 1.3916,
"step": 1104
},
{
"epoch": 0.29388297872340424,
"grad_norm": 4.3508076667785645,
"learning_rate": 9.997894213938738e-06,
"loss": 1.3344,
"step": 1105
},
{
"epoch": 0.29414893617021276,
"grad_norm": 3.9889092445373535,
"learning_rate": 9.997868613730775e-06,
"loss": 1.1658,
"step": 1106
},
{
"epoch": 0.2944148936170213,
"grad_norm": 4.091287136077881,
"learning_rate": 9.997842858882873e-06,
"loss": 1.3258,
"step": 1107
},
{
"epoch": 0.2946808510638298,
"grad_norm": 4.280172824859619,
"learning_rate": 9.997816949395828e-06,
"loss": 1.3231,
"step": 1108
},
{
"epoch": 0.2949468085106383,
"grad_norm": 4.268125057220459,
"learning_rate": 9.997790885270444e-06,
"loss": 1.1984,
"step": 1109
},
{
"epoch": 0.29521276595744683,
"grad_norm": 4.030393600463867,
"learning_rate": 9.997764666507523e-06,
"loss": 1.3441,
"step": 1110
},
{
"epoch": 0.2954787234042553,
"grad_norm": 4.591287136077881,
"learning_rate": 9.997738293107882e-06,
"loss": 1.3059,
"step": 1111
},
{
"epoch": 0.2957446808510638,
"grad_norm": 5.225955486297607,
"learning_rate": 9.997711765072333e-06,
"loss": 1.3236,
"step": 1112
},
{
"epoch": 0.29601063829787233,
"grad_norm": 4.161701679229736,
"learning_rate": 9.997685082401698e-06,
"loss": 1.2,
"step": 1113
},
{
"epoch": 0.29627659574468085,
"grad_norm": 4.316693305969238,
"learning_rate": 9.997658245096802e-06,
"loss": 1.2758,
"step": 1114
},
{
"epoch": 0.29654255319148937,
"grad_norm": 4.311786651611328,
"learning_rate": 9.997631253158477e-06,
"loss": 1.1873,
"step": 1115
},
{
"epoch": 0.2968085106382979,
"grad_norm": 4.271190643310547,
"learning_rate": 9.997604106587555e-06,
"loss": 1.1661,
"step": 1116
},
{
"epoch": 0.2970744680851064,
"grad_norm": 4.620399475097656,
"learning_rate": 9.99757680538488e-06,
"loss": 1.3542,
"step": 1117
},
{
"epoch": 0.2973404255319149,
"grad_norm": 4.287705421447754,
"learning_rate": 9.997549349551295e-06,
"loss": 1.3467,
"step": 1118
},
{
"epoch": 0.2976063829787234,
"grad_norm": 4.158224105834961,
"learning_rate": 9.997521739087647e-06,
"loss": 1.229,
"step": 1119
},
{
"epoch": 0.2978723404255319,
"grad_norm": 4.308200836181641,
"learning_rate": 9.997493973994793e-06,
"loss": 1.3478,
"step": 1120
},
{
"epoch": 0.2981382978723404,
"grad_norm": 4.467398643493652,
"learning_rate": 9.997466054273593e-06,
"loss": 1.2729,
"step": 1121
},
{
"epoch": 0.29840425531914894,
"grad_norm": 4.264455318450928,
"learning_rate": 9.997437979924908e-06,
"loss": 1.234,
"step": 1122
},
{
"epoch": 0.29867021276595745,
"grad_norm": 4.258848190307617,
"learning_rate": 9.99740975094961e-06,
"loss": 1.1682,
"step": 1123
},
{
"epoch": 0.298936170212766,
"grad_norm": 4.3061089515686035,
"learning_rate": 9.99738136734857e-06,
"loss": 1.3241,
"step": 1124
},
{
"epoch": 0.2992021276595745,
"grad_norm": 4.324080467224121,
"learning_rate": 9.997352829122667e-06,
"loss": 1.254,
"step": 1125
},
{
"epoch": 0.29946808510638295,
"grad_norm": 4.312755584716797,
"learning_rate": 9.997324136272784e-06,
"loss": 1.309,
"step": 1126
},
{
"epoch": 0.29973404255319147,
"grad_norm": 4.023726463317871,
"learning_rate": 9.997295288799806e-06,
"loss": 1.238,
"step": 1127
},
{
"epoch": 0.3,
"grad_norm": 4.355762004852295,
"learning_rate": 9.99726628670463e-06,
"loss": 1.2271,
"step": 1128
},
{
"epoch": 0.3002659574468085,
"grad_norm": 4.85224723815918,
"learning_rate": 9.997237129988154e-06,
"loss": 1.2849,
"step": 1129
},
{
"epoch": 0.300531914893617,
"grad_norm": 4.464909553527832,
"learning_rate": 9.997207818651273e-06,
"loss": 1.2992,
"step": 1130
},
{
"epoch": 0.30079787234042554,
"grad_norm": 3.7525863647460938,
"learning_rate": 9.997178352694902e-06,
"loss": 1.1764,
"step": 1131
},
{
"epoch": 0.30106382978723406,
"grad_norm": 4.892136096954346,
"learning_rate": 9.997148732119947e-06,
"loss": 1.4041,
"step": 1132
},
{
"epoch": 0.3013297872340426,
"grad_norm": 3.8774726390838623,
"learning_rate": 9.99711895692733e-06,
"loss": 1.1936,
"step": 1133
},
{
"epoch": 0.30159574468085104,
"grad_norm": 4.585043907165527,
"learning_rate": 9.997089027117966e-06,
"loss": 1.2402,
"step": 1134
},
{
"epoch": 0.30186170212765956,
"grad_norm": 4.731383800506592,
"learning_rate": 9.997058942692786e-06,
"loss": 1.3886,
"step": 1135
},
{
"epoch": 0.3021276595744681,
"grad_norm": 4.4259033203125,
"learning_rate": 9.997028703652718e-06,
"loss": 1.4784,
"step": 1136
},
{
"epoch": 0.3023936170212766,
"grad_norm": 4.584959030151367,
"learning_rate": 9.996998309998699e-06,
"loss": 1.1575,
"step": 1137
},
{
"epoch": 0.3026595744680851,
"grad_norm": 4.300727844238281,
"learning_rate": 9.996967761731668e-06,
"loss": 1.3999,
"step": 1138
},
{
"epoch": 0.30292553191489363,
"grad_norm": 4.30328893661499,
"learning_rate": 9.996937058852575e-06,
"loss": 1.3061,
"step": 1139
},
{
"epoch": 0.30319148936170215,
"grad_norm": 4.1981964111328125,
"learning_rate": 9.996906201362361e-06,
"loss": 1.3078,
"step": 1140
},
{
"epoch": 0.3034574468085106,
"grad_norm": 4.507598876953125,
"learning_rate": 9.99687518926199e-06,
"loss": 1.3732,
"step": 1141
},
{
"epoch": 0.30372340425531913,
"grad_norm": 4.559037685394287,
"learning_rate": 9.996844022552416e-06,
"loss": 1.3447,
"step": 1142
},
{
"epoch": 0.30398936170212765,
"grad_norm": 4.10542106628418,
"learning_rate": 9.996812701234604e-06,
"loss": 1.2118,
"step": 1143
},
{
"epoch": 0.30425531914893617,
"grad_norm": 4.441193103790283,
"learning_rate": 9.996781225309526e-06,
"loss": 1.3549,
"step": 1144
},
{
"epoch": 0.3045212765957447,
"grad_norm": 4.166191577911377,
"learning_rate": 9.996749594778153e-06,
"loss": 1.3067,
"step": 1145
},
{
"epoch": 0.3047872340425532,
"grad_norm": 4.284362316131592,
"learning_rate": 9.996717809641464e-06,
"loss": 1.31,
"step": 1146
},
{
"epoch": 0.3050531914893617,
"grad_norm": 4.457339286804199,
"learning_rate": 9.996685869900444e-06,
"loss": 1.2858,
"step": 1147
},
{
"epoch": 0.30531914893617024,
"grad_norm": 5.572897434234619,
"learning_rate": 9.99665377555608e-06,
"loss": 1.3094,
"step": 1148
},
{
"epoch": 0.3055851063829787,
"grad_norm": 3.9291319847106934,
"learning_rate": 9.996621526609364e-06,
"loss": 1.1499,
"step": 1149
},
{
"epoch": 0.3058510638297872,
"grad_norm": 4.23716926574707,
"learning_rate": 9.996589123061297e-06,
"loss": 1.1395,
"step": 1150
},
{
"epoch": 0.30611702127659574,
"grad_norm": 4.1819047927856445,
"learning_rate": 9.99655656491288e-06,
"loss": 1.2152,
"step": 1151
},
{
"epoch": 0.30638297872340425,
"grad_norm": 4.467685222625732,
"learning_rate": 9.99652385216512e-06,
"loss": 1.38,
"step": 1152
},
{
"epoch": 0.30664893617021277,
"grad_norm": 3.723454236984253,
"learning_rate": 9.996490984819027e-06,
"loss": 1.1745,
"step": 1153
},
{
"epoch": 0.3069148936170213,
"grad_norm": 4.097151756286621,
"learning_rate": 9.996457962875623e-06,
"loss": 1.3743,
"step": 1154
},
{
"epoch": 0.3071808510638298,
"grad_norm": 4.7414326667785645,
"learning_rate": 9.996424786335925e-06,
"loss": 1.4252,
"step": 1155
},
{
"epoch": 0.3074468085106383,
"grad_norm": 3.7857699394226074,
"learning_rate": 9.996391455200963e-06,
"loss": 1.2984,
"step": 1156
},
{
"epoch": 0.3077127659574468,
"grad_norm": 4.953484535217285,
"learning_rate": 9.996357969471767e-06,
"loss": 1.3539,
"step": 1157
},
{
"epoch": 0.3079787234042553,
"grad_norm": 4.564802646636963,
"learning_rate": 9.996324329149372e-06,
"loss": 1.2833,
"step": 1158
},
{
"epoch": 0.3082446808510638,
"grad_norm": 4.2867045402526855,
"learning_rate": 9.99629053423482e-06,
"loss": 1.2933,
"step": 1159
},
{
"epoch": 0.30851063829787234,
"grad_norm": 4.2070817947387695,
"learning_rate": 9.996256584729157e-06,
"loss": 1.163,
"step": 1160
},
{
"epoch": 0.30877659574468086,
"grad_norm": 4.603311061859131,
"learning_rate": 9.996222480633433e-06,
"loss": 1.2404,
"step": 1161
},
{
"epoch": 0.3090425531914894,
"grad_norm": 4.443660736083984,
"learning_rate": 9.996188221948702e-06,
"loss": 1.3518,
"step": 1162
},
{
"epoch": 0.3093085106382979,
"grad_norm": 4.2897443771362305,
"learning_rate": 9.996153808676025e-06,
"loss": 1.2786,
"step": 1163
},
{
"epoch": 0.30957446808510636,
"grad_norm": 4.69590425491333,
"learning_rate": 9.996119240816469e-06,
"loss": 1.3259,
"step": 1164
},
{
"epoch": 0.3098404255319149,
"grad_norm": 4.064958095550537,
"learning_rate": 9.996084518371101e-06,
"loss": 1.2768,
"step": 1165
},
{
"epoch": 0.3101063829787234,
"grad_norm": 4.3534626960754395,
"learning_rate": 9.996049641340994e-06,
"loss": 1.3245,
"step": 1166
},
{
"epoch": 0.3103723404255319,
"grad_norm": 4.278623580932617,
"learning_rate": 9.996014609727232e-06,
"loss": 1.405,
"step": 1167
},
{
"epoch": 0.31063829787234043,
"grad_norm": 4.835923671722412,
"learning_rate": 9.995979423530893e-06,
"loss": 1.2416,
"step": 1168
},
{
"epoch": 0.31090425531914895,
"grad_norm": 4.191746711730957,
"learning_rate": 9.99594408275307e-06,
"loss": 1.154,
"step": 1169
},
{
"epoch": 0.31117021276595747,
"grad_norm": 3.9082558155059814,
"learning_rate": 9.995908587394854e-06,
"loss": 1.2412,
"step": 1170
},
{
"epoch": 0.311436170212766,
"grad_norm": 4.342267036437988,
"learning_rate": 9.995872937457345e-06,
"loss": 1.2312,
"step": 1171
},
{
"epoch": 0.31170212765957445,
"grad_norm": 4.569537162780762,
"learning_rate": 9.995837132941646e-06,
"loss": 1.3551,
"step": 1172
},
{
"epoch": 0.31196808510638296,
"grad_norm": 4.246980667114258,
"learning_rate": 9.995801173848863e-06,
"loss": 1.2517,
"step": 1173
},
{
"epoch": 0.3122340425531915,
"grad_norm": 4.276669025421143,
"learning_rate": 9.995765060180111e-06,
"loss": 1.2417,
"step": 1174
},
{
"epoch": 0.3125,
"grad_norm": 4.076509952545166,
"learning_rate": 9.995728791936505e-06,
"loss": 1.2837,
"step": 1175
},
{
"epoch": 0.3127659574468085,
"grad_norm": 4.078117370605469,
"learning_rate": 9.99569236911917e-06,
"loss": 1.1589,
"step": 1176
},
{
"epoch": 0.31303191489361704,
"grad_norm": 4.253208637237549,
"learning_rate": 9.995655791729231e-06,
"loss": 1.4023,
"step": 1177
},
{
"epoch": 0.31329787234042555,
"grad_norm": 4.0782790184021,
"learning_rate": 9.99561905976782e-06,
"loss": 1.2094,
"step": 1178
},
{
"epoch": 0.313563829787234,
"grad_norm": 4.714814186096191,
"learning_rate": 9.995582173236073e-06,
"loss": 1.2883,
"step": 1179
},
{
"epoch": 0.31382978723404253,
"grad_norm": 4.640500068664551,
"learning_rate": 9.995545132135133e-06,
"loss": 1.3784,
"step": 1180
},
{
"epoch": 0.31409574468085105,
"grad_norm": 4.722717761993408,
"learning_rate": 9.995507936466144e-06,
"loss": 1.2644,
"step": 1181
},
{
"epoch": 0.31436170212765957,
"grad_norm": 4.296687602996826,
"learning_rate": 9.99547058623026e-06,
"loss": 1.2238,
"step": 1182
},
{
"epoch": 0.3146276595744681,
"grad_norm": 4.157870769500732,
"learning_rate": 9.995433081428631e-06,
"loss": 1.2275,
"step": 1183
},
{
"epoch": 0.3148936170212766,
"grad_norm": 4.162895202636719,
"learning_rate": 9.995395422062424e-06,
"loss": 1.2697,
"step": 1184
},
{
"epoch": 0.3151595744680851,
"grad_norm": 4.142743110656738,
"learning_rate": 9.9953576081328e-06,
"loss": 1.2514,
"step": 1185
},
{
"epoch": 0.31542553191489364,
"grad_norm": 4.504545211791992,
"learning_rate": 9.995319639640932e-06,
"loss": 1.1996,
"step": 1186
},
{
"epoch": 0.3156914893617021,
"grad_norm": 4.5642523765563965,
"learning_rate": 9.995281516587992e-06,
"loss": 1.4783,
"step": 1187
},
{
"epoch": 0.3159574468085106,
"grad_norm": 4.14572286605835,
"learning_rate": 9.99524323897516e-06,
"loss": 1.3261,
"step": 1188
},
{
"epoch": 0.31622340425531914,
"grad_norm": 4.159525394439697,
"learning_rate": 9.995204806803622e-06,
"loss": 1.3492,
"step": 1189
},
{
"epoch": 0.31648936170212766,
"grad_norm": 3.9404852390289307,
"learning_rate": 9.995166220074566e-06,
"loss": 1.2726,
"step": 1190
},
{
"epoch": 0.3167553191489362,
"grad_norm": 4.158994197845459,
"learning_rate": 9.995127478789186e-06,
"loss": 1.2472,
"step": 1191
},
{
"epoch": 0.3170212765957447,
"grad_norm": 4.277184009552002,
"learning_rate": 9.995088582948682e-06,
"loss": 1.3549,
"step": 1192
},
{
"epoch": 0.3172872340425532,
"grad_norm": 4.210202217102051,
"learning_rate": 9.995049532554253e-06,
"loss": 1.313,
"step": 1193
},
{
"epoch": 0.3175531914893617,
"grad_norm": 4.146048545837402,
"learning_rate": 9.995010327607113e-06,
"loss": 1.3272,
"step": 1194
},
{
"epoch": 0.3178191489361702,
"grad_norm": 4.287917137145996,
"learning_rate": 9.994970968108473e-06,
"loss": 1.4158,
"step": 1195
},
{
"epoch": 0.3180851063829787,
"grad_norm": 3.8834691047668457,
"learning_rate": 9.99493145405955e-06,
"loss": 1.1957,
"step": 1196
},
{
"epoch": 0.31835106382978723,
"grad_norm": 4.134634494781494,
"learning_rate": 9.994891785461565e-06,
"loss": 1.3806,
"step": 1197
},
{
"epoch": 0.31861702127659575,
"grad_norm": 4.137069225311279,
"learning_rate": 9.99485196231575e-06,
"loss": 1.2337,
"step": 1198
},
{
"epoch": 0.31888297872340426,
"grad_norm": 3.9084503650665283,
"learning_rate": 9.994811984623332e-06,
"loss": 1.1263,
"step": 1199
},
{
"epoch": 0.3191489361702128,
"grad_norm": 4.515985012054443,
"learning_rate": 9.994771852385552e-06,
"loss": 1.3851,
"step": 1200
},
{
"epoch": 0.3194148936170213,
"grad_norm": 4.150672912597656,
"learning_rate": 9.994731565603651e-06,
"loss": 1.2034,
"step": 1201
},
{
"epoch": 0.31968085106382976,
"grad_norm": 4.727832317352295,
"learning_rate": 9.994691124278874e-06,
"loss": 1.3987,
"step": 1202
},
{
"epoch": 0.3199468085106383,
"grad_norm": 4.292087554931641,
"learning_rate": 9.994650528412472e-06,
"loss": 1.3757,
"step": 1203
},
{
"epoch": 0.3202127659574468,
"grad_norm": 4.135016918182373,
"learning_rate": 9.994609778005704e-06,
"loss": 1.3413,
"step": 1204
},
{
"epoch": 0.3204787234042553,
"grad_norm": 4.273712635040283,
"learning_rate": 9.994568873059829e-06,
"loss": 1.2102,
"step": 1205
},
{
"epoch": 0.32074468085106383,
"grad_norm": 4.216573715209961,
"learning_rate": 9.994527813576111e-06,
"loss": 1.3998,
"step": 1206
},
{
"epoch": 0.32101063829787235,
"grad_norm": 3.847257375717163,
"learning_rate": 9.994486599555823e-06,
"loss": 1.1265,
"step": 1207
},
{
"epoch": 0.32127659574468087,
"grad_norm": 4.784033298492432,
"learning_rate": 9.99444523100024e-06,
"loss": 1.3363,
"step": 1208
},
{
"epoch": 0.3215425531914894,
"grad_norm": 4.474783897399902,
"learning_rate": 9.994403707910642e-06,
"loss": 1.2317,
"step": 1209
},
{
"epoch": 0.32180851063829785,
"grad_norm": 4.004277229309082,
"learning_rate": 9.994362030288312e-06,
"loss": 1.2477,
"step": 1210
},
{
"epoch": 0.32207446808510637,
"grad_norm": 3.9819071292877197,
"learning_rate": 9.99432019813454e-06,
"loss": 1.1898,
"step": 1211
},
{
"epoch": 0.3223404255319149,
"grad_norm": 3.8308217525482178,
"learning_rate": 9.994278211450622e-06,
"loss": 1.287,
"step": 1212
},
{
"epoch": 0.3226063829787234,
"grad_norm": 4.272090435028076,
"learning_rate": 9.994236070237854e-06,
"loss": 1.3905,
"step": 1213
},
{
"epoch": 0.3228723404255319,
"grad_norm": 4.1817169189453125,
"learning_rate": 9.994193774497544e-06,
"loss": 1.2512,
"step": 1214
},
{
"epoch": 0.32313829787234044,
"grad_norm": 3.9769554138183594,
"learning_rate": 9.994151324231e-06,
"loss": 1.2287,
"step": 1215
},
{
"epoch": 0.32340425531914896,
"grad_norm": 4.290254592895508,
"learning_rate": 9.994108719439533e-06,
"loss": 1.2741,
"step": 1216
},
{
"epoch": 0.3236702127659574,
"grad_norm": 4.185919284820557,
"learning_rate": 9.994065960124462e-06,
"loss": 1.3203,
"step": 1217
},
{
"epoch": 0.32393617021276594,
"grad_norm": 4.25853967666626,
"learning_rate": 9.994023046287109e-06,
"loss": 1.3062,
"step": 1218
},
{
"epoch": 0.32420212765957446,
"grad_norm": 3.9912209510803223,
"learning_rate": 9.993979977928805e-06,
"loss": 1.1988,
"step": 1219
},
{
"epoch": 0.324468085106383,
"grad_norm": 3.865492343902588,
"learning_rate": 9.993936755050881e-06,
"loss": 1.1626,
"step": 1220
},
{
"epoch": 0.3247340425531915,
"grad_norm": 4.017344951629639,
"learning_rate": 9.993893377654673e-06,
"loss": 1.3626,
"step": 1221
},
{
"epoch": 0.325,
"grad_norm": 3.9618587493896484,
"learning_rate": 9.993849845741525e-06,
"loss": 1.361,
"step": 1222
},
{
"epoch": 0.32526595744680853,
"grad_norm": 4.2321648597717285,
"learning_rate": 9.993806159312783e-06,
"loss": 1.3773,
"step": 1223
},
{
"epoch": 0.32553191489361705,
"grad_norm": 4.570196151733398,
"learning_rate": 9.9937623183698e-06,
"loss": 1.3895,
"step": 1224
},
{
"epoch": 0.3257978723404255,
"grad_norm": 3.9867353439331055,
"learning_rate": 9.99371832291393e-06,
"loss": 1.1623,
"step": 1225
},
{
"epoch": 0.326063829787234,
"grad_norm": 5.1412200927734375,
"learning_rate": 9.993674172946536e-06,
"loss": 1.3987,
"step": 1226
},
{
"epoch": 0.32632978723404255,
"grad_norm": 4.0850605964660645,
"learning_rate": 9.993629868468984e-06,
"loss": 1.2399,
"step": 1227
},
{
"epoch": 0.32659574468085106,
"grad_norm": 5.263411521911621,
"learning_rate": 9.993585409482645e-06,
"loss": 1.311,
"step": 1228
},
{
"epoch": 0.3268617021276596,
"grad_norm": 3.8653786182403564,
"learning_rate": 9.993540795988895e-06,
"loss": 1.1391,
"step": 1229
},
{
"epoch": 0.3271276595744681,
"grad_norm": 4.475793838500977,
"learning_rate": 9.993496027989112e-06,
"loss": 1.2644,
"step": 1230
},
{
"epoch": 0.3273936170212766,
"grad_norm": 4.395388603210449,
"learning_rate": 9.993451105484682e-06,
"loss": 1.342,
"step": 1231
},
{
"epoch": 0.3276595744680851,
"grad_norm": 4.290927410125732,
"learning_rate": 9.993406028476997e-06,
"loss": 1.3893,
"step": 1232
},
{
"epoch": 0.3279255319148936,
"grad_norm": 4.348012924194336,
"learning_rate": 9.993360796967451e-06,
"loss": 1.2903,
"step": 1233
},
{
"epoch": 0.3281914893617021,
"grad_norm": 4.174604415893555,
"learning_rate": 9.993315410957442e-06,
"loss": 1.2951,
"step": 1234
},
{
"epoch": 0.32845744680851063,
"grad_norm": 4.359421253204346,
"learning_rate": 9.993269870448375e-06,
"loss": 1.4433,
"step": 1235
},
{
"epoch": 0.32872340425531915,
"grad_norm": 4.25851583480835,
"learning_rate": 9.99322417544166e-06,
"loss": 1.2445,
"step": 1236
},
{
"epoch": 0.32898936170212767,
"grad_norm": 4.110776901245117,
"learning_rate": 9.993178325938711e-06,
"loss": 1.3569,
"step": 1237
},
{
"epoch": 0.3292553191489362,
"grad_norm": 4.008944988250732,
"learning_rate": 9.993132321940947e-06,
"loss": 1.2227,
"step": 1238
},
{
"epoch": 0.3295212765957447,
"grad_norm": 4.228448390960693,
"learning_rate": 9.993086163449787e-06,
"loss": 1.2388,
"step": 1239
},
{
"epoch": 0.32978723404255317,
"grad_norm": 4.701793193817139,
"learning_rate": 9.993039850466664e-06,
"loss": 1.5212,
"step": 1240
},
{
"epoch": 0.3300531914893617,
"grad_norm": 4.4202094078063965,
"learning_rate": 9.99299338299301e-06,
"loss": 1.2413,
"step": 1241
},
{
"epoch": 0.3303191489361702,
"grad_norm": 4.218541622161865,
"learning_rate": 9.992946761030261e-06,
"loss": 1.2663,
"step": 1242
},
{
"epoch": 0.3305851063829787,
"grad_norm": 4.355581283569336,
"learning_rate": 9.99289998457986e-06,
"loss": 1.3233,
"step": 1243
},
{
"epoch": 0.33085106382978724,
"grad_norm": 4.184298992156982,
"learning_rate": 9.992853053643257e-06,
"loss": 1.3291,
"step": 1244
},
{
"epoch": 0.33111702127659576,
"grad_norm": 4.030219078063965,
"learning_rate": 9.992805968221902e-06,
"loss": 1.3502,
"step": 1245
},
{
"epoch": 0.3313829787234043,
"grad_norm": 4.068756103515625,
"learning_rate": 9.992758728317252e-06,
"loss": 1.1977,
"step": 1246
},
{
"epoch": 0.3316489361702128,
"grad_norm": 4.332919120788574,
"learning_rate": 9.99271133393077e-06,
"loss": 1.2899,
"step": 1247
},
{
"epoch": 0.33191489361702126,
"grad_norm": 3.9694416522979736,
"learning_rate": 9.992663785063919e-06,
"loss": 1.3366,
"step": 1248
},
{
"epoch": 0.3321808510638298,
"grad_norm": 3.924436569213867,
"learning_rate": 9.992616081718171e-06,
"loss": 1.2552,
"step": 1249
},
{
"epoch": 0.3324468085106383,
"grad_norm": 4.128008842468262,
"learning_rate": 9.992568223895007e-06,
"loss": 1.2872,
"step": 1250
},
{
"epoch": 0.3327127659574468,
"grad_norm": 4.744760036468506,
"learning_rate": 9.992520211595902e-06,
"loss": 1.2885,
"step": 1251
},
{
"epoch": 0.33297872340425533,
"grad_norm": 3.722013235092163,
"learning_rate": 9.992472044822344e-06,
"loss": 1.1684,
"step": 1252
},
{
"epoch": 0.33324468085106385,
"grad_norm": 4.375733852386475,
"learning_rate": 9.992423723575822e-06,
"loss": 1.4177,
"step": 1253
},
{
"epoch": 0.33351063829787236,
"grad_norm": 4.03129243850708,
"learning_rate": 9.992375247857833e-06,
"loss": 1.3669,
"step": 1254
},
{
"epoch": 0.3337765957446808,
"grad_norm": 3.828651189804077,
"learning_rate": 9.992326617669876e-06,
"loss": 1.3573,
"step": 1255
},
{
"epoch": 0.33404255319148934,
"grad_norm": 4.016900062561035,
"learning_rate": 9.992277833013457e-06,
"loss": 1.2265,
"step": 1256
},
{
"epoch": 0.33430851063829786,
"grad_norm": 4.38175630569458,
"learning_rate": 9.992228893890084e-06,
"loss": 1.3774,
"step": 1257
},
{
"epoch": 0.3345744680851064,
"grad_norm": 4.081117153167725,
"learning_rate": 9.992179800301269e-06,
"loss": 1.2978,
"step": 1258
},
{
"epoch": 0.3348404255319149,
"grad_norm": 4.280460834503174,
"learning_rate": 9.992130552248535e-06,
"loss": 1.1316,
"step": 1259
},
{
"epoch": 0.3351063829787234,
"grad_norm": 4.5057268142700195,
"learning_rate": 9.992081149733404e-06,
"loss": 1.3776,
"step": 1260
},
{
"epoch": 0.33537234042553193,
"grad_norm": 3.8671257495880127,
"learning_rate": 9.992031592757405e-06,
"loss": 1.3541,
"step": 1261
},
{
"epoch": 0.33563829787234045,
"grad_norm": 4.478667736053467,
"learning_rate": 9.991981881322072e-06,
"loss": 1.3155,
"step": 1262
},
{
"epoch": 0.3359042553191489,
"grad_norm": 5.32509183883667,
"learning_rate": 9.991932015428941e-06,
"loss": 1.3662,
"step": 1263
},
{
"epoch": 0.33617021276595743,
"grad_norm": 4.138638973236084,
"learning_rate": 9.991881995079558e-06,
"loss": 1.3641,
"step": 1264
},
{
"epoch": 0.33643617021276595,
"grad_norm": 4.780951499938965,
"learning_rate": 9.991831820275466e-06,
"loss": 1.4626,
"step": 1265
},
{
"epoch": 0.33670212765957447,
"grad_norm": 3.6165192127227783,
"learning_rate": 9.991781491018223e-06,
"loss": 1.2914,
"step": 1266
},
{
"epoch": 0.336968085106383,
"grad_norm": 4.3747992515563965,
"learning_rate": 9.991731007309382e-06,
"loss": 1.2756,
"step": 1267
},
{
"epoch": 0.3372340425531915,
"grad_norm": 5.0972580909729,
"learning_rate": 9.991680369150507e-06,
"loss": 1.4694,
"step": 1268
},
{
"epoch": 0.3375,
"grad_norm": 3.841791868209839,
"learning_rate": 9.991629576543164e-06,
"loss": 1.1905,
"step": 1269
},
{
"epoch": 0.3377659574468085,
"grad_norm": 4.1475324630737305,
"learning_rate": 9.991578629488926e-06,
"loss": 1.3379,
"step": 1270
},
{
"epoch": 0.338031914893617,
"grad_norm": 4.152446269989014,
"learning_rate": 9.991527527989366e-06,
"loss": 1.1402,
"step": 1271
},
{
"epoch": 0.3382978723404255,
"grad_norm": 4.5577006340026855,
"learning_rate": 9.99147627204607e-06,
"loss": 1.3844,
"step": 1272
},
{
"epoch": 0.33856382978723404,
"grad_norm": 4.605076313018799,
"learning_rate": 9.991424861660621e-06,
"loss": 1.4557,
"step": 1273
},
{
"epoch": 0.33882978723404256,
"grad_norm": 4.045496940612793,
"learning_rate": 9.99137329683461e-06,
"loss": 1.2976,
"step": 1274
},
{
"epoch": 0.3390957446808511,
"grad_norm": 4.148492336273193,
"learning_rate": 9.991321577569632e-06,
"loss": 1.4065,
"step": 1275
},
{
"epoch": 0.3393617021276596,
"grad_norm": 4.128026485443115,
"learning_rate": 9.991269703867288e-06,
"loss": 1.3056,
"step": 1276
},
{
"epoch": 0.3396276595744681,
"grad_norm": 4.140103340148926,
"learning_rate": 9.991217675729184e-06,
"loss": 1.3136,
"step": 1277
},
{
"epoch": 0.3398936170212766,
"grad_norm": 4.122238636016846,
"learning_rate": 9.991165493156927e-06,
"loss": 1.2575,
"step": 1278
},
{
"epoch": 0.3401595744680851,
"grad_norm": 4.590948104858398,
"learning_rate": 9.991113156152134e-06,
"loss": 1.2896,
"step": 1279
},
{
"epoch": 0.3404255319148936,
"grad_norm": 4.469196796417236,
"learning_rate": 9.991060664716423e-06,
"loss": 1.4088,
"step": 1280
},
{
"epoch": 0.3406914893617021,
"grad_norm": 4.643316268920898,
"learning_rate": 9.99100801885142e-06,
"loss": 1.4124,
"step": 1281
},
{
"epoch": 0.34095744680851064,
"grad_norm": 4.106162071228027,
"learning_rate": 9.990955218558751e-06,
"loss": 1.3555,
"step": 1282
},
{
"epoch": 0.34122340425531916,
"grad_norm": 4.337850093841553,
"learning_rate": 9.990902263840053e-06,
"loss": 1.1865,
"step": 1283
},
{
"epoch": 0.3414893617021277,
"grad_norm": 3.8557538986206055,
"learning_rate": 9.990849154696963e-06,
"loss": 1.2002,
"step": 1284
},
{
"epoch": 0.34175531914893614,
"grad_norm": 4.412120342254639,
"learning_rate": 9.990795891131125e-06,
"loss": 1.3584,
"step": 1285
},
{
"epoch": 0.34202127659574466,
"grad_norm": 5.199094772338867,
"learning_rate": 9.990742473144184e-06,
"loss": 1.3745,
"step": 1286
},
{
"epoch": 0.3422872340425532,
"grad_norm": 3.8888189792633057,
"learning_rate": 9.990688900737795e-06,
"loss": 1.2443,
"step": 1287
},
{
"epoch": 0.3425531914893617,
"grad_norm": 3.81540846824646,
"learning_rate": 9.990635173913616e-06,
"loss": 1.347,
"step": 1288
},
{
"epoch": 0.3428191489361702,
"grad_norm": 4.090488910675049,
"learning_rate": 9.990581292673309e-06,
"loss": 1.283,
"step": 1289
},
{
"epoch": 0.34308510638297873,
"grad_norm": 4.115976333618164,
"learning_rate": 9.990527257018544e-06,
"loss": 1.2893,
"step": 1290
},
{
"epoch": 0.34335106382978725,
"grad_norm": 3.9170165061950684,
"learning_rate": 9.990473066950987e-06,
"loss": 1.2133,
"step": 1291
},
{
"epoch": 0.34361702127659577,
"grad_norm": 3.8994202613830566,
"learning_rate": 9.990418722472317e-06,
"loss": 1.1986,
"step": 1292
},
{
"epoch": 0.34388297872340423,
"grad_norm": 3.8675310611724854,
"learning_rate": 9.990364223584218e-06,
"loss": 1.16,
"step": 1293
},
{
"epoch": 0.34414893617021275,
"grad_norm": 4.010871410369873,
"learning_rate": 9.990309570288374e-06,
"loss": 1.2748,
"step": 1294
},
{
"epoch": 0.34441489361702127,
"grad_norm": 4.264376163482666,
"learning_rate": 9.990254762586477e-06,
"loss": 1.167,
"step": 1295
},
{
"epoch": 0.3446808510638298,
"grad_norm": 4.201075553894043,
"learning_rate": 9.990199800480222e-06,
"loss": 1.2061,
"step": 1296
},
{
"epoch": 0.3449468085106383,
"grad_norm": 4.1181535720825195,
"learning_rate": 9.99014468397131e-06,
"loss": 1.188,
"step": 1297
},
{
"epoch": 0.3452127659574468,
"grad_norm": 3.747342824935913,
"learning_rate": 9.990089413061445e-06,
"loss": 1.1944,
"step": 1298
},
{
"epoch": 0.34547872340425534,
"grad_norm": 4.067655086517334,
"learning_rate": 9.990033987752341e-06,
"loss": 1.1876,
"step": 1299
},
{
"epoch": 0.34574468085106386,
"grad_norm": 4.090482234954834,
"learning_rate": 9.989978408045709e-06,
"loss": 1.2122,
"step": 1300
},
{
"epoch": 0.3460106382978723,
"grad_norm": 3.879619598388672,
"learning_rate": 9.989922673943271e-06,
"loss": 1.2099,
"step": 1301
},
{
"epoch": 0.34627659574468084,
"grad_norm": 4.814892768859863,
"learning_rate": 9.98986678544675e-06,
"loss": 1.3879,
"step": 1302
},
{
"epoch": 0.34654255319148936,
"grad_norm": 4.234111309051514,
"learning_rate": 9.989810742557875e-06,
"loss": 1.5134,
"step": 1303
},
{
"epoch": 0.3468085106382979,
"grad_norm": 4.2561469078063965,
"learning_rate": 9.989754545278381e-06,
"loss": 1.3591,
"step": 1304
},
{
"epoch": 0.3470744680851064,
"grad_norm": 4.519184112548828,
"learning_rate": 9.989698193610007e-06,
"loss": 1.1676,
"step": 1305
},
{
"epoch": 0.3473404255319149,
"grad_norm": 4.09921407699585,
"learning_rate": 9.989641687554496e-06,
"loss": 1.238,
"step": 1306
},
{
"epoch": 0.3476063829787234,
"grad_norm": 3.9749245643615723,
"learning_rate": 9.989585027113598e-06,
"loss": 1.2444,
"step": 1307
},
{
"epoch": 0.3478723404255319,
"grad_norm": 4.225282192230225,
"learning_rate": 9.989528212289064e-06,
"loss": 1.1724,
"step": 1308
},
{
"epoch": 0.3481382978723404,
"grad_norm": 4.391535758972168,
"learning_rate": 9.98947124308265e-06,
"loss": 1.4058,
"step": 1309
},
{
"epoch": 0.3484042553191489,
"grad_norm": 3.8815417289733887,
"learning_rate": 9.989414119496126e-06,
"loss": 1.2464,
"step": 1310
},
{
"epoch": 0.34867021276595744,
"grad_norm": 4.186168193817139,
"learning_rate": 9.989356841531252e-06,
"loss": 1.2393,
"step": 1311
},
{
"epoch": 0.34893617021276596,
"grad_norm": 3.9777474403381348,
"learning_rate": 9.989299409189802e-06,
"loss": 1.1674,
"step": 1312
},
{
"epoch": 0.3492021276595745,
"grad_norm": 4.088747978210449,
"learning_rate": 9.989241822473557e-06,
"loss": 1.2024,
"step": 1313
},
{
"epoch": 0.349468085106383,
"grad_norm": 4.297309398651123,
"learning_rate": 9.989184081384295e-06,
"loss": 1.384,
"step": 1314
},
{
"epoch": 0.3497340425531915,
"grad_norm": 3.6362228393554688,
"learning_rate": 9.989126185923803e-06,
"loss": 1.266,
"step": 1315
},
{
"epoch": 0.35,
"grad_norm": 4.015252113342285,
"learning_rate": 9.989068136093873e-06,
"loss": 1.2447,
"step": 1316
},
{
"epoch": 0.3502659574468085,
"grad_norm": 3.9256210327148438,
"learning_rate": 9.989009931896302e-06,
"loss": 1.2674,
"step": 1317
},
{
"epoch": 0.350531914893617,
"grad_norm": 4.108496189117432,
"learning_rate": 9.988951573332888e-06,
"loss": 1.232,
"step": 1318
},
{
"epoch": 0.35079787234042553,
"grad_norm": 4.183421611785889,
"learning_rate": 9.98889306040544e-06,
"loss": 1.2652,
"step": 1319
},
{
"epoch": 0.35106382978723405,
"grad_norm": 4.556921482086182,
"learning_rate": 9.988834393115768e-06,
"loss": 1.3536,
"step": 1320
},
{
"epoch": 0.35132978723404257,
"grad_norm": 4.081547737121582,
"learning_rate": 9.988775571465684e-06,
"loss": 1.3168,
"step": 1321
},
{
"epoch": 0.3515957446808511,
"grad_norm": 4.136814594268799,
"learning_rate": 9.988716595457011e-06,
"loss": 1.3124,
"step": 1322
},
{
"epoch": 0.35186170212765955,
"grad_norm": 4.485897064208984,
"learning_rate": 9.988657465091572e-06,
"loss": 1.3164,
"step": 1323
},
{
"epoch": 0.35212765957446807,
"grad_norm": 4.273427963256836,
"learning_rate": 9.988598180371198e-06,
"loss": 1.2051,
"step": 1324
},
{
"epoch": 0.3523936170212766,
"grad_norm": 3.715895175933838,
"learning_rate": 9.988538741297724e-06,
"loss": 1.0755,
"step": 1325
},
{
"epoch": 0.3526595744680851,
"grad_norm": 3.932218551635742,
"learning_rate": 9.98847914787299e-06,
"loss": 1.4028,
"step": 1326
},
{
"epoch": 0.3529255319148936,
"grad_norm": 4.555146217346191,
"learning_rate": 9.988419400098834e-06,
"loss": 1.2805,
"step": 1327
},
{
"epoch": 0.35319148936170214,
"grad_norm": 4.291238784790039,
"learning_rate": 9.98835949797711e-06,
"loss": 1.3683,
"step": 1328
},
{
"epoch": 0.35345744680851066,
"grad_norm": 4.525993824005127,
"learning_rate": 9.98829944150967e-06,
"loss": 1.2788,
"step": 1329
},
{
"epoch": 0.3537234042553192,
"grad_norm": 3.771448850631714,
"learning_rate": 9.988239230698373e-06,
"loss": 1.3256,
"step": 1330
},
{
"epoch": 0.35398936170212764,
"grad_norm": 4.0126633644104,
"learning_rate": 9.988178865545081e-06,
"loss": 1.2984,
"step": 1331
},
{
"epoch": 0.35425531914893615,
"grad_norm": 3.521714210510254,
"learning_rate": 9.988118346051663e-06,
"loss": 1.192,
"step": 1332
},
{
"epoch": 0.35452127659574467,
"grad_norm": 4.065241813659668,
"learning_rate": 9.98805767221999e-06,
"loss": 1.383,
"step": 1333
},
{
"epoch": 0.3547872340425532,
"grad_norm": 4.3708720207214355,
"learning_rate": 9.987996844051939e-06,
"loss": 1.3586,
"step": 1334
},
{
"epoch": 0.3550531914893617,
"grad_norm": 4.104064464569092,
"learning_rate": 9.987935861549393e-06,
"loss": 1.2583,
"step": 1335
},
{
"epoch": 0.3553191489361702,
"grad_norm": 4.293087959289551,
"learning_rate": 9.98787472471424e-06,
"loss": 1.3606,
"step": 1336
},
{
"epoch": 0.35558510638297874,
"grad_norm": 3.906818151473999,
"learning_rate": 9.98781343354837e-06,
"loss": 1.2305,
"step": 1337
},
{
"epoch": 0.3558510638297872,
"grad_norm": 4.049057960510254,
"learning_rate": 9.98775198805368e-06,
"loss": 1.1915,
"step": 1338
},
{
"epoch": 0.3561170212765957,
"grad_norm": 4.160476207733154,
"learning_rate": 9.987690388232071e-06,
"loss": 1.3273,
"step": 1339
},
{
"epoch": 0.35638297872340424,
"grad_norm": 4.2301344871521,
"learning_rate": 9.98762863408545e-06,
"loss": 1.242,
"step": 1340
},
{
"epoch": 0.35664893617021276,
"grad_norm": 4.272438049316406,
"learning_rate": 9.987566725615725e-06,
"loss": 1.3378,
"step": 1341
},
{
"epoch": 0.3569148936170213,
"grad_norm": 4.048627853393555,
"learning_rate": 9.987504662824814e-06,
"loss": 1.2938,
"step": 1342
},
{
"epoch": 0.3571808510638298,
"grad_norm": 4.272396087646484,
"learning_rate": 9.987442445714637e-06,
"loss": 1.363,
"step": 1343
},
{
"epoch": 0.3574468085106383,
"grad_norm": 4.04710578918457,
"learning_rate": 9.98738007428712e-06,
"loss": 1.3823,
"step": 1344
},
{
"epoch": 0.35771276595744683,
"grad_norm": 4.724300384521484,
"learning_rate": 9.98731754854419e-06,
"loss": 1.4429,
"step": 1345
},
{
"epoch": 0.3579787234042553,
"grad_norm": 4.071347713470459,
"learning_rate": 9.987254868487783e-06,
"loss": 1.2203,
"step": 1346
},
{
"epoch": 0.3582446808510638,
"grad_norm": 3.8509132862091064,
"learning_rate": 9.987192034119839e-06,
"loss": 1.2774,
"step": 1347
},
{
"epoch": 0.35851063829787233,
"grad_norm": 3.7690467834472656,
"learning_rate": 9.987129045442304e-06,
"loss": 1.1786,
"step": 1348
},
{
"epoch": 0.35877659574468085,
"grad_norm": 4.102452754974365,
"learning_rate": 9.987065902457122e-06,
"loss": 1.232,
"step": 1349
},
{
"epoch": 0.35904255319148937,
"grad_norm": 4.353301048278809,
"learning_rate": 9.98700260516625e-06,
"loss": 1.204,
"step": 1350
},
{
"epoch": 0.3593085106382979,
"grad_norm": 4.020050048828125,
"learning_rate": 9.986939153571647e-06,
"loss": 1.2681,
"step": 1351
},
{
"epoch": 0.3595744680851064,
"grad_norm": 4.041562080383301,
"learning_rate": 9.986875547675274e-06,
"loss": 1.2093,
"step": 1352
},
{
"epoch": 0.3598404255319149,
"grad_norm": 3.9428937435150146,
"learning_rate": 9.9868117874791e-06,
"loss": 1.4088,
"step": 1353
},
{
"epoch": 0.3601063829787234,
"grad_norm": 3.8776018619537354,
"learning_rate": 9.986747872985099e-06,
"loss": 1.2944,
"step": 1354
},
{
"epoch": 0.3603723404255319,
"grad_norm": 4.4396796226501465,
"learning_rate": 9.986683804195248e-06,
"loss": 1.2328,
"step": 1355
},
{
"epoch": 0.3606382978723404,
"grad_norm": 6.8338093757629395,
"learning_rate": 9.986619581111528e-06,
"loss": 1.2865,
"step": 1356
},
{
"epoch": 0.36090425531914894,
"grad_norm": 3.8783535957336426,
"learning_rate": 9.986555203735926e-06,
"loss": 1.2004,
"step": 1357
},
{
"epoch": 0.36117021276595745,
"grad_norm": 4.063074111938477,
"learning_rate": 9.986490672070438e-06,
"loss": 1.2033,
"step": 1358
},
{
"epoch": 0.361436170212766,
"grad_norm": 5.602739334106445,
"learning_rate": 9.986425986117055e-06,
"loss": 1.2993,
"step": 1359
},
{
"epoch": 0.3617021276595745,
"grad_norm": 3.687655448913574,
"learning_rate": 9.986361145877783e-06,
"loss": 1.1984,
"step": 1360
},
{
"epoch": 0.36196808510638295,
"grad_norm": 4.312001705169678,
"learning_rate": 9.986296151354625e-06,
"loss": 1.2943,
"step": 1361
},
{
"epoch": 0.36223404255319147,
"grad_norm": 4.478762149810791,
"learning_rate": 9.986231002549594e-06,
"loss": 1.294,
"step": 1362
},
{
"epoch": 0.3625,
"grad_norm": 4.86306095123291,
"learning_rate": 9.986165699464706e-06,
"loss": 1.5325,
"step": 1363
},
{
"epoch": 0.3627659574468085,
"grad_norm": 4.426929950714111,
"learning_rate": 9.986100242101982e-06,
"loss": 1.3561,
"step": 1364
},
{
"epoch": 0.363031914893617,
"grad_norm": 4.546680450439453,
"learning_rate": 9.986034630463443e-06,
"loss": 1.3143,
"step": 1365
},
{
"epoch": 0.36329787234042554,
"grad_norm": 4.5038957595825195,
"learning_rate": 9.985968864551123e-06,
"loss": 1.2948,
"step": 1366
},
{
"epoch": 0.36356382978723406,
"grad_norm": 4.967344284057617,
"learning_rate": 9.985902944367058e-06,
"loss": 1.2844,
"step": 1367
},
{
"epoch": 0.3638297872340426,
"grad_norm": 3.8887312412261963,
"learning_rate": 9.985836869913283e-06,
"loss": 1.2737,
"step": 1368
},
{
"epoch": 0.36409574468085104,
"grad_norm": 4.1144795417785645,
"learning_rate": 9.985770641191847e-06,
"loss": 1.3379,
"step": 1369
},
{
"epoch": 0.36436170212765956,
"grad_norm": 4.12211275100708,
"learning_rate": 9.985704258204798e-06,
"loss": 1.3465,
"step": 1370
},
{
"epoch": 0.3646276595744681,
"grad_norm": 4.424558162689209,
"learning_rate": 9.985637720954188e-06,
"loss": 1.0785,
"step": 1371
},
{
"epoch": 0.3648936170212766,
"grad_norm": 4.308188438415527,
"learning_rate": 9.985571029442078e-06,
"loss": 1.4829,
"step": 1372
},
{
"epoch": 0.3651595744680851,
"grad_norm": 3.587887763977051,
"learning_rate": 9.98550418367053e-06,
"loss": 1.2684,
"step": 1373
},
{
"epoch": 0.36542553191489363,
"grad_norm": 4.300267696380615,
"learning_rate": 9.985437183641612e-06,
"loss": 1.305,
"step": 1374
},
{
"epoch": 0.36569148936170215,
"grad_norm": 4.035099506378174,
"learning_rate": 9.985370029357399e-06,
"loss": 1.2249,
"step": 1375
},
{
"epoch": 0.3659574468085106,
"grad_norm": 3.958627939224243,
"learning_rate": 9.985302720819967e-06,
"loss": 1.2176,
"step": 1376
},
{
"epoch": 0.36622340425531913,
"grad_norm": 4.257254600524902,
"learning_rate": 9.9852352580314e-06,
"loss": 1.2714,
"step": 1377
},
{
"epoch": 0.36648936170212765,
"grad_norm": 4.782037258148193,
"learning_rate": 9.985167640993784e-06,
"loss": 1.4979,
"step": 1378
},
{
"epoch": 0.36675531914893617,
"grad_norm": 4.400300025939941,
"learning_rate": 9.985099869709213e-06,
"loss": 1.3505,
"step": 1379
},
{
"epoch": 0.3670212765957447,
"grad_norm": 4.289068698883057,
"learning_rate": 9.985031944179781e-06,
"loss": 1.2113,
"step": 1380
},
{
"epoch": 0.3672872340425532,
"grad_norm": 4.770625591278076,
"learning_rate": 9.984963864407593e-06,
"loss": 1.4373,
"step": 1381
},
{
"epoch": 0.3675531914893617,
"grad_norm": 4.392122268676758,
"learning_rate": 9.984895630394755e-06,
"loss": 1.3069,
"step": 1382
},
{
"epoch": 0.36781914893617024,
"grad_norm": 3.9814369678497314,
"learning_rate": 9.984827242143376e-06,
"loss": 1.281,
"step": 1383
},
{
"epoch": 0.3680851063829787,
"grad_norm": 3.9791054725646973,
"learning_rate": 9.984758699655572e-06,
"loss": 1.1758,
"step": 1384
},
{
"epoch": 0.3683510638297872,
"grad_norm": 4.434001922607422,
"learning_rate": 9.984690002933465e-06,
"loss": 1.3586,
"step": 1385
},
{
"epoch": 0.36861702127659574,
"grad_norm": 4.445183753967285,
"learning_rate": 9.984621151979183e-06,
"loss": 1.367,
"step": 1386
},
{
"epoch": 0.36888297872340425,
"grad_norm": 3.8560211658477783,
"learning_rate": 9.984552146794853e-06,
"loss": 1.2933,
"step": 1387
},
{
"epoch": 0.36914893617021277,
"grad_norm": 4.20532751083374,
"learning_rate": 9.984482987382612e-06,
"loss": 1.3036,
"step": 1388
},
{
"epoch": 0.3694148936170213,
"grad_norm": 4.1775898933410645,
"learning_rate": 9.984413673744597e-06,
"loss": 1.1862,
"step": 1389
},
{
"epoch": 0.3696808510638298,
"grad_norm": 4.668176651000977,
"learning_rate": 9.984344205882954e-06,
"loss": 1.3125,
"step": 1390
},
{
"epoch": 0.3699468085106383,
"grad_norm": 4.170348644256592,
"learning_rate": 9.984274583799833e-06,
"loss": 1.1855,
"step": 1391
},
{
"epoch": 0.3702127659574468,
"grad_norm": 3.893609046936035,
"learning_rate": 9.98420480749739e-06,
"loss": 1.3567,
"step": 1392
},
{
"epoch": 0.3704787234042553,
"grad_norm": 3.791059970855713,
"learning_rate": 9.98413487697778e-06,
"loss": 1.2596,
"step": 1393
},
{
"epoch": 0.3707446808510638,
"grad_norm": 3.89493465423584,
"learning_rate": 9.984064792243171e-06,
"loss": 1.1468,
"step": 1394
},
{
"epoch": 0.37101063829787234,
"grad_norm": 3.932354211807251,
"learning_rate": 9.983994553295728e-06,
"loss": 1.2274,
"step": 1395
},
{
"epoch": 0.37127659574468086,
"grad_norm": 3.772759199142456,
"learning_rate": 9.983924160137627e-06,
"loss": 1.1687,
"step": 1396
},
{
"epoch": 0.3715425531914894,
"grad_norm": 4.090175628662109,
"learning_rate": 9.983853612771043e-06,
"loss": 1.1627,
"step": 1397
},
{
"epoch": 0.3718085106382979,
"grad_norm": 5.041259288787842,
"learning_rate": 9.983782911198161e-06,
"loss": 1.2878,
"step": 1398
},
{
"epoch": 0.37207446808510636,
"grad_norm": 4.565484523773193,
"learning_rate": 9.98371205542117e-06,
"loss": 1.2838,
"step": 1399
},
{
"epoch": 0.3723404255319149,
"grad_norm": 3.94577956199646,
"learning_rate": 9.983641045442256e-06,
"loss": 1.3253,
"step": 1400
},
{
"epoch": 0.3726063829787234,
"grad_norm": 3.559597969055176,
"learning_rate": 9.983569881263625e-06,
"loss": 1.0896,
"step": 1401
},
{
"epoch": 0.3728723404255319,
"grad_norm": 4.101516246795654,
"learning_rate": 9.983498562887471e-06,
"loss": 1.4844,
"step": 1402
},
{
"epoch": 0.37313829787234043,
"grad_norm": 4.680913925170898,
"learning_rate": 9.983427090316005e-06,
"loss": 1.3343,
"step": 1403
},
{
"epoch": 0.37340425531914895,
"grad_norm": 5.2188286781311035,
"learning_rate": 9.983355463551439e-06,
"loss": 1.3206,
"step": 1404
},
{
"epoch": 0.37367021276595747,
"grad_norm": 4.363986968994141,
"learning_rate": 9.983283682595986e-06,
"loss": 1.5722,
"step": 1405
},
{
"epoch": 0.373936170212766,
"grad_norm": 4.405764579772949,
"learning_rate": 9.98321174745187e-06,
"loss": 1.3106,
"step": 1406
},
{
"epoch": 0.37420212765957445,
"grad_norm": 3.671576738357544,
"learning_rate": 9.983139658121316e-06,
"loss": 1.1663,
"step": 1407
},
{
"epoch": 0.37446808510638296,
"grad_norm": 4.068467140197754,
"learning_rate": 9.983067414606553e-06,
"loss": 1.3443,
"step": 1408
},
{
"epoch": 0.3747340425531915,
"grad_norm": 4.050812244415283,
"learning_rate": 9.982995016909817e-06,
"loss": 1.2671,
"step": 1409
},
{
"epoch": 0.375,
"grad_norm": 4.016097545623779,
"learning_rate": 9.98292246503335e-06,
"loss": 1.2389,
"step": 1410
},
{
"epoch": 0.3752659574468085,
"grad_norm": 4.278280258178711,
"learning_rate": 9.982849758979394e-06,
"loss": 1.3095,
"step": 1411
},
{
"epoch": 0.37553191489361704,
"grad_norm": 3.826686143875122,
"learning_rate": 9.9827768987502e-06,
"loss": 1.0923,
"step": 1412
},
{
"epoch": 0.37579787234042555,
"grad_norm": 3.954808473587036,
"learning_rate": 9.982703884348023e-06,
"loss": 1.3359,
"step": 1413
},
{
"epoch": 0.376063829787234,
"grad_norm": 3.8342320919036865,
"learning_rate": 9.982630715775121e-06,
"loss": 1.287,
"step": 1414
},
{
"epoch": 0.37632978723404253,
"grad_norm": 4.190742492675781,
"learning_rate": 9.982557393033758e-06,
"loss": 1.2957,
"step": 1415
},
{
"epoch": 0.37659574468085105,
"grad_norm": 4.030623435974121,
"learning_rate": 9.982483916126204e-06,
"loss": 1.2992,
"step": 1416
},
{
"epoch": 0.37686170212765957,
"grad_norm": 4.164768695831299,
"learning_rate": 9.98241028505473e-06,
"loss": 1.5608,
"step": 1417
},
{
"epoch": 0.3771276595744681,
"grad_norm": 4.243110656738281,
"learning_rate": 9.982336499821617e-06,
"loss": 1.3214,
"step": 1418
},
{
"epoch": 0.3773936170212766,
"grad_norm": 3.969595193862915,
"learning_rate": 9.982262560429147e-06,
"loss": 1.3743,
"step": 1419
},
{
"epoch": 0.3776595744680851,
"grad_norm": 4.253571033477783,
"learning_rate": 9.982188466879607e-06,
"loss": 1.329,
"step": 1420
},
{
"epoch": 0.37792553191489364,
"grad_norm": 4.254541397094727,
"learning_rate": 9.98211421917529e-06,
"loss": 1.3093,
"step": 1421
},
{
"epoch": 0.3781914893617021,
"grad_norm": 4.365729808807373,
"learning_rate": 9.982039817318491e-06,
"loss": 1.3744,
"step": 1422
},
{
"epoch": 0.3784574468085106,
"grad_norm": 4.0368499755859375,
"learning_rate": 9.981965261311519e-06,
"loss": 1.1517,
"step": 1423
},
{
"epoch": 0.37872340425531914,
"grad_norm": 4.165602207183838,
"learning_rate": 9.981890551156673e-06,
"loss": 1.2983,
"step": 1424
},
{
"epoch": 0.37898936170212766,
"grad_norm": 4.241005897521973,
"learning_rate": 9.981815686856268e-06,
"loss": 1.2491,
"step": 1425
},
{
"epoch": 0.3792553191489362,
"grad_norm": 3.9506289958953857,
"learning_rate": 9.981740668412622e-06,
"loss": 1.175,
"step": 1426
},
{
"epoch": 0.3795212765957447,
"grad_norm": 4.209918022155762,
"learning_rate": 9.981665495828053e-06,
"loss": 1.379,
"step": 1427
},
{
"epoch": 0.3797872340425532,
"grad_norm": 4.048032283782959,
"learning_rate": 9.981590169104889e-06,
"loss": 1.4339,
"step": 1428
},
{
"epoch": 0.3800531914893617,
"grad_norm": 3.9107158184051514,
"learning_rate": 9.98151468824546e-06,
"loss": 1.4468,
"step": 1429
},
{
"epoch": 0.3803191489361702,
"grad_norm": 3.8230321407318115,
"learning_rate": 9.981439053252102e-06,
"loss": 1.2942,
"step": 1430
},
{
"epoch": 0.3805851063829787,
"grad_norm": 3.772338390350342,
"learning_rate": 9.981363264127154e-06,
"loss": 1.3236,
"step": 1431
},
{
"epoch": 0.38085106382978723,
"grad_norm": 4.234860897064209,
"learning_rate": 9.981287320872962e-06,
"loss": 1.3763,
"step": 1432
},
{
"epoch": 0.38111702127659575,
"grad_norm": 3.8890817165374756,
"learning_rate": 9.981211223491876e-06,
"loss": 1.3667,
"step": 1433
},
{
"epoch": 0.38138297872340426,
"grad_norm": 3.8217055797576904,
"learning_rate": 9.98113497198625e-06,
"loss": 1.1392,
"step": 1434
},
{
"epoch": 0.3816489361702128,
"grad_norm": 3.9971745014190674,
"learning_rate": 9.981058566358443e-06,
"loss": 1.1892,
"step": 1435
},
{
"epoch": 0.3819148936170213,
"grad_norm": 4.417277812957764,
"learning_rate": 9.98098200661082e-06,
"loss": 1.3306,
"step": 1436
},
{
"epoch": 0.38218085106382976,
"grad_norm": 4.433936595916748,
"learning_rate": 9.980905292745749e-06,
"loss": 1.2253,
"step": 1437
},
{
"epoch": 0.3824468085106383,
"grad_norm": 3.668414831161499,
"learning_rate": 9.980828424765603e-06,
"loss": 1.3243,
"step": 1438
},
{
"epoch": 0.3827127659574468,
"grad_norm": 4.062864303588867,
"learning_rate": 9.980751402672762e-06,
"loss": 1.2416,
"step": 1439
},
{
"epoch": 0.3829787234042553,
"grad_norm": 4.28949499130249,
"learning_rate": 9.980674226469608e-06,
"loss": 1.3018,
"step": 1440
},
{
"epoch": 0.38324468085106383,
"grad_norm": 3.598482847213745,
"learning_rate": 9.980596896158532e-06,
"loss": 1.1174,
"step": 1441
},
{
"epoch": 0.38351063829787235,
"grad_norm": 4.300634384155273,
"learning_rate": 9.980519411741922e-06,
"loss": 1.3079,
"step": 1442
},
{
"epoch": 0.38377659574468087,
"grad_norm": 4.2363128662109375,
"learning_rate": 9.980441773222178e-06,
"loss": 1.3546,
"step": 1443
},
{
"epoch": 0.3840425531914894,
"grad_norm": 4.521866321563721,
"learning_rate": 9.980363980601702e-06,
"loss": 1.2007,
"step": 1444
},
{
"epoch": 0.38430851063829785,
"grad_norm": 3.9129135608673096,
"learning_rate": 9.9802860338829e-06,
"loss": 1.3101,
"step": 1445
},
{
"epoch": 0.38457446808510637,
"grad_norm": 4.559953689575195,
"learning_rate": 9.980207933068185e-06,
"loss": 1.3183,
"step": 1446
},
{
"epoch": 0.3848404255319149,
"grad_norm": 4.102110385894775,
"learning_rate": 9.980129678159974e-06,
"loss": 1.2549,
"step": 1447
},
{
"epoch": 0.3851063829787234,
"grad_norm": 4.215007781982422,
"learning_rate": 9.980051269160686e-06,
"loss": 1.3281,
"step": 1448
},
{
"epoch": 0.3853723404255319,
"grad_norm": 4.188117980957031,
"learning_rate": 9.97997270607275e-06,
"loss": 1.267,
"step": 1449
},
{
"epoch": 0.38563829787234044,
"grad_norm": 3.9828150272369385,
"learning_rate": 9.979893988898592e-06,
"loss": 1.2967,
"step": 1450
},
{
"epoch": 0.38590425531914896,
"grad_norm": 3.9680116176605225,
"learning_rate": 9.979815117640654e-06,
"loss": 1.2711,
"step": 1451
},
{
"epoch": 0.3861702127659574,
"grad_norm": 3.9651451110839844,
"learning_rate": 9.979736092301374e-06,
"loss": 1.2298,
"step": 1452
},
{
"epoch": 0.38643617021276594,
"grad_norm": 3.7032337188720703,
"learning_rate": 9.979656912883193e-06,
"loss": 1.1644,
"step": 1453
},
{
"epoch": 0.38670212765957446,
"grad_norm": 4.174644470214844,
"learning_rate": 9.979577579388566e-06,
"loss": 1.1941,
"step": 1454
},
{
"epoch": 0.386968085106383,
"grad_norm": 3.9499082565307617,
"learning_rate": 9.979498091819946e-06,
"loss": 1.2205,
"step": 1455
},
{
"epoch": 0.3872340425531915,
"grad_norm": 4.005082130432129,
"learning_rate": 9.979418450179792e-06,
"loss": 1.2983,
"step": 1456
},
{
"epoch": 0.3875,
"grad_norm": 4.425258159637451,
"learning_rate": 9.97933865447057e-06,
"loss": 1.3444,
"step": 1457
},
{
"epoch": 0.38776595744680853,
"grad_norm": 4.169209003448486,
"learning_rate": 9.979258704694747e-06,
"loss": 1.3914,
"step": 1458
},
{
"epoch": 0.38803191489361705,
"grad_norm": 3.7960317134857178,
"learning_rate": 9.979178600854797e-06,
"loss": 1.2186,
"step": 1459
},
{
"epoch": 0.3882978723404255,
"grad_norm": 3.9216535091400146,
"learning_rate": 9.979098342953198e-06,
"loss": 1.0839,
"step": 1460
},
{
"epoch": 0.388563829787234,
"grad_norm": 4.077401638031006,
"learning_rate": 9.979017930992436e-06,
"loss": 1.225,
"step": 1461
},
{
"epoch": 0.38882978723404255,
"grad_norm": 3.871135950088501,
"learning_rate": 9.978937364974996e-06,
"loss": 1.2545,
"step": 1462
},
{
"epoch": 0.38909574468085106,
"grad_norm": 4.12876558303833,
"learning_rate": 9.978856644903373e-06,
"loss": 1.3806,
"step": 1463
},
{
"epoch": 0.3893617021276596,
"grad_norm": 4.172638416290283,
"learning_rate": 9.978775770780061e-06,
"loss": 1.3444,
"step": 1464
},
{
"epoch": 0.3896276595744681,
"grad_norm": 4.253303050994873,
"learning_rate": 9.978694742607566e-06,
"loss": 1.3015,
"step": 1465
},
{
"epoch": 0.3898936170212766,
"grad_norm": 3.937948226928711,
"learning_rate": 9.978613560388396e-06,
"loss": 1.4014,
"step": 1466
},
{
"epoch": 0.3901595744680851,
"grad_norm": 3.959920644760132,
"learning_rate": 9.978532224125059e-06,
"loss": 1.2797,
"step": 1467
},
{
"epoch": 0.3904255319148936,
"grad_norm": 4.240394592285156,
"learning_rate": 9.978450733820073e-06,
"loss": 1.3541,
"step": 1468
},
{
"epoch": 0.3906914893617021,
"grad_norm": 4.060705661773682,
"learning_rate": 9.97836908947596e-06,
"loss": 1.2997,
"step": 1469
},
{
"epoch": 0.39095744680851063,
"grad_norm": 4.276419162750244,
"learning_rate": 9.978287291095248e-06,
"loss": 1.4451,
"step": 1470
},
{
"epoch": 0.39122340425531915,
"grad_norm": 3.961526393890381,
"learning_rate": 9.978205338680465e-06,
"loss": 1.3248,
"step": 1471
},
{
"epoch": 0.39148936170212767,
"grad_norm": 4.002696514129639,
"learning_rate": 9.978123232234147e-06,
"loss": 1.3274,
"step": 1472
},
{
"epoch": 0.3917553191489362,
"grad_norm": 3.857750654220581,
"learning_rate": 9.978040971758836e-06,
"loss": 1.2552,
"step": 1473
},
{
"epoch": 0.3920212765957447,
"grad_norm": 3.973501682281494,
"learning_rate": 9.977958557257077e-06,
"loss": 1.3911,
"step": 1474
},
{
"epoch": 0.39228723404255317,
"grad_norm": 4.301419258117676,
"learning_rate": 9.977875988731418e-06,
"loss": 1.2423,
"step": 1475
},
{
"epoch": 0.3925531914893617,
"grad_norm": 3.7840960025787354,
"learning_rate": 9.977793266184416e-06,
"loss": 1.1739,
"step": 1476
},
{
"epoch": 0.3928191489361702,
"grad_norm": 3.6807820796966553,
"learning_rate": 9.977710389618628e-06,
"loss": 1.1685,
"step": 1477
},
{
"epoch": 0.3930851063829787,
"grad_norm": 3.942674398422241,
"learning_rate": 9.977627359036624e-06,
"loss": 1.2033,
"step": 1478
},
{
"epoch": 0.39335106382978724,
"grad_norm": 4.07774543762207,
"learning_rate": 9.977544174440965e-06,
"loss": 1.2707,
"step": 1479
},
{
"epoch": 0.39361702127659576,
"grad_norm": 4.302217483520508,
"learning_rate": 9.977460835834231e-06,
"loss": 1.3944,
"step": 1480
},
{
"epoch": 0.3938829787234043,
"grad_norm": 4.006019592285156,
"learning_rate": 9.977377343218998e-06,
"loss": 1.3301,
"step": 1481
},
{
"epoch": 0.3941489361702128,
"grad_norm": 4.067336082458496,
"learning_rate": 9.977293696597849e-06,
"loss": 1.3282,
"step": 1482
},
{
"epoch": 0.39441489361702126,
"grad_norm": 4.4912004470825195,
"learning_rate": 9.977209895973374e-06,
"loss": 1.374,
"step": 1483
},
{
"epoch": 0.3946808510638298,
"grad_norm": 3.933626651763916,
"learning_rate": 9.977125941348165e-06,
"loss": 1.1584,
"step": 1484
},
{
"epoch": 0.3949468085106383,
"grad_norm": 4.08411169052124,
"learning_rate": 9.97704183272482e-06,
"loss": 1.3587,
"step": 1485
},
{
"epoch": 0.3952127659574468,
"grad_norm": 4.316272735595703,
"learning_rate": 9.976957570105939e-06,
"loss": 1.2544,
"step": 1486
},
{
"epoch": 0.39547872340425533,
"grad_norm": 4.05543851852417,
"learning_rate": 9.976873153494132e-06,
"loss": 1.1699,
"step": 1487
},
{
"epoch": 0.39574468085106385,
"grad_norm": 4.137149810791016,
"learning_rate": 9.976788582892012e-06,
"loss": 1.3501,
"step": 1488
},
{
"epoch": 0.39601063829787236,
"grad_norm": 3.830085515975952,
"learning_rate": 9.976703858302192e-06,
"loss": 1.2818,
"step": 1489
},
{
"epoch": 0.3962765957446808,
"grad_norm": 4.138214588165283,
"learning_rate": 9.976618979727295e-06,
"loss": 1.2769,
"step": 1490
},
{
"epoch": 0.39654255319148934,
"grad_norm": 4.205438137054443,
"learning_rate": 9.976533947169948e-06,
"loss": 1.4103,
"step": 1491
},
{
"epoch": 0.39680851063829786,
"grad_norm": 4.104953289031982,
"learning_rate": 9.976448760632782e-06,
"loss": 1.3701,
"step": 1492
},
{
"epoch": 0.3970744680851064,
"grad_norm": 3.725175619125366,
"learning_rate": 9.976363420118432e-06,
"loss": 1.2986,
"step": 1493
},
{
"epoch": 0.3973404255319149,
"grad_norm": 4.973143577575684,
"learning_rate": 9.97627792562954e-06,
"loss": 1.3123,
"step": 1494
},
{
"epoch": 0.3976063829787234,
"grad_norm": 3.5973260402679443,
"learning_rate": 9.976192277168748e-06,
"loss": 1.1878,
"step": 1495
},
{
"epoch": 0.39787234042553193,
"grad_norm": 3.9308860301971436,
"learning_rate": 9.97610647473871e-06,
"loss": 1.3139,
"step": 1496
},
{
"epoch": 0.39813829787234045,
"grad_norm": 3.831552028656006,
"learning_rate": 9.976020518342078e-06,
"loss": 1.249,
"step": 1497
},
{
"epoch": 0.3984042553191489,
"grad_norm": 3.8937809467315674,
"learning_rate": 9.975934407981512e-06,
"loss": 1.2361,
"step": 1498
},
{
"epoch": 0.39867021276595743,
"grad_norm": 4.4092512130737305,
"learning_rate": 9.97584814365968e-06,
"loss": 1.424,
"step": 1499
},
{
"epoch": 0.39893617021276595,
"grad_norm": 4.096745491027832,
"learning_rate": 9.975761725379243e-06,
"loss": 1.3488,
"step": 1500
},
{
"epoch": 0.39893617021276595,
"eval_loss": 1.3084138631820679,
"eval_runtime": 12.5754,
"eval_samples_per_second": 31.808,
"eval_steps_per_second": 3.976,
"step": 1500
},
{
"epoch": 0.39920212765957447,
"grad_norm": 5.023965835571289,
"learning_rate": 9.975675153142884e-06,
"loss": 1.3409,
"step": 1501
},
{
"epoch": 0.399468085106383,
"grad_norm": 4.182278156280518,
"learning_rate": 9.975588426953276e-06,
"loss": 1.2497,
"step": 1502
},
{
"epoch": 0.3997340425531915,
"grad_norm": 3.872786283493042,
"learning_rate": 9.975501546813104e-06,
"loss": 1.29,
"step": 1503
},
{
"epoch": 0.4,
"grad_norm": 3.9527881145477295,
"learning_rate": 9.975414512725058e-06,
"loss": 1.3427,
"step": 1504
},
{
"epoch": 0.4002659574468085,
"grad_norm": 3.563168525695801,
"learning_rate": 9.975327324691828e-06,
"loss": 1.2509,
"step": 1505
},
{
"epoch": 0.400531914893617,
"grad_norm": 3.8460729122161865,
"learning_rate": 9.975239982716113e-06,
"loss": 1.214,
"step": 1506
},
{
"epoch": 0.4007978723404255,
"grad_norm": 4.321569442749023,
"learning_rate": 9.975152486800615e-06,
"loss": 1.1959,
"step": 1507
},
{
"epoch": 0.40106382978723404,
"grad_norm": 4.102901935577393,
"learning_rate": 9.975064836948041e-06,
"loss": 1.2786,
"step": 1508
},
{
"epoch": 0.40132978723404256,
"grad_norm": 3.8385143280029297,
"learning_rate": 9.974977033161103e-06,
"loss": 1.3574,
"step": 1509
},
{
"epoch": 0.4015957446808511,
"grad_norm": 3.912363290786743,
"learning_rate": 9.97488907544252e-06,
"loss": 1.388,
"step": 1510
},
{
"epoch": 0.4018617021276596,
"grad_norm": 4.346206188201904,
"learning_rate": 9.974800963795012e-06,
"loss": 1.4532,
"step": 1511
},
{
"epoch": 0.4021276595744681,
"grad_norm": 4.346587657928467,
"learning_rate": 9.974712698221306e-06,
"loss": 1.2098,
"step": 1512
},
{
"epoch": 0.4023936170212766,
"grad_norm": 3.9622318744659424,
"learning_rate": 9.97462427872413e-06,
"loss": 1.1556,
"step": 1513
},
{
"epoch": 0.4026595744680851,
"grad_norm": 3.903508186340332,
"learning_rate": 9.974535705306222e-06,
"loss": 1.1644,
"step": 1514
},
{
"epoch": 0.4029255319148936,
"grad_norm": 4.4463605880737305,
"learning_rate": 9.974446977970322e-06,
"loss": 1.4892,
"step": 1515
},
{
"epoch": 0.4031914893617021,
"grad_norm": 3.8401832580566406,
"learning_rate": 9.974358096719178e-06,
"loss": 1.3681,
"step": 1516
},
{
"epoch": 0.40345744680851064,
"grad_norm": 4.009060382843018,
"learning_rate": 9.974269061555537e-06,
"loss": 1.2134,
"step": 1517
},
{
"epoch": 0.40372340425531916,
"grad_norm": 3.609969139099121,
"learning_rate": 9.974179872482153e-06,
"loss": 1.34,
"step": 1518
},
{
"epoch": 0.4039893617021277,
"grad_norm": 4.289672374725342,
"learning_rate": 9.97409052950179e-06,
"loss": 1.4246,
"step": 1519
},
{
"epoch": 0.40425531914893614,
"grad_norm": 3.6479434967041016,
"learning_rate": 9.974001032617208e-06,
"loss": 1.2366,
"step": 1520
},
{
"epoch": 0.40452127659574466,
"grad_norm": 4.251558780670166,
"learning_rate": 9.973911381831178e-06,
"loss": 1.3208,
"step": 1521
},
{
"epoch": 0.4047872340425532,
"grad_norm": 3.7560923099517822,
"learning_rate": 9.973821577146475e-06,
"loss": 1.2298,
"step": 1522
},
{
"epoch": 0.4050531914893617,
"grad_norm": 3.9338622093200684,
"learning_rate": 9.973731618565876e-06,
"loss": 1.34,
"step": 1523
},
{
"epoch": 0.4053191489361702,
"grad_norm": 3.8561365604400635,
"learning_rate": 9.973641506092165e-06,
"loss": 1.4198,
"step": 1524
},
{
"epoch": 0.40558510638297873,
"grad_norm": 3.7590527534484863,
"learning_rate": 9.973551239728129e-06,
"loss": 1.3644,
"step": 1525
},
{
"epoch": 0.40585106382978725,
"grad_norm": 4.470832824707031,
"learning_rate": 9.973460819476562e-06,
"loss": 1.3641,
"step": 1526
},
{
"epoch": 0.40611702127659577,
"grad_norm": 3.5494723320007324,
"learning_rate": 9.973370245340264e-06,
"loss": 1.2552,
"step": 1527
},
{
"epoch": 0.40638297872340423,
"grad_norm": 4.204685211181641,
"learning_rate": 9.973279517322033e-06,
"loss": 1.3577,
"step": 1528
},
{
"epoch": 0.40664893617021275,
"grad_norm": 4.775966167449951,
"learning_rate": 9.97318863542468e-06,
"loss": 1.4342,
"step": 1529
},
{
"epoch": 0.40691489361702127,
"grad_norm": 4.2795729637146,
"learning_rate": 9.973097599651013e-06,
"loss": 1.3033,
"step": 1530
},
{
"epoch": 0.4071808510638298,
"grad_norm": 4.110699653625488,
"learning_rate": 9.973006410003853e-06,
"loss": 1.3463,
"step": 1531
},
{
"epoch": 0.4074468085106383,
"grad_norm": 3.8819406032562256,
"learning_rate": 9.97291506648602e-06,
"loss": 1.1908,
"step": 1532
},
{
"epoch": 0.4077127659574468,
"grad_norm": 4.164956092834473,
"learning_rate": 9.972823569100338e-06,
"loss": 1.2573,
"step": 1533
},
{
"epoch": 0.40797872340425534,
"grad_norm": 3.9775986671447754,
"learning_rate": 9.97273191784964e-06,
"loss": 1.2141,
"step": 1534
},
{
"epoch": 0.40824468085106386,
"grad_norm": 4.500059604644775,
"learning_rate": 9.972640112736764e-06,
"loss": 1.3342,
"step": 1535
},
{
"epoch": 0.4085106382978723,
"grad_norm": 4.081606864929199,
"learning_rate": 9.972548153764547e-06,
"loss": 1.2027,
"step": 1536
},
{
"epoch": 0.40877659574468084,
"grad_norm": 4.272010803222656,
"learning_rate": 9.972456040935838e-06,
"loss": 1.2332,
"step": 1537
},
{
"epoch": 0.40904255319148936,
"grad_norm": 4.042487144470215,
"learning_rate": 9.972363774253481e-06,
"loss": 1.1932,
"step": 1538
},
{
"epoch": 0.4093085106382979,
"grad_norm": 3.9628350734710693,
"learning_rate": 9.972271353720337e-06,
"loss": 1.2636,
"step": 1539
},
{
"epoch": 0.4095744680851064,
"grad_norm": 4.018553256988525,
"learning_rate": 9.972178779339264e-06,
"loss": 1.2822,
"step": 1540
},
{
"epoch": 0.4098404255319149,
"grad_norm": 4.054775714874268,
"learning_rate": 9.972086051113123e-06,
"loss": 1.3419,
"step": 1541
},
{
"epoch": 0.4101063829787234,
"grad_norm": 4.035485744476318,
"learning_rate": 9.971993169044787e-06,
"loss": 1.2586,
"step": 1542
},
{
"epoch": 0.4103723404255319,
"grad_norm": 4.139084815979004,
"learning_rate": 9.971900133137128e-06,
"loss": 1.3533,
"step": 1543
},
{
"epoch": 0.4106382978723404,
"grad_norm": 3.9709324836730957,
"learning_rate": 9.971806943393026e-06,
"loss": 1.1807,
"step": 1544
},
{
"epoch": 0.4109042553191489,
"grad_norm": 3.836603879928589,
"learning_rate": 9.971713599815364e-06,
"loss": 1.2364,
"step": 1545
},
{
"epoch": 0.41117021276595744,
"grad_norm": 3.484250068664551,
"learning_rate": 9.97162010240703e-06,
"loss": 1.2536,
"step": 1546
},
{
"epoch": 0.41143617021276596,
"grad_norm": 4.203670978546143,
"learning_rate": 9.971526451170914e-06,
"loss": 1.2339,
"step": 1547
},
{
"epoch": 0.4117021276595745,
"grad_norm": 3.7969377040863037,
"learning_rate": 9.971432646109919e-06,
"loss": 1.4205,
"step": 1548
},
{
"epoch": 0.411968085106383,
"grad_norm": 3.9421546459198,
"learning_rate": 9.971338687226944e-06,
"loss": 1.2441,
"step": 1549
},
{
"epoch": 0.4122340425531915,
"grad_norm": 3.8566412925720215,
"learning_rate": 9.971244574524897e-06,
"loss": 1.3148,
"step": 1550
},
{
"epoch": 0.4125,
"grad_norm": 3.6699059009552,
"learning_rate": 9.971150308006689e-06,
"loss": 1.1396,
"step": 1551
},
{
"epoch": 0.4127659574468085,
"grad_norm": 4.328299522399902,
"learning_rate": 9.971055887675238e-06,
"loss": 1.4105,
"step": 1552
},
{
"epoch": 0.413031914893617,
"grad_norm": 3.6258397102355957,
"learning_rate": 9.970961313533465e-06,
"loss": 1.2399,
"step": 1553
},
{
"epoch": 0.41329787234042553,
"grad_norm": 4.217952251434326,
"learning_rate": 9.970866585584298e-06,
"loss": 1.2643,
"step": 1554
},
{
"epoch": 0.41356382978723405,
"grad_norm": 3.8410286903381348,
"learning_rate": 9.970771703830666e-06,
"loss": 1.3982,
"step": 1555
},
{
"epoch": 0.41382978723404257,
"grad_norm": 4.1184234619140625,
"learning_rate": 9.970676668275504e-06,
"loss": 1.3206,
"step": 1556
},
{
"epoch": 0.4140957446808511,
"grad_norm": 3.805264472961426,
"learning_rate": 9.970581478921755e-06,
"loss": 1.3301,
"step": 1557
},
{
"epoch": 0.41436170212765955,
"grad_norm": 3.7191929817199707,
"learning_rate": 9.970486135772362e-06,
"loss": 1.3443,
"step": 1558
},
{
"epoch": 0.41462765957446807,
"grad_norm": 3.7962100505828857,
"learning_rate": 9.970390638830275e-06,
"loss": 1.1145,
"step": 1559
},
{
"epoch": 0.4148936170212766,
"grad_norm": 3.8480000495910645,
"learning_rate": 9.970294988098452e-06,
"loss": 1.303,
"step": 1560
},
{
"epoch": 0.4151595744680851,
"grad_norm": 4.154008388519287,
"learning_rate": 9.970199183579847e-06,
"loss": 1.2505,
"step": 1561
},
{
"epoch": 0.4154255319148936,
"grad_norm": 3.6945624351501465,
"learning_rate": 9.97010322527743e-06,
"loss": 1.2318,
"step": 1562
},
{
"epoch": 0.41569148936170214,
"grad_norm": 4.145558834075928,
"learning_rate": 9.970007113194168e-06,
"loss": 1.2855,
"step": 1563
},
{
"epoch": 0.41595744680851066,
"grad_norm": 4.037220001220703,
"learning_rate": 9.969910847333032e-06,
"loss": 1.2599,
"step": 1564
},
{
"epoch": 0.4162234042553192,
"grad_norm": 4.070208549499512,
"learning_rate": 9.969814427697007e-06,
"loss": 1.3002,
"step": 1565
},
{
"epoch": 0.41648936170212764,
"grad_norm": 4.0794548988342285,
"learning_rate": 9.969717854289069e-06,
"loss": 1.3807,
"step": 1566
},
{
"epoch": 0.41675531914893615,
"grad_norm": 3.9017162322998047,
"learning_rate": 9.969621127112211e-06,
"loss": 1.1982,
"step": 1567
},
{
"epoch": 0.41702127659574467,
"grad_norm": 4.089752674102783,
"learning_rate": 9.969524246169424e-06,
"loss": 1.2734,
"step": 1568
},
{
"epoch": 0.4172872340425532,
"grad_norm": 3.7550644874572754,
"learning_rate": 9.969427211463705e-06,
"loss": 1.2207,
"step": 1569
},
{
"epoch": 0.4175531914893617,
"grad_norm": 3.9977076053619385,
"learning_rate": 9.969330022998057e-06,
"loss": 1.3695,
"step": 1570
},
{
"epoch": 0.4178191489361702,
"grad_norm": 4.422798156738281,
"learning_rate": 9.969232680775491e-06,
"loss": 1.3292,
"step": 1571
},
{
"epoch": 0.41808510638297874,
"grad_norm": 4.122771263122559,
"learning_rate": 9.969135184799013e-06,
"loss": 1.3753,
"step": 1572
},
{
"epoch": 0.4183510638297872,
"grad_norm": 3.827120542526245,
"learning_rate": 9.969037535071641e-06,
"loss": 1.2738,
"step": 1573
},
{
"epoch": 0.4186170212765957,
"grad_norm": 3.823761463165283,
"learning_rate": 9.968939731596399e-06,
"loss": 1.2201,
"step": 1574
},
{
"epoch": 0.41888297872340424,
"grad_norm": 4.0475616455078125,
"learning_rate": 9.96884177437631e-06,
"loss": 1.3511,
"step": 1575
},
{
"epoch": 0.41914893617021276,
"grad_norm": 4.167337894439697,
"learning_rate": 9.968743663414408e-06,
"loss": 1.3725,
"step": 1576
},
{
"epoch": 0.4194148936170213,
"grad_norm": 4.683474063873291,
"learning_rate": 9.968645398713726e-06,
"loss": 1.3719,
"step": 1577
},
{
"epoch": 0.4196808510638298,
"grad_norm": 4.450965881347656,
"learning_rate": 9.968546980277305e-06,
"loss": 1.2847,
"step": 1578
},
{
"epoch": 0.4199468085106383,
"grad_norm": 4.25331449508667,
"learning_rate": 9.968448408108191e-06,
"loss": 1.4151,
"step": 1579
},
{
"epoch": 0.42021276595744683,
"grad_norm": 4.090495586395264,
"learning_rate": 9.968349682209434e-06,
"loss": 1.2518,
"step": 1580
},
{
"epoch": 0.4204787234042553,
"grad_norm": 4.116806507110596,
"learning_rate": 9.96825080258409e-06,
"loss": 1.3986,
"step": 1581
},
{
"epoch": 0.4207446808510638,
"grad_norm": 4.016780376434326,
"learning_rate": 9.968151769235216e-06,
"loss": 1.2488,
"step": 1582
},
{
"epoch": 0.42101063829787233,
"grad_norm": 4.153627872467041,
"learning_rate": 9.968052582165874e-06,
"loss": 1.3459,
"step": 1583
},
{
"epoch": 0.42127659574468085,
"grad_norm": 4.0243048667907715,
"learning_rate": 9.96795324137914e-06,
"loss": 1.2554,
"step": 1584
},
{
"epoch": 0.42154255319148937,
"grad_norm": 4.162500381469727,
"learning_rate": 9.96785374687808e-06,
"loss": 1.3597,
"step": 1585
},
{
"epoch": 0.4218085106382979,
"grad_norm": 3.8271100521087646,
"learning_rate": 9.967754098665778e-06,
"loss": 1.2375,
"step": 1586
},
{
"epoch": 0.4220744680851064,
"grad_norm": 3.73313045501709,
"learning_rate": 9.967654296745317e-06,
"loss": 1.1394,
"step": 1587
},
{
"epoch": 0.4223404255319149,
"grad_norm": 4.17546272277832,
"learning_rate": 9.96755434111978e-06,
"loss": 1.3004,
"step": 1588
},
{
"epoch": 0.4226063829787234,
"grad_norm": 3.7987289428710938,
"learning_rate": 9.967454231792267e-06,
"loss": 1.2551,
"step": 1589
},
{
"epoch": 0.4228723404255319,
"grad_norm": 4.171220779418945,
"learning_rate": 9.967353968765868e-06,
"loss": 1.2722,
"step": 1590
},
{
"epoch": 0.4231382978723404,
"grad_norm": 4.090373516082764,
"learning_rate": 9.96725355204369e-06,
"loss": 1.2963,
"step": 1591
},
{
"epoch": 0.42340425531914894,
"grad_norm": 4.222188949584961,
"learning_rate": 9.967152981628841e-06,
"loss": 1.1075,
"step": 1592
},
{
"epoch": 0.42367021276595745,
"grad_norm": 3.9014172554016113,
"learning_rate": 9.967052257524428e-06,
"loss": 1.251,
"step": 1593
},
{
"epoch": 0.423936170212766,
"grad_norm": 4.0223870277404785,
"learning_rate": 9.966951379733572e-06,
"loss": 1.1924,
"step": 1594
},
{
"epoch": 0.4242021276595745,
"grad_norm": 3.724557876586914,
"learning_rate": 9.96685034825939e-06,
"loss": 1.206,
"step": 1595
},
{
"epoch": 0.42446808510638295,
"grad_norm": 4.103020191192627,
"learning_rate": 9.966749163105011e-06,
"loss": 1.374,
"step": 1596
},
{
"epoch": 0.42473404255319147,
"grad_norm": 3.997119188308716,
"learning_rate": 9.966647824273567e-06,
"loss": 1.2097,
"step": 1597
},
{
"epoch": 0.425,
"grad_norm": 4.226285934448242,
"learning_rate": 9.966546331768192e-06,
"loss": 1.3387,
"step": 1598
},
{
"epoch": 0.4252659574468085,
"grad_norm": 4.060708999633789,
"learning_rate": 9.966444685592025e-06,
"loss": 1.2762,
"step": 1599
},
{
"epoch": 0.425531914893617,
"grad_norm": 4.005706787109375,
"learning_rate": 9.966342885748212e-06,
"loss": 1.2845,
"step": 1600
},
{
"epoch": 0.42579787234042554,
"grad_norm": 4.201882839202881,
"learning_rate": 9.966240932239904e-06,
"loss": 1.2953,
"step": 1601
},
{
"epoch": 0.42606382978723406,
"grad_norm": 3.7558727264404297,
"learning_rate": 9.966138825070254e-06,
"loss": 1.2806,
"step": 1602
},
{
"epoch": 0.4263297872340426,
"grad_norm": 3.9751381874084473,
"learning_rate": 9.96603656424242e-06,
"loss": 1.2354,
"step": 1603
},
{
"epoch": 0.42659574468085104,
"grad_norm": 3.775033712387085,
"learning_rate": 9.96593414975957e-06,
"loss": 1.2592,
"step": 1604
},
{
"epoch": 0.42686170212765956,
"grad_norm": 4.114045143127441,
"learning_rate": 9.965831581624872e-06,
"loss": 1.1019,
"step": 1605
},
{
"epoch": 0.4271276595744681,
"grad_norm": 3.6853203773498535,
"learning_rate": 9.965728859841497e-06,
"loss": 1.356,
"step": 1606
},
{
"epoch": 0.4273936170212766,
"grad_norm": 3.8778109550476074,
"learning_rate": 9.965625984412623e-06,
"loss": 1.2266,
"step": 1607
},
{
"epoch": 0.4276595744680851,
"grad_norm": 3.860879421234131,
"learning_rate": 9.965522955341437e-06,
"loss": 1.2998,
"step": 1608
},
{
"epoch": 0.42792553191489363,
"grad_norm": 3.7324464321136475,
"learning_rate": 9.965419772631125e-06,
"loss": 1.3103,
"step": 1609
},
{
"epoch": 0.42819148936170215,
"grad_norm": 3.8030385971069336,
"learning_rate": 9.965316436284877e-06,
"loss": 1.2967,
"step": 1610
},
{
"epoch": 0.4284574468085106,
"grad_norm": 4.376537322998047,
"learning_rate": 9.965212946305893e-06,
"loss": 1.4258,
"step": 1611
},
{
"epoch": 0.42872340425531913,
"grad_norm": 4.365556716918945,
"learning_rate": 9.965109302697376e-06,
"loss": 1.3794,
"step": 1612
},
{
"epoch": 0.42898936170212765,
"grad_norm": 4.431367874145508,
"learning_rate": 9.96500550546253e-06,
"loss": 1.2973,
"step": 1613
},
{
"epoch": 0.42925531914893617,
"grad_norm": 4.084920406341553,
"learning_rate": 9.96490155460457e-06,
"loss": 1.2417,
"step": 1614
},
{
"epoch": 0.4295212765957447,
"grad_norm": 3.6877284049987793,
"learning_rate": 9.964797450126708e-06,
"loss": 1.2577,
"step": 1615
},
{
"epoch": 0.4297872340425532,
"grad_norm": 4.147090911865234,
"learning_rate": 9.964693192032168e-06,
"loss": 1.3127,
"step": 1616
},
{
"epoch": 0.4300531914893617,
"grad_norm": 3.9144530296325684,
"learning_rate": 9.964588780324176e-06,
"loss": 1.2333,
"step": 1617
},
{
"epoch": 0.43031914893617024,
"grad_norm": 3.9510538578033447,
"learning_rate": 9.964484215005963e-06,
"loss": 1.2541,
"step": 1618
},
{
"epoch": 0.4305851063829787,
"grad_norm": 4.1784892082214355,
"learning_rate": 9.964379496080763e-06,
"loss": 1.3247,
"step": 1619
},
{
"epoch": 0.4308510638297872,
"grad_norm": 3.9380571842193604,
"learning_rate": 9.964274623551814e-06,
"loss": 1.3042,
"step": 1620
},
{
"epoch": 0.43111702127659574,
"grad_norm": 3.6729469299316406,
"learning_rate": 9.964169597422367e-06,
"loss": 1.2064,
"step": 1621
},
{
"epoch": 0.43138297872340425,
"grad_norm": 4.168332576751709,
"learning_rate": 9.964064417695666e-06,
"loss": 1.2936,
"step": 1622
},
{
"epoch": 0.43164893617021277,
"grad_norm": 3.7848429679870605,
"learning_rate": 9.963959084374969e-06,
"loss": 1.3055,
"step": 1623
},
{
"epoch": 0.4319148936170213,
"grad_norm": 3.760188579559326,
"learning_rate": 9.963853597463533e-06,
"loss": 1.2085,
"step": 1624
},
{
"epoch": 0.4321808510638298,
"grad_norm": 3.734712839126587,
"learning_rate": 9.963747956964623e-06,
"loss": 1.1788,
"step": 1625
},
{
"epoch": 0.4324468085106383,
"grad_norm": 4.398496627807617,
"learning_rate": 9.963642162881506e-06,
"loss": 1.1853,
"step": 1626
},
{
"epoch": 0.4327127659574468,
"grad_norm": 4.267323970794678,
"learning_rate": 9.963536215217457e-06,
"loss": 1.2317,
"step": 1627
},
{
"epoch": 0.4329787234042553,
"grad_norm": 4.306065082550049,
"learning_rate": 9.963430113975753e-06,
"loss": 1.5309,
"step": 1628
},
{
"epoch": 0.4332446808510638,
"grad_norm": 3.862356424331665,
"learning_rate": 9.963323859159679e-06,
"loss": 1.2449,
"step": 1629
},
{
"epoch": 0.43351063829787234,
"grad_norm": 3.6479053497314453,
"learning_rate": 9.96321745077252e-06,
"loss": 1.1502,
"step": 1630
},
{
"epoch": 0.43377659574468086,
"grad_norm": 3.702998399734497,
"learning_rate": 9.963110888817569e-06,
"loss": 1.1776,
"step": 1631
},
{
"epoch": 0.4340425531914894,
"grad_norm": 4.183767795562744,
"learning_rate": 9.963004173298125e-06,
"loss": 1.2266,
"step": 1632
},
{
"epoch": 0.4343085106382979,
"grad_norm": 3.9834625720977783,
"learning_rate": 9.96289730421749e-06,
"loss": 1.222,
"step": 1633
},
{
"epoch": 0.43457446808510636,
"grad_norm": 3.971428871154785,
"learning_rate": 9.962790281578966e-06,
"loss": 1.3843,
"step": 1634
},
{
"epoch": 0.4348404255319149,
"grad_norm": 3.833468437194824,
"learning_rate": 9.96268310538587e-06,
"loss": 1.3268,
"step": 1635
},
{
"epoch": 0.4351063829787234,
"grad_norm": 3.7899720668792725,
"learning_rate": 9.962575775641516e-06,
"loss": 1.2939,
"step": 1636
},
{
"epoch": 0.4353723404255319,
"grad_norm": 3.8362271785736084,
"learning_rate": 9.962468292349223e-06,
"loss": 1.2681,
"step": 1637
},
{
"epoch": 0.43563829787234043,
"grad_norm": 3.884549140930176,
"learning_rate": 9.96236065551232e-06,
"loss": 1.267,
"step": 1638
},
{
"epoch": 0.43590425531914895,
"grad_norm": 3.975801944732666,
"learning_rate": 9.962252865134136e-06,
"loss": 1.3039,
"step": 1639
},
{
"epoch": 0.43617021276595747,
"grad_norm": 4.278522491455078,
"learning_rate": 9.962144921218005e-06,
"loss": 1.3885,
"step": 1640
},
{
"epoch": 0.436436170212766,
"grad_norm": 3.9850552082061768,
"learning_rate": 9.962036823767269e-06,
"loss": 1.2586,
"step": 1641
},
{
"epoch": 0.43670212765957445,
"grad_norm": 4.315723419189453,
"learning_rate": 9.961928572785272e-06,
"loss": 1.3281,
"step": 1642
},
{
"epoch": 0.43696808510638296,
"grad_norm": 3.7114546298980713,
"learning_rate": 9.96182016827536e-06,
"loss": 1.1813,
"step": 1643
},
{
"epoch": 0.4372340425531915,
"grad_norm": 4.079943656921387,
"learning_rate": 9.961711610240892e-06,
"loss": 1.2878,
"step": 1644
},
{
"epoch": 0.4375,
"grad_norm": 3.7427685260772705,
"learning_rate": 9.961602898685225e-06,
"loss": 1.3068,
"step": 1645
},
{
"epoch": 0.4377659574468085,
"grad_norm": 4.234682083129883,
"learning_rate": 9.961494033611726e-06,
"loss": 1.4143,
"step": 1646
},
{
"epoch": 0.43803191489361704,
"grad_norm": 3.7043113708496094,
"learning_rate": 9.961385015023755e-06,
"loss": 1.356,
"step": 1647
},
{
"epoch": 0.43829787234042555,
"grad_norm": 3.9575397968292236,
"learning_rate": 9.961275842924694e-06,
"loss": 1.3257,
"step": 1648
},
{
"epoch": 0.438563829787234,
"grad_norm": 4.285686016082764,
"learning_rate": 9.961166517317914e-06,
"loss": 1.2934,
"step": 1649
},
{
"epoch": 0.43882978723404253,
"grad_norm": 4.141624927520752,
"learning_rate": 9.961057038206804e-06,
"loss": 1.1941,
"step": 1650
},
{
"epoch": 0.43909574468085105,
"grad_norm": 3.7219042778015137,
"learning_rate": 9.960947405594747e-06,
"loss": 1.309,
"step": 1651
},
{
"epoch": 0.43936170212765957,
"grad_norm": 4.113218307495117,
"learning_rate": 9.960837619485136e-06,
"loss": 1.2331,
"step": 1652
},
{
"epoch": 0.4396276595744681,
"grad_norm": 4.069479465484619,
"learning_rate": 9.96072767988137e-06,
"loss": 1.1383,
"step": 1653
},
{
"epoch": 0.4398936170212766,
"grad_norm": 3.974097967147827,
"learning_rate": 9.960617586786847e-06,
"loss": 1.2015,
"step": 1654
},
{
"epoch": 0.4401595744680851,
"grad_norm": 3.991530656814575,
"learning_rate": 9.960507340204977e-06,
"loss": 1.254,
"step": 1655
},
{
"epoch": 0.44042553191489364,
"grad_norm": 4.121614933013916,
"learning_rate": 9.960396940139169e-06,
"loss": 1.4372,
"step": 1656
},
{
"epoch": 0.4406914893617021,
"grad_norm": 4.809171676635742,
"learning_rate": 9.960286386592839e-06,
"loss": 1.1771,
"step": 1657
},
{
"epoch": 0.4409574468085106,
"grad_norm": 3.7910423278808594,
"learning_rate": 9.960175679569409e-06,
"loss": 1.4103,
"step": 1658
},
{
"epoch": 0.44122340425531914,
"grad_norm": 3.5597236156463623,
"learning_rate": 9.960064819072305e-06,
"loss": 1.2461,
"step": 1659
},
{
"epoch": 0.44148936170212766,
"grad_norm": 4.393692493438721,
"learning_rate": 9.959953805104953e-06,
"loss": 1.3746,
"step": 1660
},
{
"epoch": 0.4417553191489362,
"grad_norm": 4.309146881103516,
"learning_rate": 9.959842637670791e-06,
"loss": 1.2619,
"step": 1661
},
{
"epoch": 0.4420212765957447,
"grad_norm": 4.537207126617432,
"learning_rate": 9.95973131677326e-06,
"loss": 1.2895,
"step": 1662
},
{
"epoch": 0.4422872340425532,
"grad_norm": 4.204534530639648,
"learning_rate": 9.959619842415802e-06,
"loss": 1.2458,
"step": 1663
},
{
"epoch": 0.4425531914893617,
"grad_norm": 3.859935998916626,
"learning_rate": 9.959508214601866e-06,
"loss": 1.2334,
"step": 1664
},
{
"epoch": 0.4428191489361702,
"grad_norm": 4.042413711547852,
"learning_rate": 9.959396433334907e-06,
"loss": 1.451,
"step": 1665
},
{
"epoch": 0.4430851063829787,
"grad_norm": 4.226952075958252,
"learning_rate": 9.959284498618385e-06,
"loss": 1.3204,
"step": 1666
},
{
"epoch": 0.44335106382978723,
"grad_norm": 4.049594402313232,
"learning_rate": 9.95917241045576e-06,
"loss": 1.3671,
"step": 1667
},
{
"epoch": 0.44361702127659575,
"grad_norm": 3.731627941131592,
"learning_rate": 9.959060168850504e-06,
"loss": 1.289,
"step": 1668
},
{
"epoch": 0.44388297872340426,
"grad_norm": 4.097120761871338,
"learning_rate": 9.958947773806084e-06,
"loss": 1.2126,
"step": 1669
},
{
"epoch": 0.4441489361702128,
"grad_norm": 4.148438930511475,
"learning_rate": 9.958835225325984e-06,
"loss": 1.1967,
"step": 1670
},
{
"epoch": 0.4444148936170213,
"grad_norm": 3.9843711853027344,
"learning_rate": 9.958722523413685e-06,
"loss": 1.3463,
"step": 1671
},
{
"epoch": 0.44468085106382976,
"grad_norm": 4.3066630363464355,
"learning_rate": 9.958609668072673e-06,
"loss": 1.4344,
"step": 1672
},
{
"epoch": 0.4449468085106383,
"grad_norm": 3.673088550567627,
"learning_rate": 9.958496659306436e-06,
"loss": 1.3849,
"step": 1673
},
{
"epoch": 0.4452127659574468,
"grad_norm": 4.2683210372924805,
"learning_rate": 9.958383497118478e-06,
"loss": 1.3148,
"step": 1674
},
{
"epoch": 0.4454787234042553,
"grad_norm": 3.677374839782715,
"learning_rate": 9.958270181512295e-06,
"loss": 1.1148,
"step": 1675
},
{
"epoch": 0.44574468085106383,
"grad_norm": 4.075168132781982,
"learning_rate": 9.958156712491396e-06,
"loss": 1.4016,
"step": 1676
},
{
"epoch": 0.44601063829787235,
"grad_norm": 4.137705326080322,
"learning_rate": 9.95804309005929e-06,
"loss": 1.3865,
"step": 1677
},
{
"epoch": 0.44627659574468087,
"grad_norm": 3.7367939949035645,
"learning_rate": 9.957929314219494e-06,
"loss": 1.3304,
"step": 1678
},
{
"epoch": 0.4465425531914894,
"grad_norm": 3.8000895977020264,
"learning_rate": 9.957815384975528e-06,
"loss": 1.4171,
"step": 1679
},
{
"epoch": 0.44680851063829785,
"grad_norm": 3.774846315383911,
"learning_rate": 9.957701302330915e-06,
"loss": 1.0019,
"step": 1680
},
{
"epoch": 0.44707446808510637,
"grad_norm": 3.7514147758483887,
"learning_rate": 9.957587066289189e-06,
"loss": 1.0711,
"step": 1681
},
{
"epoch": 0.4473404255319149,
"grad_norm": 4.298345565795898,
"learning_rate": 9.957472676853882e-06,
"loss": 1.2902,
"step": 1682
},
{
"epoch": 0.4476063829787234,
"grad_norm": 3.632465362548828,
"learning_rate": 9.957358134028535e-06,
"loss": 1.1969,
"step": 1683
},
{
"epoch": 0.4478723404255319,
"grad_norm": 3.680661201477051,
"learning_rate": 9.957243437816688e-06,
"loss": 1.2266,
"step": 1684
},
{
"epoch": 0.44813829787234044,
"grad_norm": 3.757211208343506,
"learning_rate": 9.957128588221895e-06,
"loss": 1.2374,
"step": 1685
},
{
"epoch": 0.44840425531914896,
"grad_norm": 3.93074107170105,
"learning_rate": 9.957013585247703e-06,
"loss": 1.2285,
"step": 1686
},
{
"epoch": 0.4486702127659574,
"grad_norm": 4.218538284301758,
"learning_rate": 9.95689842889768e-06,
"loss": 1.1887,
"step": 1687
},
{
"epoch": 0.44893617021276594,
"grad_norm": 4.04231595993042,
"learning_rate": 9.95678311917538e-06,
"loss": 1.3696,
"step": 1688
},
{
"epoch": 0.44920212765957446,
"grad_norm": 3.7490601539611816,
"learning_rate": 9.956667656084376e-06,
"loss": 1.2857,
"step": 1689
},
{
"epoch": 0.449468085106383,
"grad_norm": 3.642409324645996,
"learning_rate": 9.956552039628237e-06,
"loss": 1.1536,
"step": 1690
},
{
"epoch": 0.4497340425531915,
"grad_norm": 4.070724964141846,
"learning_rate": 9.956436269810543e-06,
"loss": 1.3129,
"step": 1691
},
{
"epoch": 0.45,
"grad_norm": 3.6677682399749756,
"learning_rate": 9.956320346634877e-06,
"loss": 1.2578,
"step": 1692
},
{
"epoch": 0.45026595744680853,
"grad_norm": 3.783087730407715,
"learning_rate": 9.956204270104823e-06,
"loss": 1.2943,
"step": 1693
},
{
"epoch": 0.45053191489361705,
"grad_norm": 4.206989765167236,
"learning_rate": 9.956088040223975e-06,
"loss": 1.4913,
"step": 1694
},
{
"epoch": 0.4507978723404255,
"grad_norm": 4.3370819091796875,
"learning_rate": 9.955971656995927e-06,
"loss": 1.1996,
"step": 1695
},
{
"epoch": 0.451063829787234,
"grad_norm": 3.9697062969207764,
"learning_rate": 9.95585512042428e-06,
"loss": 1.253,
"step": 1696
},
{
"epoch": 0.45132978723404255,
"grad_norm": 3.6939969062805176,
"learning_rate": 9.95573843051264e-06,
"loss": 1.1627,
"step": 1697
},
{
"epoch": 0.45159574468085106,
"grad_norm": 4.0041351318359375,
"learning_rate": 9.955621587264621e-06,
"loss": 1.2185,
"step": 1698
},
{
"epoch": 0.4518617021276596,
"grad_norm": 4.0276079177856445,
"learning_rate": 9.955504590683834e-06,
"loss": 1.2071,
"step": 1699
},
{
"epoch": 0.4521276595744681,
"grad_norm": 4.058544158935547,
"learning_rate": 9.955387440773902e-06,
"loss": 1.2284,
"step": 1700
},
{
"epoch": 0.4523936170212766,
"grad_norm": 3.8239941596984863,
"learning_rate": 9.955270137538446e-06,
"loss": 1.3371,
"step": 1701
},
{
"epoch": 0.4526595744680851,
"grad_norm": 4.147292613983154,
"learning_rate": 9.955152680981099e-06,
"loss": 1.3542,
"step": 1702
},
{
"epoch": 0.4529255319148936,
"grad_norm": 3.7271342277526855,
"learning_rate": 9.955035071105495e-06,
"loss": 1.0038,
"step": 1703
},
{
"epoch": 0.4531914893617021,
"grad_norm": 4.002806663513184,
"learning_rate": 9.954917307915272e-06,
"loss": 1.3361,
"step": 1704
},
{
"epoch": 0.45345744680851063,
"grad_norm": 3.8606765270233154,
"learning_rate": 9.954799391414073e-06,
"loss": 1.2703,
"step": 1705
},
{
"epoch": 0.45372340425531915,
"grad_norm": 4.117914199829102,
"learning_rate": 9.954681321605546e-06,
"loss": 1.4262,
"step": 1706
},
{
"epoch": 0.45398936170212767,
"grad_norm": 3.956178903579712,
"learning_rate": 9.954563098493349e-06,
"loss": 1.2889,
"step": 1707
},
{
"epoch": 0.4542553191489362,
"grad_norm": 3.8659157752990723,
"learning_rate": 9.954444722081133e-06,
"loss": 1.2892,
"step": 1708
},
{
"epoch": 0.4545212765957447,
"grad_norm": 3.936624765396118,
"learning_rate": 9.954326192372565e-06,
"loss": 1.5031,
"step": 1709
},
{
"epoch": 0.45478723404255317,
"grad_norm": 3.8671083450317383,
"learning_rate": 9.954207509371313e-06,
"loss": 1.3221,
"step": 1710
},
{
"epoch": 0.4550531914893617,
"grad_norm": 4.292788505554199,
"learning_rate": 9.954088673081048e-06,
"loss": 1.3216,
"step": 1711
},
{
"epoch": 0.4553191489361702,
"grad_norm": 3.8020899295806885,
"learning_rate": 9.953969683505444e-06,
"loss": 1.2248,
"step": 1712
},
{
"epoch": 0.4555851063829787,
"grad_norm": 4.227027893066406,
"learning_rate": 9.953850540648189e-06,
"loss": 1.2624,
"step": 1713
},
{
"epoch": 0.45585106382978724,
"grad_norm": 4.067933559417725,
"learning_rate": 9.953731244512963e-06,
"loss": 1.2756,
"step": 1714
},
{
"epoch": 0.45611702127659576,
"grad_norm": 3.9916749000549316,
"learning_rate": 9.953611795103462e-06,
"loss": 1.2651,
"step": 1715
},
{
"epoch": 0.4563829787234043,
"grad_norm": 4.110116004943848,
"learning_rate": 9.953492192423379e-06,
"loss": 1.3669,
"step": 1716
},
{
"epoch": 0.4566489361702128,
"grad_norm": 4.194306373596191,
"learning_rate": 9.953372436476414e-06,
"loss": 1.534,
"step": 1717
},
{
"epoch": 0.45691489361702126,
"grad_norm": 3.9467716217041016,
"learning_rate": 9.953252527266275e-06,
"loss": 1.2748,
"step": 1718
},
{
"epoch": 0.4571808510638298,
"grad_norm": 4.1253886222839355,
"learning_rate": 9.953132464796674e-06,
"loss": 1.2625,
"step": 1719
},
{
"epoch": 0.4574468085106383,
"grad_norm": 4.45941162109375,
"learning_rate": 9.95301224907132e-06,
"loss": 1.3565,
"step": 1720
},
{
"epoch": 0.4577127659574468,
"grad_norm": 4.033083915710449,
"learning_rate": 9.952891880093935e-06,
"loss": 1.2789,
"step": 1721
},
{
"epoch": 0.45797872340425533,
"grad_norm": 4.035634517669678,
"learning_rate": 9.952771357868245e-06,
"loss": 1.2641,
"step": 1722
},
{
"epoch": 0.45824468085106385,
"grad_norm": 3.722550630569458,
"learning_rate": 9.952650682397978e-06,
"loss": 1.3316,
"step": 1723
},
{
"epoch": 0.45851063829787236,
"grad_norm": 3.8771049976348877,
"learning_rate": 9.952529853686868e-06,
"loss": 1.3889,
"step": 1724
},
{
"epoch": 0.4587765957446808,
"grad_norm": 4.175072193145752,
"learning_rate": 9.952408871738652e-06,
"loss": 1.3766,
"step": 1725
},
{
"epoch": 0.45904255319148934,
"grad_norm": 3.859618902206421,
"learning_rate": 9.952287736557078e-06,
"loss": 1.1251,
"step": 1726
},
{
"epoch": 0.45930851063829786,
"grad_norm": 4.060375213623047,
"learning_rate": 9.952166448145887e-06,
"loss": 1.2308,
"step": 1727
},
{
"epoch": 0.4595744680851064,
"grad_norm": 3.9827208518981934,
"learning_rate": 9.952045006508839e-06,
"loss": 1.2434,
"step": 1728
},
{
"epoch": 0.4598404255319149,
"grad_norm": 3.8347811698913574,
"learning_rate": 9.951923411649686e-06,
"loss": 1.1165,
"step": 1729
},
{
"epoch": 0.4601063829787234,
"grad_norm": 3.8551104068756104,
"learning_rate": 9.951801663572194e-06,
"loss": 1.2536,
"step": 1730
},
{
"epoch": 0.46037234042553193,
"grad_norm": 4.300414562225342,
"learning_rate": 9.951679762280127e-06,
"loss": 1.3653,
"step": 1731
},
{
"epoch": 0.46063829787234045,
"grad_norm": 3.9349825382232666,
"learning_rate": 9.95155770777726e-06,
"loss": 1.1563,
"step": 1732
},
{
"epoch": 0.4609042553191489,
"grad_norm": 4.161105632781982,
"learning_rate": 9.951435500067366e-06,
"loss": 1.3807,
"step": 1733
},
{
"epoch": 0.46117021276595743,
"grad_norm": 4.0084686279296875,
"learning_rate": 9.95131313915423e-06,
"loss": 1.2486,
"step": 1734
},
{
"epoch": 0.46143617021276595,
"grad_norm": 3.6559159755706787,
"learning_rate": 9.951190625041634e-06,
"loss": 1.2063,
"step": 1735
},
{
"epoch": 0.46170212765957447,
"grad_norm": 3.99893856048584,
"learning_rate": 9.95106795773337e-06,
"loss": 1.2945,
"step": 1736
},
{
"epoch": 0.461968085106383,
"grad_norm": 4.061460018157959,
"learning_rate": 9.950945137233237e-06,
"loss": 1.3383,
"step": 1737
},
{
"epoch": 0.4622340425531915,
"grad_norm": 4.054213047027588,
"learning_rate": 9.950822163545032e-06,
"loss": 1.2836,
"step": 1738
},
{
"epoch": 0.4625,
"grad_norm": 3.9057390689849854,
"learning_rate": 9.95069903667256e-06,
"loss": 1.2157,
"step": 1739
},
{
"epoch": 0.4627659574468085,
"grad_norm": 3.977504014968872,
"learning_rate": 9.95057575661963e-06,
"loss": 1.322,
"step": 1740
},
{
"epoch": 0.463031914893617,
"grad_norm": 3.478853702545166,
"learning_rate": 9.950452323390058e-06,
"loss": 1.1772,
"step": 1741
},
{
"epoch": 0.4632978723404255,
"grad_norm": 3.8592848777770996,
"learning_rate": 9.950328736987664e-06,
"loss": 1.3234,
"step": 1742
},
{
"epoch": 0.46356382978723404,
"grad_norm": 3.858339309692383,
"learning_rate": 9.95020499741627e-06,
"loss": 1.3079,
"step": 1743
},
{
"epoch": 0.46382978723404256,
"grad_norm": 3.797468900680542,
"learning_rate": 9.950081104679704e-06,
"loss": 1.1611,
"step": 1744
},
{
"epoch": 0.4640957446808511,
"grad_norm": 3.9753012657165527,
"learning_rate": 9.949957058781802e-06,
"loss": 1.3449,
"step": 1745
},
{
"epoch": 0.4643617021276596,
"grad_norm": 4.22615385055542,
"learning_rate": 9.9498328597264e-06,
"loss": 1.1605,
"step": 1746
},
{
"epoch": 0.4646276595744681,
"grad_norm": 4.091019153594971,
"learning_rate": 9.949708507517342e-06,
"loss": 1.2877,
"step": 1747
},
{
"epoch": 0.4648936170212766,
"grad_norm": 4.121149063110352,
"learning_rate": 9.949584002158474e-06,
"loss": 1.2463,
"step": 1748
},
{
"epoch": 0.4651595744680851,
"grad_norm": 4.406885147094727,
"learning_rate": 9.949459343653652e-06,
"loss": 1.3303,
"step": 1749
},
{
"epoch": 0.4654255319148936,
"grad_norm": 4.5540666580200195,
"learning_rate": 9.94933453200673e-06,
"loss": 1.3149,
"step": 1750
},
{
"epoch": 0.4656914893617021,
"grad_norm": 3.9736440181732178,
"learning_rate": 9.949209567221569e-06,
"loss": 1.4947,
"step": 1751
},
{
"epoch": 0.46595744680851064,
"grad_norm": 4.265797138214111,
"learning_rate": 9.949084449302038e-06,
"loss": 1.2727,
"step": 1752
},
{
"epoch": 0.46622340425531916,
"grad_norm": 3.906663656234741,
"learning_rate": 9.948959178252007e-06,
"loss": 1.2346,
"step": 1753
},
{
"epoch": 0.4664893617021277,
"grad_norm": 3.8884990215301514,
"learning_rate": 9.948833754075351e-06,
"loss": 1.2997,
"step": 1754
},
{
"epoch": 0.46675531914893614,
"grad_norm": 3.943458080291748,
"learning_rate": 9.948708176775954e-06,
"loss": 1.2945,
"step": 1755
},
{
"epoch": 0.46702127659574466,
"grad_norm": 3.9176204204559326,
"learning_rate": 9.9485824463577e-06,
"loss": 1.2714,
"step": 1756
},
{
"epoch": 0.4672872340425532,
"grad_norm": 3.834636926651001,
"learning_rate": 9.948456562824478e-06,
"loss": 1.1341,
"step": 1757
},
{
"epoch": 0.4675531914893617,
"grad_norm": 3.8121955394744873,
"learning_rate": 9.948330526180183e-06,
"loss": 1.3064,
"step": 1758
},
{
"epoch": 0.4678191489361702,
"grad_norm": 4.121542930603027,
"learning_rate": 9.948204336428717e-06,
"loss": 1.2775,
"step": 1759
},
{
"epoch": 0.46808510638297873,
"grad_norm": 4.043048858642578,
"learning_rate": 9.948077993573983e-06,
"loss": 1.2601,
"step": 1760
},
{
"epoch": 0.46835106382978725,
"grad_norm": 3.7144079208374023,
"learning_rate": 9.94795149761989e-06,
"loss": 1.1136,
"step": 1761
},
{
"epoch": 0.46861702127659577,
"grad_norm": 4.818117141723633,
"learning_rate": 9.947824848570352e-06,
"loss": 1.4366,
"step": 1762
},
{
"epoch": 0.46888297872340423,
"grad_norm": 4.190409183502197,
"learning_rate": 9.947698046429287e-06,
"loss": 1.2308,
"step": 1763
},
{
"epoch": 0.46914893617021275,
"grad_norm": 4.0341267585754395,
"learning_rate": 9.94757109120062e-06,
"loss": 1.2466,
"step": 1764
},
{
"epoch": 0.46941489361702127,
"grad_norm": 3.9223225116729736,
"learning_rate": 9.947443982888279e-06,
"loss": 1.212,
"step": 1765
},
{
"epoch": 0.4696808510638298,
"grad_norm": 4.121956825256348,
"learning_rate": 9.947316721496196e-06,
"loss": 1.2635,
"step": 1766
},
{
"epoch": 0.4699468085106383,
"grad_norm": 3.9485208988189697,
"learning_rate": 9.947189307028308e-06,
"loss": 1.3579,
"step": 1767
},
{
"epoch": 0.4702127659574468,
"grad_norm": 4.009948253631592,
"learning_rate": 9.947061739488559e-06,
"loss": 1.4448,
"step": 1768
},
{
"epoch": 0.47047872340425534,
"grad_norm": 4.2954912185668945,
"learning_rate": 9.946934018880896e-06,
"loss": 1.1665,
"step": 1769
},
{
"epoch": 0.47074468085106386,
"grad_norm": 3.6225626468658447,
"learning_rate": 9.94680614520927e-06,
"loss": 1.2863,
"step": 1770
},
{
"epoch": 0.4710106382978723,
"grad_norm": 3.9409780502319336,
"learning_rate": 9.946678118477635e-06,
"loss": 1.1042,
"step": 1771
},
{
"epoch": 0.47127659574468084,
"grad_norm": 3.5868918895721436,
"learning_rate": 9.946549938689958e-06,
"loss": 1.1924,
"step": 1772
},
{
"epoch": 0.47154255319148936,
"grad_norm": 3.5596354007720947,
"learning_rate": 9.946421605850201e-06,
"loss": 1.1459,
"step": 1773
},
{
"epoch": 0.4718085106382979,
"grad_norm": 3.595719337463379,
"learning_rate": 9.946293119962336e-06,
"loss": 1.2274,
"step": 1774
},
{
"epoch": 0.4720744680851064,
"grad_norm": 4.341657638549805,
"learning_rate": 9.946164481030339e-06,
"loss": 1.433,
"step": 1775
},
{
"epoch": 0.4723404255319149,
"grad_norm": 4.137777328491211,
"learning_rate": 9.946035689058189e-06,
"loss": 1.3307,
"step": 1776
},
{
"epoch": 0.4726063829787234,
"grad_norm": 4.115199565887451,
"learning_rate": 9.94590674404987e-06,
"loss": 1.3575,
"step": 1777
},
{
"epoch": 0.4728723404255319,
"grad_norm": 3.9467270374298096,
"learning_rate": 9.945777646009375e-06,
"loss": 1.1772,
"step": 1778
},
{
"epoch": 0.4731382978723404,
"grad_norm": 3.986268997192383,
"learning_rate": 9.945648394940697e-06,
"loss": 1.3949,
"step": 1779
},
{
"epoch": 0.4734042553191489,
"grad_norm": 4.070546627044678,
"learning_rate": 9.945518990847835e-06,
"loss": 1.3664,
"step": 1780
},
{
"epoch": 0.47367021276595744,
"grad_norm": 4.0783233642578125,
"learning_rate": 9.94538943373479e-06,
"loss": 1.3199,
"step": 1781
},
{
"epoch": 0.47393617021276596,
"grad_norm": 4.331148147583008,
"learning_rate": 9.945259723605579e-06,
"loss": 1.3809,
"step": 1782
},
{
"epoch": 0.4742021276595745,
"grad_norm": 4.163266658782959,
"learning_rate": 9.945129860464205e-06,
"loss": 1.3325,
"step": 1783
},
{
"epoch": 0.474468085106383,
"grad_norm": 4.23274564743042,
"learning_rate": 9.944999844314693e-06,
"loss": 1.3793,
"step": 1784
},
{
"epoch": 0.4747340425531915,
"grad_norm": 4.219319820404053,
"learning_rate": 9.944869675161062e-06,
"loss": 1.3631,
"step": 1785
},
{
"epoch": 0.475,
"grad_norm": 4.5794830322265625,
"learning_rate": 9.944739353007344e-06,
"loss": 1.3941,
"step": 1786
},
{
"epoch": 0.4752659574468085,
"grad_norm": 3.806102752685547,
"learning_rate": 9.944608877857567e-06,
"loss": 1.2896,
"step": 1787
},
{
"epoch": 0.475531914893617,
"grad_norm": 3.927706241607666,
"learning_rate": 9.94447824971577e-06,
"loss": 1.4121,
"step": 1788
},
{
"epoch": 0.47579787234042553,
"grad_norm": 3.8713526725769043,
"learning_rate": 9.944347468585995e-06,
"loss": 1.3029,
"step": 1789
},
{
"epoch": 0.47606382978723405,
"grad_norm": 3.6732828617095947,
"learning_rate": 9.944216534472287e-06,
"loss": 1.2379,
"step": 1790
},
{
"epoch": 0.47632978723404257,
"grad_norm": 4.1793084144592285,
"learning_rate": 9.9440854473787e-06,
"loss": 1.391,
"step": 1791
},
{
"epoch": 0.4765957446808511,
"grad_norm": 4.131939888000488,
"learning_rate": 9.943954207309287e-06,
"loss": 1.2346,
"step": 1792
},
{
"epoch": 0.47686170212765955,
"grad_norm": 4.083577632904053,
"learning_rate": 9.94382281426811e-06,
"loss": 1.4478,
"step": 1793
},
{
"epoch": 0.47712765957446807,
"grad_norm": 3.640902280807495,
"learning_rate": 9.943691268259234e-06,
"loss": 1.2515,
"step": 1794
},
{
"epoch": 0.4773936170212766,
"grad_norm": 4.226308345794678,
"learning_rate": 9.943559569286731e-06,
"loss": 1.3599,
"step": 1795
},
{
"epoch": 0.4776595744680851,
"grad_norm": 4.301510810852051,
"learning_rate": 9.943427717354674e-06,
"loss": 1.2623,
"step": 1796
},
{
"epoch": 0.4779255319148936,
"grad_norm": 3.6332836151123047,
"learning_rate": 9.943295712467145e-06,
"loss": 1.2776,
"step": 1797
},
{
"epoch": 0.47819148936170214,
"grad_norm": 3.6086063385009766,
"learning_rate": 9.943163554628223e-06,
"loss": 1.2306,
"step": 1798
},
{
"epoch": 0.47845744680851066,
"grad_norm": 3.787510395050049,
"learning_rate": 9.943031243842004e-06,
"loss": 1.3904,
"step": 1799
},
{
"epoch": 0.4787234042553192,
"grad_norm": 4.257116317749023,
"learning_rate": 9.942898780112578e-06,
"loss": 1.2504,
"step": 1800
},
{
"epoch": 0.47898936170212764,
"grad_norm": 4.033913612365723,
"learning_rate": 9.942766163444044e-06,
"loss": 1.1252,
"step": 1801
},
{
"epoch": 0.47925531914893615,
"grad_norm": 3.9039859771728516,
"learning_rate": 9.942633393840504e-06,
"loss": 1.2183,
"step": 1802
},
{
"epoch": 0.47952127659574467,
"grad_norm": 4.116021156311035,
"learning_rate": 9.94250047130607e-06,
"loss": 1.3872,
"step": 1803
},
{
"epoch": 0.4797872340425532,
"grad_norm": 4.146193504333496,
"learning_rate": 9.94236739584485e-06,
"loss": 1.2302,
"step": 1804
},
{
"epoch": 0.4800531914893617,
"grad_norm": 4.098079681396484,
"learning_rate": 9.942234167460966e-06,
"loss": 1.3785,
"step": 1805
},
{
"epoch": 0.4803191489361702,
"grad_norm": 3.643486976623535,
"learning_rate": 9.942100786158537e-06,
"loss": 1.1499,
"step": 1806
},
{
"epoch": 0.48058510638297874,
"grad_norm": 4.246469974517822,
"learning_rate": 9.94196725194169e-06,
"loss": 1.3295,
"step": 1807
},
{
"epoch": 0.4808510638297872,
"grad_norm": 3.857382297515869,
"learning_rate": 9.94183356481456e-06,
"loss": 1.325,
"step": 1808
},
{
"epoch": 0.4811170212765957,
"grad_norm": 3.5324032306671143,
"learning_rate": 9.94169972478128e-06,
"loss": 1.1482,
"step": 1809
},
{
"epoch": 0.48138297872340424,
"grad_norm": 3.7972612380981445,
"learning_rate": 9.941565731845993e-06,
"loss": 1.4476,
"step": 1810
},
{
"epoch": 0.48164893617021276,
"grad_norm": 3.770042896270752,
"learning_rate": 9.941431586012844e-06,
"loss": 1.3034,
"step": 1811
},
{
"epoch": 0.4819148936170213,
"grad_norm": 3.675645351409912,
"learning_rate": 9.941297287285984e-06,
"loss": 1.2526,
"step": 1812
},
{
"epoch": 0.4821808510638298,
"grad_norm": 3.526350975036621,
"learning_rate": 9.941162835669568e-06,
"loss": 1.1573,
"step": 1813
},
{
"epoch": 0.4824468085106383,
"grad_norm": 3.4532649517059326,
"learning_rate": 9.941028231167756e-06,
"loss": 1.1735,
"step": 1814
},
{
"epoch": 0.48271276595744683,
"grad_norm": 3.9783992767333984,
"learning_rate": 9.940893473784714e-06,
"loss": 1.3828,
"step": 1815
},
{
"epoch": 0.4829787234042553,
"grad_norm": 4.059201717376709,
"learning_rate": 9.940758563524611e-06,
"loss": 1.2649,
"step": 1816
},
{
"epoch": 0.4832446808510638,
"grad_norm": 4.069849491119385,
"learning_rate": 9.94062350039162e-06,
"loss": 1.2833,
"step": 1817
},
{
"epoch": 0.48351063829787233,
"grad_norm": 3.488699197769165,
"learning_rate": 9.940488284389923e-06,
"loss": 1.0884,
"step": 1818
},
{
"epoch": 0.48377659574468085,
"grad_norm": 3.721902370452881,
"learning_rate": 9.940352915523699e-06,
"loss": 1.2442,
"step": 1819
},
{
"epoch": 0.48404255319148937,
"grad_norm": 4.082354545593262,
"learning_rate": 9.94021739379714e-06,
"loss": 1.3406,
"step": 1820
},
{
"epoch": 0.4843085106382979,
"grad_norm": 3.9286141395568848,
"learning_rate": 9.94008171921444e-06,
"loss": 1.2856,
"step": 1821
},
{
"epoch": 0.4845744680851064,
"grad_norm": 3.968208074569702,
"learning_rate": 9.939945891779795e-06,
"loss": 1.3172,
"step": 1822
},
{
"epoch": 0.4848404255319149,
"grad_norm": 4.114230155944824,
"learning_rate": 9.939809911497407e-06,
"loss": 1.2936,
"step": 1823
},
{
"epoch": 0.4851063829787234,
"grad_norm": 3.840162754058838,
"learning_rate": 9.939673778371484e-06,
"loss": 1.3923,
"step": 1824
},
{
"epoch": 0.4853723404255319,
"grad_norm": 4.272914886474609,
"learning_rate": 9.939537492406239e-06,
"loss": 1.2932,
"step": 1825
},
{
"epoch": 0.4856382978723404,
"grad_norm": 3.7386868000030518,
"learning_rate": 9.939401053605889e-06,
"loss": 1.3849,
"step": 1826
},
{
"epoch": 0.48590425531914894,
"grad_norm": 4.278271675109863,
"learning_rate": 9.939264461974654e-06,
"loss": 1.2878,
"step": 1827
},
{
"epoch": 0.48617021276595745,
"grad_norm": 3.827216386795044,
"learning_rate": 9.939127717516763e-06,
"loss": 1.2833,
"step": 1828
},
{
"epoch": 0.486436170212766,
"grad_norm": 3.888113498687744,
"learning_rate": 9.938990820236445e-06,
"loss": 1.2384,
"step": 1829
},
{
"epoch": 0.4867021276595745,
"grad_norm": 3.886965036392212,
"learning_rate": 9.938853770137935e-06,
"loss": 1.3365,
"step": 1830
},
{
"epoch": 0.48696808510638295,
"grad_norm": 3.9059507846832275,
"learning_rate": 9.938716567225475e-06,
"loss": 1.3569,
"step": 1831
},
{
"epoch": 0.48723404255319147,
"grad_norm": 3.922834634780884,
"learning_rate": 9.93857921150331e-06,
"loss": 1.2035,
"step": 1832
},
{
"epoch": 0.4875,
"grad_norm": 3.949385643005371,
"learning_rate": 9.938441702975689e-06,
"loss": 1.3485,
"step": 1833
},
{
"epoch": 0.4877659574468085,
"grad_norm": 4.1959333419799805,
"learning_rate": 9.938304041646869e-06,
"loss": 1.3079,
"step": 1834
},
{
"epoch": 0.488031914893617,
"grad_norm": 3.98871111869812,
"learning_rate": 9.938166227521106e-06,
"loss": 1.3067,
"step": 1835
},
{
"epoch": 0.48829787234042554,
"grad_norm": 4.129928112030029,
"learning_rate": 9.938028260602668e-06,
"loss": 1.3053,
"step": 1836
},
{
"epoch": 0.48856382978723406,
"grad_norm": 4.131626129150391,
"learning_rate": 9.937890140895819e-06,
"loss": 1.3332,
"step": 1837
},
{
"epoch": 0.4888297872340426,
"grad_norm": 3.8896591663360596,
"learning_rate": 9.937751868404838e-06,
"loss": 1.2105,
"step": 1838
},
{
"epoch": 0.48909574468085104,
"grad_norm": 3.6959292888641357,
"learning_rate": 9.937613443134e-06,
"loss": 1.1607,
"step": 1839
},
{
"epoch": 0.48936170212765956,
"grad_norm": 4.914716720581055,
"learning_rate": 9.937474865087588e-06,
"loss": 1.1406,
"step": 1840
},
{
"epoch": 0.4896276595744681,
"grad_norm": 3.811239004135132,
"learning_rate": 9.93733613426989e-06,
"loss": 1.2047,
"step": 1841
},
{
"epoch": 0.4898936170212766,
"grad_norm": 3.8995115756988525,
"learning_rate": 9.937197250685202e-06,
"loss": 1.1582,
"step": 1842
},
{
"epoch": 0.4901595744680851,
"grad_norm": 3.6087286472320557,
"learning_rate": 9.937058214337817e-06,
"loss": 1.1866,
"step": 1843
},
{
"epoch": 0.49042553191489363,
"grad_norm": 3.854526996612549,
"learning_rate": 9.936919025232036e-06,
"loss": 1.2744,
"step": 1844
},
{
"epoch": 0.49069148936170215,
"grad_norm": 3.870508909225464,
"learning_rate": 9.936779683372169e-06,
"loss": 1.1989,
"step": 1845
},
{
"epoch": 0.4909574468085106,
"grad_norm": 4.0505194664001465,
"learning_rate": 9.936640188762527e-06,
"loss": 1.206,
"step": 1846
},
{
"epoch": 0.49122340425531913,
"grad_norm": 3.8995118141174316,
"learning_rate": 9.936500541407424e-06,
"loss": 1.1642,
"step": 1847
},
{
"epoch": 0.49148936170212765,
"grad_norm": 4.045437812805176,
"learning_rate": 9.936360741311185e-06,
"loss": 1.2949,
"step": 1848
},
{
"epoch": 0.49175531914893617,
"grad_norm": 3.954519271850586,
"learning_rate": 9.93622078847813e-06,
"loss": 1.3334,
"step": 1849
},
{
"epoch": 0.4920212765957447,
"grad_norm": 3.9482545852661133,
"learning_rate": 9.936080682912594e-06,
"loss": 1.2859,
"step": 1850
},
{
"epoch": 0.4922872340425532,
"grad_norm": 3.7565512657165527,
"learning_rate": 9.935940424618908e-06,
"loss": 1.1294,
"step": 1851
},
{
"epoch": 0.4925531914893617,
"grad_norm": 4.012822151184082,
"learning_rate": 9.935800013601415e-06,
"loss": 1.4283,
"step": 1852
},
{
"epoch": 0.49281914893617024,
"grad_norm": 3.7840845584869385,
"learning_rate": 9.935659449864458e-06,
"loss": 1.332,
"step": 1853
},
{
"epoch": 0.4930851063829787,
"grad_norm": 4.097705364227295,
"learning_rate": 9.935518733412387e-06,
"loss": 1.1062,
"step": 1854
},
{
"epoch": 0.4933510638297872,
"grad_norm": 4.073275089263916,
"learning_rate": 9.935377864249558e-06,
"loss": 1.4567,
"step": 1855
},
{
"epoch": 0.49361702127659574,
"grad_norm": 4.020910263061523,
"learning_rate": 9.935236842380325e-06,
"loss": 1.247,
"step": 1856
},
{
"epoch": 0.49388297872340425,
"grad_norm": 4.380120277404785,
"learning_rate": 9.935095667809053e-06,
"loss": 1.2439,
"step": 1857
},
{
"epoch": 0.49414893617021277,
"grad_norm": 3.8681838512420654,
"learning_rate": 9.934954340540111e-06,
"loss": 1.3522,
"step": 1858
},
{
"epoch": 0.4944148936170213,
"grad_norm": 3.7794203758239746,
"learning_rate": 9.934812860577871e-06,
"loss": 1.1068,
"step": 1859
},
{
"epoch": 0.4946808510638298,
"grad_norm": 3.9970266819000244,
"learning_rate": 9.934671227926714e-06,
"loss": 1.228,
"step": 1860
},
{
"epoch": 0.4949468085106383,
"grad_norm": 4.03349494934082,
"learning_rate": 9.934529442591016e-06,
"loss": 1.5158,
"step": 1861
},
{
"epoch": 0.4952127659574468,
"grad_norm": 3.6862449645996094,
"learning_rate": 9.934387504575169e-06,
"loss": 1.3988,
"step": 1862
},
{
"epoch": 0.4954787234042553,
"grad_norm": 3.7959797382354736,
"learning_rate": 9.934245413883561e-06,
"loss": 1.2412,
"step": 1863
},
{
"epoch": 0.4957446808510638,
"grad_norm": 3.952791929244995,
"learning_rate": 9.934103170520592e-06,
"loss": 1.3866,
"step": 1864
},
{
"epoch": 0.49601063829787234,
"grad_norm": 3.7724785804748535,
"learning_rate": 9.933960774490663e-06,
"loss": 1.1724,
"step": 1865
},
{
"epoch": 0.49627659574468086,
"grad_norm": 3.9937689304351807,
"learning_rate": 9.933818225798178e-06,
"loss": 1.3353,
"step": 1866
},
{
"epoch": 0.4965425531914894,
"grad_norm": 3.818441152572632,
"learning_rate": 9.933675524447549e-06,
"loss": 1.205,
"step": 1867
},
{
"epoch": 0.4968085106382979,
"grad_norm": 3.97725772857666,
"learning_rate": 9.933532670443188e-06,
"loss": 1.289,
"step": 1868
},
{
"epoch": 0.49707446808510636,
"grad_norm": 3.930464744567871,
"learning_rate": 9.93338966378952e-06,
"loss": 1.5099,
"step": 1869
},
{
"epoch": 0.4973404255319149,
"grad_norm": 4.353559494018555,
"learning_rate": 9.933246504490966e-06,
"loss": 1.4003,
"step": 1870
},
{
"epoch": 0.4976063829787234,
"grad_norm": 3.9544339179992676,
"learning_rate": 9.933103192551958e-06,
"loss": 1.1387,
"step": 1871
},
{
"epoch": 0.4978723404255319,
"grad_norm": 3.9833321571350098,
"learning_rate": 9.932959727976928e-06,
"loss": 1.2584,
"step": 1872
},
{
"epoch": 0.49813829787234043,
"grad_norm": 3.862346887588501,
"learning_rate": 9.932816110770317e-06,
"loss": 1.4073,
"step": 1873
},
{
"epoch": 0.49840425531914895,
"grad_norm": 3.7747912406921387,
"learning_rate": 9.932672340936568e-06,
"loss": 1.2541,
"step": 1874
},
{
"epoch": 0.49867021276595747,
"grad_norm": 4.324585437774658,
"learning_rate": 9.93252841848013e-06,
"loss": 1.4344,
"step": 1875
},
{
"epoch": 0.498936170212766,
"grad_norm": 4.572371006011963,
"learning_rate": 9.932384343405452e-06,
"loss": 1.246,
"step": 1876
},
{
"epoch": 0.49920212765957445,
"grad_norm": 4.566850662231445,
"learning_rate": 9.932240115716998e-06,
"loss": 1.2813,
"step": 1877
},
{
"epoch": 0.49946808510638296,
"grad_norm": 3.940889358520508,
"learning_rate": 9.932095735419228e-06,
"loss": 1.1925,
"step": 1878
},
{
"epoch": 0.4997340425531915,
"grad_norm": 3.6935203075408936,
"learning_rate": 9.93195120251661e-06,
"loss": 1.2649,
"step": 1879
},
{
"epoch": 0.5,
"grad_norm": 4.11472749710083,
"learning_rate": 9.931806517013612e-06,
"loss": 1.3672,
"step": 1880
},
{
"epoch": 0.5002659574468085,
"grad_norm": 4.156626224517822,
"learning_rate": 9.931661678914717e-06,
"loss": 1.4258,
"step": 1881
},
{
"epoch": 0.500531914893617,
"grad_norm": 4.2577805519104,
"learning_rate": 9.9315166882244e-06,
"loss": 1.3524,
"step": 1882
},
{
"epoch": 0.5007978723404255,
"grad_norm": 3.9902119636535645,
"learning_rate": 9.931371544947154e-06,
"loss": 1.2988,
"step": 1883
},
{
"epoch": 0.5010638297872341,
"grad_norm": 4.20100736618042,
"learning_rate": 9.931226249087465e-06,
"loss": 1.3102,
"step": 1884
},
{
"epoch": 0.5013297872340425,
"grad_norm": 4.172153949737549,
"learning_rate": 9.93108080064983e-06,
"loss": 1.2019,
"step": 1885
},
{
"epoch": 0.5015957446808511,
"grad_norm": 4.27764892578125,
"learning_rate": 9.93093519963875e-06,
"loss": 1.2075,
"step": 1886
},
{
"epoch": 0.5018617021276596,
"grad_norm": 4.327826023101807,
"learning_rate": 9.930789446058729e-06,
"loss": 1.2459,
"step": 1887
},
{
"epoch": 0.502127659574468,
"grad_norm": 4.269448757171631,
"learning_rate": 9.930643539914276e-06,
"loss": 1.4385,
"step": 1888
},
{
"epoch": 0.5023936170212766,
"grad_norm": 3.7377564907073975,
"learning_rate": 9.930497481209908e-06,
"loss": 1.2267,
"step": 1889
},
{
"epoch": 0.5026595744680851,
"grad_norm": 3.958397388458252,
"learning_rate": 9.930351269950144e-06,
"loss": 1.3289,
"step": 1890
},
{
"epoch": 0.5029255319148936,
"grad_norm": 3.992171049118042,
"learning_rate": 9.930204906139506e-06,
"loss": 1.2989,
"step": 1891
},
{
"epoch": 0.5031914893617021,
"grad_norm": 3.8019278049468994,
"learning_rate": 9.930058389782523e-06,
"loss": 1.3542,
"step": 1892
},
{
"epoch": 0.5034574468085107,
"grad_norm": 3.7610788345336914,
"learning_rate": 9.929911720883729e-06,
"loss": 1.247,
"step": 1893
},
{
"epoch": 0.5037234042553191,
"grad_norm": 3.765941619873047,
"learning_rate": 9.929764899447662e-06,
"loss": 1.3651,
"step": 1894
},
{
"epoch": 0.5039893617021277,
"grad_norm": 4.16331672668457,
"learning_rate": 9.929617925478868e-06,
"loss": 1.28,
"step": 1895
},
{
"epoch": 0.5042553191489362,
"grad_norm": 4.166515827178955,
"learning_rate": 9.929470798981888e-06,
"loss": 1.2401,
"step": 1896
},
{
"epoch": 0.5045212765957446,
"grad_norm": 4.0264177322387695,
"learning_rate": 9.929323519961278e-06,
"loss": 1.3036,
"step": 1897
},
{
"epoch": 0.5047872340425532,
"grad_norm": 3.85672926902771,
"learning_rate": 9.929176088421596e-06,
"loss": 1.1619,
"step": 1898
},
{
"epoch": 0.5050531914893617,
"grad_norm": 4.00507926940918,
"learning_rate": 9.929028504367402e-06,
"loss": 1.2787,
"step": 1899
},
{
"epoch": 0.5053191489361702,
"grad_norm": 3.6691126823425293,
"learning_rate": 9.928880767803264e-06,
"loss": 1.3256,
"step": 1900
},
{
"epoch": 0.5055851063829787,
"grad_norm": 4.093438625335693,
"learning_rate": 9.92873287873375e-06,
"loss": 1.2623,
"step": 1901
},
{
"epoch": 0.5058510638297873,
"grad_norm": 3.689911127090454,
"learning_rate": 9.92858483716344e-06,
"loss": 1.4022,
"step": 1902
},
{
"epoch": 0.5061170212765957,
"grad_norm": 4.178584575653076,
"learning_rate": 9.928436643096909e-06,
"loss": 1.3588,
"step": 1903
},
{
"epoch": 0.5063829787234042,
"grad_norm": 4.098899841308594,
"learning_rate": 9.928288296538749e-06,
"loss": 1.2687,
"step": 1904
},
{
"epoch": 0.5066489361702128,
"grad_norm": 4.034060001373291,
"learning_rate": 9.928139797493545e-06,
"loss": 1.2859,
"step": 1905
},
{
"epoch": 0.5069148936170212,
"grad_norm": 4.75716495513916,
"learning_rate": 9.927991145965894e-06,
"loss": 1.445,
"step": 1906
},
{
"epoch": 0.5071808510638298,
"grad_norm": 3.466297149658203,
"learning_rate": 9.927842341960396e-06,
"loss": 1.0634,
"step": 1907
},
{
"epoch": 0.5074468085106383,
"grad_norm": 3.9337103366851807,
"learning_rate": 9.927693385481652e-06,
"loss": 1.4115,
"step": 1908
},
{
"epoch": 0.5077127659574469,
"grad_norm": 3.6876132488250732,
"learning_rate": 9.927544276534275e-06,
"loss": 1.2333,
"step": 1909
},
{
"epoch": 0.5079787234042553,
"grad_norm": 4.154485702514648,
"learning_rate": 9.927395015122876e-06,
"loss": 1.2432,
"step": 1910
},
{
"epoch": 0.5082446808510638,
"grad_norm": 4.0430073738098145,
"learning_rate": 9.927245601252074e-06,
"loss": 1.3562,
"step": 1911
},
{
"epoch": 0.5085106382978724,
"grad_norm": 3.6701016426086426,
"learning_rate": 9.927096034926491e-06,
"loss": 1.2138,
"step": 1912
},
{
"epoch": 0.5087765957446808,
"grad_norm": 3.7969815731048584,
"learning_rate": 9.926946316150757e-06,
"loss": 1.3166,
"step": 1913
},
{
"epoch": 0.5090425531914894,
"grad_norm": 3.662705183029175,
"learning_rate": 9.926796444929502e-06,
"loss": 1.1107,
"step": 1914
},
{
"epoch": 0.5093085106382979,
"grad_norm": 3.8880231380462646,
"learning_rate": 9.926646421267366e-06,
"loss": 1.2989,
"step": 1915
},
{
"epoch": 0.5095744680851064,
"grad_norm": 3.6114046573638916,
"learning_rate": 9.926496245168989e-06,
"loss": 1.1822,
"step": 1916
},
{
"epoch": 0.5098404255319149,
"grad_norm": 3.799083948135376,
"learning_rate": 9.926345916639018e-06,
"loss": 1.1918,
"step": 1917
},
{
"epoch": 0.5101063829787233,
"grad_norm": 3.4708175659179688,
"learning_rate": 9.926195435682102e-06,
"loss": 1.1244,
"step": 1918
},
{
"epoch": 0.5103723404255319,
"grad_norm": 4.323407173156738,
"learning_rate": 9.926044802302904e-06,
"loss": 1.275,
"step": 1919
},
{
"epoch": 0.5106382978723404,
"grad_norm": 3.8659491539001465,
"learning_rate": 9.925894016506076e-06,
"loss": 1.2904,
"step": 1920
},
{
"epoch": 0.510904255319149,
"grad_norm": 3.7898192405700684,
"learning_rate": 9.925743078296288e-06,
"loss": 1.2569,
"step": 1921
},
{
"epoch": 0.5111702127659574,
"grad_norm": 3.559047222137451,
"learning_rate": 9.925591987678212e-06,
"loss": 1.3267,
"step": 1922
},
{
"epoch": 0.511436170212766,
"grad_norm": 3.8164639472961426,
"learning_rate": 9.925440744656518e-06,
"loss": 1.2059,
"step": 1923
},
{
"epoch": 0.5117021276595745,
"grad_norm": 4.318164825439453,
"learning_rate": 9.925289349235892e-06,
"loss": 1.3528,
"step": 1924
},
{
"epoch": 0.511968085106383,
"grad_norm": 3.8021814823150635,
"learning_rate": 9.925137801421011e-06,
"loss": 1.2096,
"step": 1925
},
{
"epoch": 0.5122340425531915,
"grad_norm": 3.7836246490478516,
"learning_rate": 9.924986101216569e-06,
"loss": 1.2719,
"step": 1926
},
{
"epoch": 0.5125,
"grad_norm": 4.108916282653809,
"learning_rate": 9.92483424862726e-06,
"loss": 1.4018,
"step": 1927
},
{
"epoch": 0.5127659574468085,
"grad_norm": 3.7151575088500977,
"learning_rate": 9.92468224365778e-06,
"loss": 1.3966,
"step": 1928
},
{
"epoch": 0.513031914893617,
"grad_norm": 3.5576205253601074,
"learning_rate": 9.924530086312834e-06,
"loss": 1.2066,
"step": 1929
},
{
"epoch": 0.5132978723404256,
"grad_norm": 3.6642985343933105,
"learning_rate": 9.924377776597128e-06,
"loss": 1.3887,
"step": 1930
},
{
"epoch": 0.513563829787234,
"grad_norm": 4.360495567321777,
"learning_rate": 9.924225314515375e-06,
"loss": 1.6151,
"step": 1931
},
{
"epoch": 0.5138297872340426,
"grad_norm": 3.934380292892456,
"learning_rate": 9.924072700072296e-06,
"loss": 1.2027,
"step": 1932
},
{
"epoch": 0.5140957446808511,
"grad_norm": 3.95251727104187,
"learning_rate": 9.923919933272608e-06,
"loss": 1.4496,
"step": 1933
},
{
"epoch": 0.5143617021276595,
"grad_norm": 3.660336494445801,
"learning_rate": 9.923767014121042e-06,
"loss": 1.2549,
"step": 1934
},
{
"epoch": 0.5146276595744681,
"grad_norm": 3.936469316482544,
"learning_rate": 9.923613942622326e-06,
"loss": 1.3851,
"step": 1935
},
{
"epoch": 0.5148936170212766,
"grad_norm": 3.912565231323242,
"learning_rate": 9.923460718781198e-06,
"loss": 1.303,
"step": 1936
},
{
"epoch": 0.5151595744680851,
"grad_norm": 3.9063549041748047,
"learning_rate": 9.923307342602399e-06,
"loss": 1.315,
"step": 1937
},
{
"epoch": 0.5154255319148936,
"grad_norm": 3.749720335006714,
"learning_rate": 9.923153814090675e-06,
"loss": 1.2961,
"step": 1938
},
{
"epoch": 0.5156914893617022,
"grad_norm": 3.978954315185547,
"learning_rate": 9.923000133250776e-06,
"loss": 1.4325,
"step": 1939
},
{
"epoch": 0.5159574468085106,
"grad_norm": 4.081971645355225,
"learning_rate": 9.922846300087454e-06,
"loss": 1.2811,
"step": 1940
},
{
"epoch": 0.5162234042553191,
"grad_norm": 3.9421591758728027,
"learning_rate": 9.922692314605472e-06,
"loss": 1.3513,
"step": 1941
},
{
"epoch": 0.5164893617021277,
"grad_norm": 3.6500041484832764,
"learning_rate": 9.922538176809597e-06,
"loss": 1.2927,
"step": 1942
},
{
"epoch": 0.5167553191489361,
"grad_norm": 3.858421564102173,
"learning_rate": 9.922383886704594e-06,
"loss": 1.1699,
"step": 1943
},
{
"epoch": 0.5170212765957447,
"grad_norm": 4.286783695220947,
"learning_rate": 9.922229444295238e-06,
"loss": 1.4037,
"step": 1944
},
{
"epoch": 0.5172872340425532,
"grad_norm": 4.163476943969727,
"learning_rate": 9.922074849586308e-06,
"loss": 1.1268,
"step": 1945
},
{
"epoch": 0.5175531914893617,
"grad_norm": 3.8577239513397217,
"learning_rate": 9.921920102582587e-06,
"loss": 1.2154,
"step": 1946
},
{
"epoch": 0.5178191489361702,
"grad_norm": 4.213263988494873,
"learning_rate": 9.921765203288862e-06,
"loss": 1.3188,
"step": 1947
},
{
"epoch": 0.5180851063829788,
"grad_norm": 3.817172050476074,
"learning_rate": 9.921610151709929e-06,
"loss": 1.2897,
"step": 1948
},
{
"epoch": 0.5183510638297872,
"grad_norm": 3.954479694366455,
"learning_rate": 9.921454947850582e-06,
"loss": 1.1568,
"step": 1949
},
{
"epoch": 0.5186170212765957,
"grad_norm": 4.054901123046875,
"learning_rate": 9.921299591715624e-06,
"loss": 1.1991,
"step": 1950
},
{
"epoch": 0.5188829787234043,
"grad_norm": 3.9514553546905518,
"learning_rate": 9.921144083309864e-06,
"loss": 1.2588,
"step": 1951
},
{
"epoch": 0.5191489361702127,
"grad_norm": 4.228671550750732,
"learning_rate": 9.920988422638112e-06,
"loss": 1.3348,
"step": 1952
},
{
"epoch": 0.5194148936170213,
"grad_norm": 3.997422695159912,
"learning_rate": 9.920832609705184e-06,
"loss": 1.2402,
"step": 1953
},
{
"epoch": 0.5196808510638298,
"grad_norm": 3.8394384384155273,
"learning_rate": 9.920676644515902e-06,
"loss": 1.222,
"step": 1954
},
{
"epoch": 0.5199468085106383,
"grad_norm": 3.654381036758423,
"learning_rate": 9.92052052707509e-06,
"loss": 1.4059,
"step": 1955
},
{
"epoch": 0.5202127659574468,
"grad_norm": 3.881578207015991,
"learning_rate": 9.92036425738758e-06,
"loss": 1.3507,
"step": 1956
},
{
"epoch": 0.5204787234042553,
"grad_norm": 3.819066286087036,
"learning_rate": 9.920207835458208e-06,
"loss": 1.3433,
"step": 1957
},
{
"epoch": 0.5207446808510638,
"grad_norm": 3.2657382488250732,
"learning_rate": 9.920051261291812e-06,
"loss": 1.0601,
"step": 1958
},
{
"epoch": 0.5210106382978723,
"grad_norm": 3.789560556411743,
"learning_rate": 9.919894534893237e-06,
"loss": 1.2395,
"step": 1959
},
{
"epoch": 0.5212765957446809,
"grad_norm": 3.620661973953247,
"learning_rate": 9.919737656267335e-06,
"loss": 1.1793,
"step": 1960
},
{
"epoch": 0.5215425531914893,
"grad_norm": 4.208719253540039,
"learning_rate": 9.919580625418955e-06,
"loss": 1.5431,
"step": 1961
},
{
"epoch": 0.5218085106382979,
"grad_norm": 4.2255024909973145,
"learning_rate": 9.919423442352958e-06,
"loss": 1.3665,
"step": 1962
},
{
"epoch": 0.5220744680851064,
"grad_norm": 4.246603965759277,
"learning_rate": 9.91926610707421e-06,
"loss": 1.2552,
"step": 1963
},
{
"epoch": 0.5223404255319148,
"grad_norm": 4.042827606201172,
"learning_rate": 9.919108619587575e-06,
"loss": 1.2171,
"step": 1964
},
{
"epoch": 0.5226063829787234,
"grad_norm": 4.006556510925293,
"learning_rate": 9.918950979897928e-06,
"loss": 1.2559,
"step": 1965
},
{
"epoch": 0.5228723404255319,
"grad_norm": 3.7249419689178467,
"learning_rate": 9.918793188010147e-06,
"loss": 1.0816,
"step": 1966
},
{
"epoch": 0.5231382978723405,
"grad_norm": 4.087320804595947,
"learning_rate": 9.918635243929115e-06,
"loss": 1.2607,
"step": 1967
},
{
"epoch": 0.5234042553191489,
"grad_norm": 4.031649589538574,
"learning_rate": 9.918477147659715e-06,
"loss": 1.2983,
"step": 1968
},
{
"epoch": 0.5236702127659575,
"grad_norm": 4.055499076843262,
"learning_rate": 9.918318899206842e-06,
"loss": 1.2686,
"step": 1969
},
{
"epoch": 0.523936170212766,
"grad_norm": 4.922122955322266,
"learning_rate": 9.918160498575394e-06,
"loss": 1.2761,
"step": 1970
},
{
"epoch": 0.5242021276595744,
"grad_norm": 4.155685901641846,
"learning_rate": 9.918001945770267e-06,
"loss": 1.3004,
"step": 1971
},
{
"epoch": 0.524468085106383,
"grad_norm": 4.165022373199463,
"learning_rate": 9.91784324079637e-06,
"loss": 1.4643,
"step": 1972
},
{
"epoch": 0.5247340425531914,
"grad_norm": 3.9013566970825195,
"learning_rate": 9.917684383658614e-06,
"loss": 1.2264,
"step": 1973
},
{
"epoch": 0.525,
"grad_norm": 4.016994953155518,
"learning_rate": 9.917525374361913e-06,
"loss": 1.2748,
"step": 1974
},
{
"epoch": 0.5252659574468085,
"grad_norm": 4.0600996017456055,
"learning_rate": 9.917366212911187e-06,
"loss": 1.2,
"step": 1975
},
{
"epoch": 0.5255319148936171,
"grad_norm": 4.1870903968811035,
"learning_rate": 9.91720689931136e-06,
"loss": 1.2307,
"step": 1976
},
{
"epoch": 0.5257978723404255,
"grad_norm": 3.7501108646392822,
"learning_rate": 9.917047433567364e-06,
"loss": 1.2853,
"step": 1977
},
{
"epoch": 0.5260638297872341,
"grad_norm": 3.8789479732513428,
"learning_rate": 9.91688781568413e-06,
"loss": 1.3571,
"step": 1978
},
{
"epoch": 0.5263297872340426,
"grad_norm": 3.641453981399536,
"learning_rate": 9.9167280456666e-06,
"loss": 1.1975,
"step": 1979
},
{
"epoch": 0.526595744680851,
"grad_norm": 4.097661972045898,
"learning_rate": 9.916568123519713e-06,
"loss": 1.2415,
"step": 1980
},
{
"epoch": 0.5268617021276596,
"grad_norm": 3.447585105895996,
"learning_rate": 9.91640804924842e-06,
"loss": 1.1599,
"step": 1981
},
{
"epoch": 0.527127659574468,
"grad_norm": 3.906158208847046,
"learning_rate": 9.916247822857675e-06,
"loss": 1.2141,
"step": 1982
},
{
"epoch": 0.5273936170212766,
"grad_norm": 4.226005554199219,
"learning_rate": 9.916087444352433e-06,
"loss": 1.3575,
"step": 1983
},
{
"epoch": 0.5276595744680851,
"grad_norm": 3.955073118209839,
"learning_rate": 9.91592691373766e-06,
"loss": 1.159,
"step": 1984
},
{
"epoch": 0.5279255319148937,
"grad_norm": 3.770538568496704,
"learning_rate": 9.915766231018317e-06,
"loss": 1.2722,
"step": 1985
},
{
"epoch": 0.5281914893617021,
"grad_norm": 4.1326422691345215,
"learning_rate": 9.91560539619938e-06,
"loss": 1.4044,
"step": 1986
},
{
"epoch": 0.5284574468085106,
"grad_norm": 3.933978319168091,
"learning_rate": 9.915444409285827e-06,
"loss": 1.1495,
"step": 1987
},
{
"epoch": 0.5287234042553192,
"grad_norm": 3.8940069675445557,
"learning_rate": 9.915283270282637e-06,
"loss": 1.2658,
"step": 1988
},
{
"epoch": 0.5289893617021276,
"grad_norm": 3.8015975952148438,
"learning_rate": 9.915121979194793e-06,
"loss": 1.2155,
"step": 1989
},
{
"epoch": 0.5292553191489362,
"grad_norm": 4.204024791717529,
"learning_rate": 9.914960536027289e-06,
"loss": 1.3081,
"step": 1990
},
{
"epoch": 0.5295212765957447,
"grad_norm": 3.80530047416687,
"learning_rate": 9.91479894078512e-06,
"loss": 1.2827,
"step": 1991
},
{
"epoch": 0.5297872340425532,
"grad_norm": 4.011538505554199,
"learning_rate": 9.914637193473284e-06,
"loss": 1.2801,
"step": 1992
},
{
"epoch": 0.5300531914893617,
"grad_norm": 3.848898410797119,
"learning_rate": 9.914475294096788e-06,
"loss": 1.2904,
"step": 1993
},
{
"epoch": 0.5303191489361702,
"grad_norm": 3.7076499462127686,
"learning_rate": 9.91431324266064e-06,
"loss": 1.3455,
"step": 1994
},
{
"epoch": 0.5305851063829787,
"grad_norm": 4.372555255889893,
"learning_rate": 9.914151039169855e-06,
"loss": 1.3233,
"step": 1995
},
{
"epoch": 0.5308510638297872,
"grad_norm": 4.168186664581299,
"learning_rate": 9.913988683629449e-06,
"loss": 1.3303,
"step": 1996
},
{
"epoch": 0.5311170212765958,
"grad_norm": 3.4844412803649902,
"learning_rate": 9.91382617604445e-06,
"loss": 1.28,
"step": 1997
},
{
"epoch": 0.5313829787234042,
"grad_norm": 3.981612205505371,
"learning_rate": 9.913663516419883e-06,
"loss": 1.4133,
"step": 1998
},
{
"epoch": 0.5316489361702128,
"grad_norm": 3.6310243606567383,
"learning_rate": 9.913500704760781e-06,
"loss": 1.2546,
"step": 1999
},
{
"epoch": 0.5319148936170213,
"grad_norm": 3.6045448780059814,
"learning_rate": 9.913337741072183e-06,
"loss": 1.1445,
"step": 2000
},
{
"epoch": 0.5319148936170213,
"eval_loss": 1.2938566207885742,
"eval_runtime": 12.2817,
"eval_samples_per_second": 32.569,
"eval_steps_per_second": 4.071,
"step": 2000
},
{
"epoch": 0.5321808510638298,
"grad_norm": 4.040936470031738,
"learning_rate": 9.913174625359132e-06,
"loss": 1.2325,
"step": 2001
},
{
"epoch": 0.5324468085106383,
"grad_norm": 3.7908430099487305,
"learning_rate": 9.913011357626672e-06,
"loss": 1.3091,
"step": 2002
},
{
"epoch": 0.5327127659574468,
"grad_norm": 3.7691242694854736,
"learning_rate": 9.912847937879855e-06,
"loss": 1.2236,
"step": 2003
},
{
"epoch": 0.5329787234042553,
"grad_norm": 4.643370628356934,
"learning_rate": 9.91268436612374e-06,
"loss": 1.3033,
"step": 2004
},
{
"epoch": 0.5332446808510638,
"grad_norm": 3.5233020782470703,
"learning_rate": 9.912520642363387e-06,
"loss": 1.1542,
"step": 2005
},
{
"epoch": 0.5335106382978724,
"grad_norm": 4.1154022216796875,
"learning_rate": 9.912356766603862e-06,
"loss": 1.4088,
"step": 2006
},
{
"epoch": 0.5337765957446808,
"grad_norm": 5.4873247146606445,
"learning_rate": 9.912192738850234e-06,
"loss": 1.3057,
"step": 2007
},
{
"epoch": 0.5340425531914894,
"grad_norm": 3.9308226108551025,
"learning_rate": 9.912028559107577e-06,
"loss": 1.2788,
"step": 2008
},
{
"epoch": 0.5343085106382979,
"grad_norm": 3.6488893032073975,
"learning_rate": 9.91186422738098e-06,
"loss": 1.1555,
"step": 2009
},
{
"epoch": 0.5345744680851063,
"grad_norm": 3.553065061569214,
"learning_rate": 9.911699743675513e-06,
"loss": 1.2228,
"step": 2010
},
{
"epoch": 0.5348404255319149,
"grad_norm": 3.8336079120635986,
"learning_rate": 9.911535107996278e-06,
"loss": 1.2563,
"step": 2011
},
{
"epoch": 0.5351063829787234,
"grad_norm": 4.1601715087890625,
"learning_rate": 9.911370320348363e-06,
"loss": 1.2525,
"step": 2012
},
{
"epoch": 0.535372340425532,
"grad_norm": 3.4441726207733154,
"learning_rate": 9.911205380736868e-06,
"loss": 1.2293,
"step": 2013
},
{
"epoch": 0.5356382978723404,
"grad_norm": 4.281271457672119,
"learning_rate": 9.911040289166896e-06,
"loss": 1.5168,
"step": 2014
},
{
"epoch": 0.535904255319149,
"grad_norm": 3.982959508895874,
"learning_rate": 9.910875045643555e-06,
"loss": 1.2864,
"step": 2015
},
{
"epoch": 0.5361702127659574,
"grad_norm": 3.9199705123901367,
"learning_rate": 9.91070965017196e-06,
"loss": 1.2906,
"step": 2016
},
{
"epoch": 0.5364361702127659,
"grad_norm": 4.073878288269043,
"learning_rate": 9.910544102757224e-06,
"loss": 1.2435,
"step": 2017
},
{
"epoch": 0.5367021276595745,
"grad_norm": 4.169588088989258,
"learning_rate": 9.910378403404473e-06,
"loss": 1.3231,
"step": 2018
},
{
"epoch": 0.5369680851063829,
"grad_norm": 3.7797560691833496,
"learning_rate": 9.910212552118835e-06,
"loss": 1.2632,
"step": 2019
},
{
"epoch": 0.5372340425531915,
"grad_norm": 4.002804756164551,
"learning_rate": 9.910046548905437e-06,
"loss": 1.3988,
"step": 2020
},
{
"epoch": 0.5375,
"grad_norm": 3.8956003189086914,
"learning_rate": 9.90988039376942e-06,
"loss": 1.2534,
"step": 2021
},
{
"epoch": 0.5377659574468086,
"grad_norm": 3.6937549114227295,
"learning_rate": 9.90971408671592e-06,
"loss": 1.2312,
"step": 2022
},
{
"epoch": 0.538031914893617,
"grad_norm": 3.7216007709503174,
"learning_rate": 9.909547627750089e-06,
"loss": 1.2408,
"step": 2023
},
{
"epoch": 0.5382978723404256,
"grad_norm": 3.827702760696411,
"learning_rate": 9.909381016877074e-06,
"loss": 1.2551,
"step": 2024
},
{
"epoch": 0.538563829787234,
"grad_norm": 3.5307586193084717,
"learning_rate": 9.909214254102027e-06,
"loss": 1.2352,
"step": 2025
},
{
"epoch": 0.5388297872340425,
"grad_norm": 3.7490625381469727,
"learning_rate": 9.909047339430113e-06,
"loss": 1.2867,
"step": 2026
},
{
"epoch": 0.5390957446808511,
"grad_norm": 4.107030391693115,
"learning_rate": 9.908880272866495e-06,
"loss": 1.3459,
"step": 2027
},
{
"epoch": 0.5393617021276595,
"grad_norm": 3.855973482131958,
"learning_rate": 9.908713054416342e-06,
"loss": 1.224,
"step": 2028
},
{
"epoch": 0.5396276595744681,
"grad_norm": 4.167142391204834,
"learning_rate": 9.908545684084826e-06,
"loss": 1.4258,
"step": 2029
},
{
"epoch": 0.5398936170212766,
"grad_norm": 3.899373769760132,
"learning_rate": 9.90837816187713e-06,
"loss": 1.2853,
"step": 2030
},
{
"epoch": 0.5401595744680852,
"grad_norm": 3.8360328674316406,
"learning_rate": 9.908210487798433e-06,
"loss": 1.3503,
"step": 2031
},
{
"epoch": 0.5404255319148936,
"grad_norm": 3.633971929550171,
"learning_rate": 9.908042661853926e-06,
"loss": 1.0622,
"step": 2032
},
{
"epoch": 0.5406914893617021,
"grad_norm": 4.1685991287231445,
"learning_rate": 9.9078746840488e-06,
"loss": 1.3733,
"step": 2033
},
{
"epoch": 0.5409574468085107,
"grad_norm": 3.9930756092071533,
"learning_rate": 9.907706554388253e-06,
"loss": 1.4306,
"step": 2034
},
{
"epoch": 0.5412234042553191,
"grad_norm": 3.9129087924957275,
"learning_rate": 9.907538272877487e-06,
"loss": 1.1834,
"step": 2035
},
{
"epoch": 0.5414893617021277,
"grad_norm": 3.658611536026001,
"learning_rate": 9.90736983952171e-06,
"loss": 1.1908,
"step": 2036
},
{
"epoch": 0.5417553191489362,
"grad_norm": 3.9367542266845703,
"learning_rate": 9.907201254326132e-06,
"loss": 1.2853,
"step": 2037
},
{
"epoch": 0.5420212765957447,
"grad_norm": 3.9035940170288086,
"learning_rate": 9.907032517295966e-06,
"loss": 1.2867,
"step": 2038
},
{
"epoch": 0.5422872340425532,
"grad_norm": 3.702096939086914,
"learning_rate": 9.906863628436441e-06,
"loss": 1.2614,
"step": 2039
},
{
"epoch": 0.5425531914893617,
"grad_norm": 4.073267459869385,
"learning_rate": 9.906694587752777e-06,
"loss": 1.3793,
"step": 2040
},
{
"epoch": 0.5428191489361702,
"grad_norm": 3.864699363708496,
"learning_rate": 9.906525395250206e-06,
"loss": 1.1233,
"step": 2041
},
{
"epoch": 0.5430851063829787,
"grad_norm": 3.8738772869110107,
"learning_rate": 9.906356050933962e-06,
"loss": 1.1704,
"step": 2042
},
{
"epoch": 0.5433510638297873,
"grad_norm": 3.837299108505249,
"learning_rate": 9.906186554809284e-06,
"loss": 1.1802,
"step": 2043
},
{
"epoch": 0.5436170212765957,
"grad_norm": 4.00624942779541,
"learning_rate": 9.906016906881419e-06,
"loss": 1.2934,
"step": 2044
},
{
"epoch": 0.5438829787234043,
"grad_norm": 3.6519479751586914,
"learning_rate": 9.905847107155615e-06,
"loss": 1.2313,
"step": 2045
},
{
"epoch": 0.5441489361702128,
"grad_norm": 4.127234935760498,
"learning_rate": 9.905677155637126e-06,
"loss": 1.476,
"step": 2046
},
{
"epoch": 0.5444148936170212,
"grad_norm": 3.580862283706665,
"learning_rate": 9.90550705233121e-06,
"loss": 1.1991,
"step": 2047
},
{
"epoch": 0.5446808510638298,
"grad_norm": 4.004328727722168,
"learning_rate": 9.90533679724313e-06,
"loss": 1.2811,
"step": 2048
},
{
"epoch": 0.5449468085106383,
"grad_norm": 3.6748900413513184,
"learning_rate": 9.905166390378154e-06,
"loss": 1.3381,
"step": 2049
},
{
"epoch": 0.5452127659574468,
"grad_norm": 3.5765295028686523,
"learning_rate": 9.904995831741553e-06,
"loss": 1.2265,
"step": 2050
},
{
"epoch": 0.5454787234042553,
"grad_norm": 3.910905361175537,
"learning_rate": 9.904825121338609e-06,
"loss": 1.2516,
"step": 2051
},
{
"epoch": 0.5457446808510639,
"grad_norm": 3.8337693214416504,
"learning_rate": 9.9046542591746e-06,
"loss": 1.2997,
"step": 2052
},
{
"epoch": 0.5460106382978723,
"grad_norm": 3.837082862854004,
"learning_rate": 9.904483245254812e-06,
"loss": 1.3341,
"step": 2053
},
{
"epoch": 0.5462765957446809,
"grad_norm": 4.098066806793213,
"learning_rate": 9.90431207958454e-06,
"loss": 1.2182,
"step": 2054
},
{
"epoch": 0.5465425531914894,
"grad_norm": 4.022514343261719,
"learning_rate": 9.904140762169079e-06,
"loss": 1.4144,
"step": 2055
},
{
"epoch": 0.5468085106382978,
"grad_norm": 3.779283046722412,
"learning_rate": 9.903969293013727e-06,
"loss": 1.2291,
"step": 2056
},
{
"epoch": 0.5470744680851064,
"grad_norm": 4.28890323638916,
"learning_rate": 9.903797672123791e-06,
"loss": 1.3899,
"step": 2057
},
{
"epoch": 0.5473404255319149,
"grad_norm": 3.720780372619629,
"learning_rate": 9.903625899504583e-06,
"loss": 1.1992,
"step": 2058
},
{
"epoch": 0.5476063829787234,
"grad_norm": 3.80373215675354,
"learning_rate": 9.903453975161416e-06,
"loss": 1.322,
"step": 2059
},
{
"epoch": 0.5478723404255319,
"grad_norm": 4.012282371520996,
"learning_rate": 9.90328189909961e-06,
"loss": 1.1998,
"step": 2060
},
{
"epoch": 0.5481382978723405,
"grad_norm": 4.059588432312012,
"learning_rate": 9.903109671324488e-06,
"loss": 1.286,
"step": 2061
},
{
"epoch": 0.5484042553191489,
"grad_norm": 3.9015207290649414,
"learning_rate": 9.902937291841383e-06,
"loss": 1.3525,
"step": 2062
},
{
"epoch": 0.5486702127659574,
"grad_norm": 4.0359954833984375,
"learning_rate": 9.902764760655623e-06,
"loss": 1.3094,
"step": 2063
},
{
"epoch": 0.548936170212766,
"grad_norm": 3.487372875213623,
"learning_rate": 9.90259207777255e-06,
"loss": 1.2127,
"step": 2064
},
{
"epoch": 0.5492021276595744,
"grad_norm": 3.607064723968506,
"learning_rate": 9.902419243197505e-06,
"loss": 1.2091,
"step": 2065
},
{
"epoch": 0.549468085106383,
"grad_norm": 3.9896395206451416,
"learning_rate": 9.902246256935837e-06,
"loss": 1.3059,
"step": 2066
},
{
"epoch": 0.5497340425531915,
"grad_norm": 4.376030445098877,
"learning_rate": 9.9020731189929e-06,
"loss": 1.3092,
"step": 2067
},
{
"epoch": 0.55,
"grad_norm": 3.3590362071990967,
"learning_rate": 9.901899829374048e-06,
"loss": 1.201,
"step": 2068
},
{
"epoch": 0.5502659574468085,
"grad_norm": 3.7063753604888916,
"learning_rate": 9.901726388084643e-06,
"loss": 1.182,
"step": 2069
},
{
"epoch": 0.550531914893617,
"grad_norm": 3.709569215774536,
"learning_rate": 9.901552795130054e-06,
"loss": 1.1766,
"step": 2070
},
{
"epoch": 0.5507978723404255,
"grad_norm": 4.3449249267578125,
"learning_rate": 9.90137905051565e-06,
"loss": 1.3167,
"step": 2071
},
{
"epoch": 0.551063829787234,
"grad_norm": 3.8162055015563965,
"learning_rate": 9.901205154246807e-06,
"loss": 1.2192,
"step": 2072
},
{
"epoch": 0.5513297872340426,
"grad_norm": 3.792880058288574,
"learning_rate": 9.901031106328907e-06,
"loss": 1.2957,
"step": 2073
},
{
"epoch": 0.551595744680851,
"grad_norm": 3.6657822132110596,
"learning_rate": 9.900856906767334e-06,
"loss": 1.3045,
"step": 2074
},
{
"epoch": 0.5518617021276596,
"grad_norm": 3.327601194381714,
"learning_rate": 9.900682555567478e-06,
"loss": 1.1348,
"step": 2075
},
{
"epoch": 0.5521276595744681,
"grad_norm": 3.9993128776550293,
"learning_rate": 9.900508052734734e-06,
"loss": 1.2678,
"step": 2076
},
{
"epoch": 0.5523936170212767,
"grad_norm": 3.922495126724243,
"learning_rate": 9.900333398274501e-06,
"loss": 1.1644,
"step": 2077
},
{
"epoch": 0.5526595744680851,
"grad_norm": 3.6909377574920654,
"learning_rate": 9.900158592192184e-06,
"loss": 1.208,
"step": 2078
},
{
"epoch": 0.5529255319148936,
"grad_norm": 4.378490924835205,
"learning_rate": 9.89998363449319e-06,
"loss": 1.2866,
"step": 2079
},
{
"epoch": 0.5531914893617021,
"grad_norm": 3.6202850341796875,
"learning_rate": 9.899808525182935e-06,
"loss": 1.238,
"step": 2080
},
{
"epoch": 0.5534574468085106,
"grad_norm": 3.9422550201416016,
"learning_rate": 9.899633264266835e-06,
"loss": 1.2932,
"step": 2081
},
{
"epoch": 0.5537234042553192,
"grad_norm": 4.002807140350342,
"learning_rate": 9.899457851750312e-06,
"loss": 1.301,
"step": 2082
},
{
"epoch": 0.5539893617021276,
"grad_norm": 4.242476940155029,
"learning_rate": 9.899282287638795e-06,
"loss": 1.2967,
"step": 2083
},
{
"epoch": 0.5542553191489362,
"grad_norm": 4.148952007293701,
"learning_rate": 9.899106571937716e-06,
"loss": 1.2863,
"step": 2084
},
{
"epoch": 0.5545212765957447,
"grad_norm": 3.8258893489837646,
"learning_rate": 9.898930704652512e-06,
"loss": 1.2253,
"step": 2085
},
{
"epoch": 0.5547872340425531,
"grad_norm": 4.117706298828125,
"learning_rate": 9.898754685788623e-06,
"loss": 1.3706,
"step": 2086
},
{
"epoch": 0.5550531914893617,
"grad_norm": 3.989381790161133,
"learning_rate": 9.898578515351498e-06,
"loss": 1.2585,
"step": 2087
},
{
"epoch": 0.5553191489361702,
"grad_norm": 3.8721275329589844,
"learning_rate": 9.898402193346585e-06,
"loss": 1.1284,
"step": 2088
},
{
"epoch": 0.5555851063829788,
"grad_norm": 4.169785499572754,
"learning_rate": 9.898225719779342e-06,
"loss": 1.2176,
"step": 2089
},
{
"epoch": 0.5558510638297872,
"grad_norm": 3.8007307052612305,
"learning_rate": 9.898049094655229e-06,
"loss": 1.1421,
"step": 2090
},
{
"epoch": 0.5561170212765958,
"grad_norm": 3.48579740524292,
"learning_rate": 9.897872317979708e-06,
"loss": 1.1123,
"step": 2091
},
{
"epoch": 0.5563829787234043,
"grad_norm": 3.6224656105041504,
"learning_rate": 9.897695389758253e-06,
"loss": 1.2452,
"step": 2092
},
{
"epoch": 0.5566489361702127,
"grad_norm": 4.0066752433776855,
"learning_rate": 9.897518309996336e-06,
"loss": 1.3127,
"step": 2093
},
{
"epoch": 0.5569148936170213,
"grad_norm": 3.5834217071533203,
"learning_rate": 9.897341078699437e-06,
"loss": 1.1945,
"step": 2094
},
{
"epoch": 0.5571808510638298,
"grad_norm": 3.616166830062866,
"learning_rate": 9.897163695873036e-06,
"loss": 1.2113,
"step": 2095
},
{
"epoch": 0.5574468085106383,
"grad_norm": 4.5236945152282715,
"learning_rate": 9.896986161522627e-06,
"loss": 1.556,
"step": 2096
},
{
"epoch": 0.5577127659574468,
"grad_norm": 4.006591320037842,
"learning_rate": 9.896808475653701e-06,
"loss": 1.3505,
"step": 2097
},
{
"epoch": 0.5579787234042554,
"grad_norm": 4.137003421783447,
"learning_rate": 9.896630638271755e-06,
"loss": 1.2105,
"step": 2098
},
{
"epoch": 0.5582446808510638,
"grad_norm": 4.136394500732422,
"learning_rate": 9.896452649382291e-06,
"loss": 1.4277,
"step": 2099
},
{
"epoch": 0.5585106382978723,
"grad_norm": 3.8342485427856445,
"learning_rate": 9.896274508990818e-06,
"loss": 1.2839,
"step": 2100
},
{
"epoch": 0.5587765957446809,
"grad_norm": 3.687845230102539,
"learning_rate": 9.896096217102848e-06,
"loss": 1.1659,
"step": 2101
},
{
"epoch": 0.5590425531914893,
"grad_norm": 3.971306562423706,
"learning_rate": 9.895917773723895e-06,
"loss": 1.4681,
"step": 2102
},
{
"epoch": 0.5593085106382979,
"grad_norm": 3.5636236667633057,
"learning_rate": 9.895739178859483e-06,
"loss": 1.2463,
"step": 2103
},
{
"epoch": 0.5595744680851064,
"grad_norm": 4.580478191375732,
"learning_rate": 9.895560432515136e-06,
"loss": 1.488,
"step": 2104
},
{
"epoch": 0.5598404255319149,
"grad_norm": 3.5549540519714355,
"learning_rate": 9.895381534696385e-06,
"loss": 1.1869,
"step": 2105
},
{
"epoch": 0.5601063829787234,
"grad_norm": 3.6891443729400635,
"learning_rate": 9.895202485408766e-06,
"loss": 1.2356,
"step": 2106
},
{
"epoch": 0.560372340425532,
"grad_norm": 4.139247894287109,
"learning_rate": 9.895023284657821e-06,
"loss": 1.2941,
"step": 2107
},
{
"epoch": 0.5606382978723404,
"grad_norm": 3.616758346557617,
"learning_rate": 9.89484393244909e-06,
"loss": 1.2292,
"step": 2108
},
{
"epoch": 0.5609042553191489,
"grad_norm": 3.634755849838257,
"learning_rate": 9.894664428788126e-06,
"loss": 1.2215,
"step": 2109
},
{
"epoch": 0.5611702127659575,
"grad_norm": 3.9066550731658936,
"learning_rate": 9.89448477368048e-06,
"loss": 1.3777,
"step": 2110
},
{
"epoch": 0.5614361702127659,
"grad_norm": 3.8861474990844727,
"learning_rate": 9.894304967131713e-06,
"loss": 1.2666,
"step": 2111
},
{
"epoch": 0.5617021276595745,
"grad_norm": 3.3856041431427,
"learning_rate": 9.894125009147389e-06,
"loss": 1.3001,
"step": 2112
},
{
"epoch": 0.561968085106383,
"grad_norm": 3.5979838371276855,
"learning_rate": 9.893944899733076e-06,
"loss": 1.2005,
"step": 2113
},
{
"epoch": 0.5622340425531915,
"grad_norm": 3.851020336151123,
"learning_rate": 9.893764638894345e-06,
"loss": 1.3479,
"step": 2114
},
{
"epoch": 0.5625,
"grad_norm": 4.208298206329346,
"learning_rate": 9.893584226636773e-06,
"loss": 1.3329,
"step": 2115
},
{
"epoch": 0.5627659574468085,
"grad_norm": 3.6734988689422607,
"learning_rate": 9.893403662965944e-06,
"loss": 1.3678,
"step": 2116
},
{
"epoch": 0.563031914893617,
"grad_norm": 3.708069324493408,
"learning_rate": 9.893222947887446e-06,
"loss": 1.3176,
"step": 2117
},
{
"epoch": 0.5632978723404255,
"grad_norm": 4.194994926452637,
"learning_rate": 9.893042081406868e-06,
"loss": 1.381,
"step": 2118
},
{
"epoch": 0.5635638297872341,
"grad_norm": 3.740922689437866,
"learning_rate": 9.892861063529807e-06,
"loss": 1.1555,
"step": 2119
},
{
"epoch": 0.5638297872340425,
"grad_norm": 3.744663715362549,
"learning_rate": 9.892679894261865e-06,
"loss": 1.132,
"step": 2120
},
{
"epoch": 0.5640957446808511,
"grad_norm": 4.050332546234131,
"learning_rate": 9.892498573608645e-06,
"loss": 1.3709,
"step": 2121
},
{
"epoch": 0.5643617021276596,
"grad_norm": 3.9612951278686523,
"learning_rate": 9.89231710157576e-06,
"loss": 1.2954,
"step": 2122
},
{
"epoch": 0.564627659574468,
"grad_norm": 3.165841817855835,
"learning_rate": 9.892135478168824e-06,
"loss": 1.1757,
"step": 2123
},
{
"epoch": 0.5648936170212766,
"grad_norm": 3.6281683444976807,
"learning_rate": 9.891953703393455e-06,
"loss": 1.0733,
"step": 2124
},
{
"epoch": 0.5651595744680851,
"grad_norm": 3.7431442737579346,
"learning_rate": 9.89177177725528e-06,
"loss": 1.3628,
"step": 2125
},
{
"epoch": 0.5654255319148936,
"grad_norm": 3.704817295074463,
"learning_rate": 9.891589699759929e-06,
"loss": 1.284,
"step": 2126
},
{
"epoch": 0.5656914893617021,
"grad_norm": 3.5511844158172607,
"learning_rate": 9.89140747091303e-06,
"loss": 1.1152,
"step": 2127
},
{
"epoch": 0.5659574468085107,
"grad_norm": 3.450695753097534,
"learning_rate": 9.891225090720227e-06,
"loss": 1.2245,
"step": 2128
},
{
"epoch": 0.5662234042553191,
"grad_norm": 3.8009350299835205,
"learning_rate": 9.891042559187161e-06,
"loss": 1.319,
"step": 2129
},
{
"epoch": 0.5664893617021277,
"grad_norm": 4.276994228363037,
"learning_rate": 9.890859876319479e-06,
"loss": 1.3191,
"step": 2130
},
{
"epoch": 0.5667553191489362,
"grad_norm": 4.0986738204956055,
"learning_rate": 9.890677042122834e-06,
"loss": 1.2553,
"step": 2131
},
{
"epoch": 0.5670212765957446,
"grad_norm": 3.861093044281006,
"learning_rate": 9.890494056602883e-06,
"loss": 1.1618,
"step": 2132
},
{
"epoch": 0.5672872340425532,
"grad_norm": 3.8807971477508545,
"learning_rate": 9.89031091976529e-06,
"loss": 1.3676,
"step": 2133
},
{
"epoch": 0.5675531914893617,
"grad_norm": 3.5750906467437744,
"learning_rate": 9.890127631615719e-06,
"loss": 1.3009,
"step": 2134
},
{
"epoch": 0.5678191489361702,
"grad_norm": 3.740861654281616,
"learning_rate": 9.88994419215984e-06,
"loss": 1.3059,
"step": 2135
},
{
"epoch": 0.5680851063829787,
"grad_norm": 3.945333480834961,
"learning_rate": 9.88976060140333e-06,
"loss": 1.3027,
"step": 2136
},
{
"epoch": 0.5683510638297873,
"grad_norm": 3.9484307765960693,
"learning_rate": 9.889576859351873e-06,
"loss": 1.4177,
"step": 2137
},
{
"epoch": 0.5686170212765957,
"grad_norm": 3.9661643505096436,
"learning_rate": 9.88939296601115e-06,
"loss": 1.3607,
"step": 2138
},
{
"epoch": 0.5688829787234042,
"grad_norm": 3.4872074127197266,
"learning_rate": 9.88920892138685e-06,
"loss": 1.1658,
"step": 2139
},
{
"epoch": 0.5691489361702128,
"grad_norm": 3.545102119445801,
"learning_rate": 9.889024725484672e-06,
"loss": 1.1813,
"step": 2140
},
{
"epoch": 0.5694148936170212,
"grad_norm": 3.738452434539795,
"learning_rate": 9.888840378310312e-06,
"loss": 1.2977,
"step": 2141
},
{
"epoch": 0.5696808510638298,
"grad_norm": 3.6037521362304688,
"learning_rate": 9.888655879869475e-06,
"loss": 1.2053,
"step": 2142
},
{
"epoch": 0.5699468085106383,
"grad_norm": 4.002810955047607,
"learning_rate": 9.888471230167869e-06,
"loss": 1.1678,
"step": 2143
},
{
"epoch": 0.5702127659574469,
"grad_norm": 3.659442186355591,
"learning_rate": 9.88828642921121e-06,
"loss": 1.3656,
"step": 2144
},
{
"epoch": 0.5704787234042553,
"grad_norm": 3.817089557647705,
"learning_rate": 9.88810147700521e-06,
"loss": 1.3597,
"step": 2145
},
{
"epoch": 0.5707446808510638,
"grad_norm": 3.5655431747436523,
"learning_rate": 9.887916373555597e-06,
"loss": 1.2276,
"step": 2146
},
{
"epoch": 0.5710106382978724,
"grad_norm": 3.873889923095703,
"learning_rate": 9.887731118868098e-06,
"loss": 1.3873,
"step": 2147
},
{
"epoch": 0.5712765957446808,
"grad_norm": 4.273273468017578,
"learning_rate": 9.887545712948441e-06,
"loss": 1.366,
"step": 2148
},
{
"epoch": 0.5715425531914894,
"grad_norm": 3.5899455547332764,
"learning_rate": 9.887360155802366e-06,
"loss": 1.1787,
"step": 2149
},
{
"epoch": 0.5718085106382979,
"grad_norm": 3.615471124649048,
"learning_rate": 9.887174447435615e-06,
"loss": 1.1561,
"step": 2150
},
{
"epoch": 0.5720744680851064,
"grad_norm": 3.8445990085601807,
"learning_rate": 9.886988587853933e-06,
"loss": 1.315,
"step": 2151
},
{
"epoch": 0.5723404255319149,
"grad_norm": 3.989668846130371,
"learning_rate": 9.886802577063068e-06,
"loss": 1.3116,
"step": 2152
},
{
"epoch": 0.5726063829787233,
"grad_norm": 4.619128227233887,
"learning_rate": 9.886616415068779e-06,
"loss": 1.3862,
"step": 2153
},
{
"epoch": 0.5728723404255319,
"grad_norm": 3.6989963054656982,
"learning_rate": 9.886430101876825e-06,
"loss": 1.2221,
"step": 2154
},
{
"epoch": 0.5731382978723404,
"grad_norm": 4.153132915496826,
"learning_rate": 9.886243637492969e-06,
"loss": 1.2128,
"step": 2155
},
{
"epoch": 0.573404255319149,
"grad_norm": 3.970520257949829,
"learning_rate": 9.886057021922984e-06,
"loss": 1.2802,
"step": 2156
},
{
"epoch": 0.5736702127659574,
"grad_norm": 3.751838207244873,
"learning_rate": 9.885870255172642e-06,
"loss": 1.1967,
"step": 2157
},
{
"epoch": 0.573936170212766,
"grad_norm": 3.6611552238464355,
"learning_rate": 9.88568333724772e-06,
"loss": 1.2956,
"step": 2158
},
{
"epoch": 0.5742021276595745,
"grad_norm": 4.170332908630371,
"learning_rate": 9.885496268154005e-06,
"loss": 1.2867,
"step": 2159
},
{
"epoch": 0.574468085106383,
"grad_norm": 3.5777552127838135,
"learning_rate": 9.885309047897285e-06,
"loss": 1.1703,
"step": 2160
},
{
"epoch": 0.5747340425531915,
"grad_norm": 3.9369912147521973,
"learning_rate": 9.88512167648335e-06,
"loss": 1.3682,
"step": 2161
},
{
"epoch": 0.575,
"grad_norm": 4.30880069732666,
"learning_rate": 9.884934153917998e-06,
"loss": 1.2892,
"step": 2162
},
{
"epoch": 0.5752659574468085,
"grad_norm": 4.251465797424316,
"learning_rate": 9.884746480207031e-06,
"loss": 1.3043,
"step": 2163
},
{
"epoch": 0.575531914893617,
"grad_norm": 3.4858951568603516,
"learning_rate": 9.88455865535626e-06,
"loss": 1.3418,
"step": 2164
},
{
"epoch": 0.5757978723404256,
"grad_norm": 3.715372085571289,
"learning_rate": 9.88437067937149e-06,
"loss": 1.274,
"step": 2165
},
{
"epoch": 0.576063829787234,
"grad_norm": 3.5083811283111572,
"learning_rate": 9.884182552258543e-06,
"loss": 1.1127,
"step": 2166
},
{
"epoch": 0.5763297872340426,
"grad_norm": 4.5049004554748535,
"learning_rate": 9.883994274023237e-06,
"loss": 1.3182,
"step": 2167
},
{
"epoch": 0.5765957446808511,
"grad_norm": 4.002771377563477,
"learning_rate": 9.883805844671396e-06,
"loss": 1.4289,
"step": 2168
},
{
"epoch": 0.5768617021276595,
"grad_norm": 3.691743850708008,
"learning_rate": 9.883617264208854e-06,
"loss": 1.3677,
"step": 2169
},
{
"epoch": 0.5771276595744681,
"grad_norm": 4.031147003173828,
"learning_rate": 9.883428532641445e-06,
"loss": 1.1805,
"step": 2170
},
{
"epoch": 0.5773936170212766,
"grad_norm": 4.453026294708252,
"learning_rate": 9.883239649975007e-06,
"loss": 1.4034,
"step": 2171
},
{
"epoch": 0.5776595744680851,
"grad_norm": 3.6685361862182617,
"learning_rate": 9.883050616215383e-06,
"loss": 1.3169,
"step": 2172
},
{
"epoch": 0.5779255319148936,
"grad_norm": 3.6789016723632812,
"learning_rate": 9.882861431368425e-06,
"loss": 1.3912,
"step": 2173
},
{
"epoch": 0.5781914893617022,
"grad_norm": 3.6971778869628906,
"learning_rate": 9.882672095439987e-06,
"loss": 1.1346,
"step": 2174
},
{
"epoch": 0.5784574468085106,
"grad_norm": 3.8128819465637207,
"learning_rate": 9.882482608435924e-06,
"loss": 1.3105,
"step": 2175
},
{
"epoch": 0.5787234042553191,
"grad_norm": 4.369806289672852,
"learning_rate": 9.882292970362101e-06,
"loss": 1.3673,
"step": 2176
},
{
"epoch": 0.5789893617021277,
"grad_norm": 3.403639316558838,
"learning_rate": 9.882103181224386e-06,
"loss": 1.2435,
"step": 2177
},
{
"epoch": 0.5792553191489361,
"grad_norm": 3.7755768299102783,
"learning_rate": 9.88191324102865e-06,
"loss": 1.3237,
"step": 2178
},
{
"epoch": 0.5795212765957447,
"grad_norm": 3.4330899715423584,
"learning_rate": 9.88172314978077e-06,
"loss": 1.249,
"step": 2179
},
{
"epoch": 0.5797872340425532,
"grad_norm": 3.9291467666625977,
"learning_rate": 9.88153290748663e-06,
"loss": 1.4475,
"step": 2180
},
{
"epoch": 0.5800531914893617,
"grad_norm": 3.731370210647583,
"learning_rate": 9.881342514152114e-06,
"loss": 1.2166,
"step": 2181
},
{
"epoch": 0.5803191489361702,
"grad_norm": 3.7620556354522705,
"learning_rate": 9.881151969783113e-06,
"loss": 1.2329,
"step": 2182
},
{
"epoch": 0.5805851063829788,
"grad_norm": 3.822985887527466,
"learning_rate": 9.880961274385523e-06,
"loss": 1.2219,
"step": 2183
},
{
"epoch": 0.5808510638297872,
"grad_norm": 3.2141547203063965,
"learning_rate": 9.880770427965245e-06,
"loss": 1.0712,
"step": 2184
},
{
"epoch": 0.5811170212765957,
"grad_norm": 3.733004331588745,
"learning_rate": 9.880579430528183e-06,
"loss": 1.203,
"step": 2185
},
{
"epoch": 0.5813829787234043,
"grad_norm": 3.6706783771514893,
"learning_rate": 9.880388282080247e-06,
"loss": 1.1757,
"step": 2186
},
{
"epoch": 0.5816489361702127,
"grad_norm": 3.7189342975616455,
"learning_rate": 9.880196982627352e-06,
"loss": 1.2265,
"step": 2187
},
{
"epoch": 0.5819148936170213,
"grad_norm": 3.8598103523254395,
"learning_rate": 9.88000553217542e-06,
"loss": 1.2892,
"step": 2188
},
{
"epoch": 0.5821808510638298,
"grad_norm": 3.854811191558838,
"learning_rate": 9.879813930730367e-06,
"loss": 1.1292,
"step": 2189
},
{
"epoch": 0.5824468085106383,
"grad_norm": 4.142318248748779,
"learning_rate": 9.879622178298128e-06,
"loss": 1.1795,
"step": 2190
},
{
"epoch": 0.5827127659574468,
"grad_norm": 3.688462257385254,
"learning_rate": 9.879430274884632e-06,
"loss": 1.2044,
"step": 2191
},
{
"epoch": 0.5829787234042553,
"grad_norm": 3.4742586612701416,
"learning_rate": 9.879238220495818e-06,
"loss": 1.1547,
"step": 2192
},
{
"epoch": 0.5832446808510638,
"grad_norm": 3.9008736610412598,
"learning_rate": 9.87904601513763e-06,
"loss": 1.2293,
"step": 2193
},
{
"epoch": 0.5835106382978723,
"grad_norm": 3.70694899559021,
"learning_rate": 9.878853658816015e-06,
"loss": 1.2758,
"step": 2194
},
{
"epoch": 0.5837765957446809,
"grad_norm": 4.015002727508545,
"learning_rate": 9.878661151536923e-06,
"loss": 1.3352,
"step": 2195
},
{
"epoch": 0.5840425531914893,
"grad_norm": 3.423016309738159,
"learning_rate": 9.87846849330631e-06,
"loss": 1.1313,
"step": 2196
},
{
"epoch": 0.5843085106382979,
"grad_norm": 3.549492120742798,
"learning_rate": 9.87827568413014e-06,
"loss": 1.3162,
"step": 2197
},
{
"epoch": 0.5845744680851064,
"grad_norm": 4.05422306060791,
"learning_rate": 9.878082724014375e-06,
"loss": 1.2593,
"step": 2198
},
{
"epoch": 0.5848404255319148,
"grad_norm": 3.875730514526367,
"learning_rate": 9.877889612964988e-06,
"loss": 1.1837,
"step": 2199
},
{
"epoch": 0.5851063829787234,
"grad_norm": 3.4176459312438965,
"learning_rate": 9.877696350987954e-06,
"loss": 1.1748,
"step": 2200
},
{
"epoch": 0.5853723404255319,
"grad_norm": 4.281347751617432,
"learning_rate": 9.87750293808925e-06,
"loss": 1.272,
"step": 2201
},
{
"epoch": 0.5856382978723405,
"grad_norm": 4.0162577629089355,
"learning_rate": 9.877309374274865e-06,
"loss": 1.2567,
"step": 2202
},
{
"epoch": 0.5859042553191489,
"grad_norm": 4.051181793212891,
"learning_rate": 9.877115659550785e-06,
"loss": 1.2305,
"step": 2203
},
{
"epoch": 0.5861702127659575,
"grad_norm": 3.711719512939453,
"learning_rate": 9.876921793923005e-06,
"loss": 1.1956,
"step": 2204
},
{
"epoch": 0.586436170212766,
"grad_norm": 3.402353048324585,
"learning_rate": 9.876727777397522e-06,
"loss": 1.1938,
"step": 2205
},
{
"epoch": 0.5867021276595744,
"grad_norm": 3.7966136932373047,
"learning_rate": 9.87653360998034e-06,
"loss": 1.2964,
"step": 2206
},
{
"epoch": 0.586968085106383,
"grad_norm": 3.816732406616211,
"learning_rate": 9.876339291677466e-06,
"loss": 1.2739,
"step": 2207
},
{
"epoch": 0.5872340425531914,
"grad_norm": 3.801443576812744,
"learning_rate": 9.876144822494913e-06,
"loss": 1.2832,
"step": 2208
},
{
"epoch": 0.5875,
"grad_norm": 3.7559401988983154,
"learning_rate": 9.8759502024387e-06,
"loss": 1.2176,
"step": 2209
},
{
"epoch": 0.5877659574468085,
"grad_norm": 3.9138758182525635,
"learning_rate": 9.875755431514846e-06,
"loss": 1.3423,
"step": 2210
},
{
"epoch": 0.5880319148936171,
"grad_norm": 4.0434041023254395,
"learning_rate": 9.875560509729379e-06,
"loss": 1.3064,
"step": 2211
},
{
"epoch": 0.5882978723404255,
"grad_norm": 3.7799887657165527,
"learning_rate": 9.87536543708833e-06,
"loss": 1.2518,
"step": 2212
},
{
"epoch": 0.5885638297872341,
"grad_norm": 3.8034684658050537,
"learning_rate": 9.875170213597731e-06,
"loss": 1.2485,
"step": 2213
},
{
"epoch": 0.5888297872340426,
"grad_norm": 4.390495300292969,
"learning_rate": 9.874974839263629e-06,
"loss": 1.263,
"step": 2214
},
{
"epoch": 0.589095744680851,
"grad_norm": 4.027488708496094,
"learning_rate": 9.874779314092065e-06,
"loss": 1.2718,
"step": 2215
},
{
"epoch": 0.5893617021276596,
"grad_norm": 3.8035428524017334,
"learning_rate": 9.87458363808909e-06,
"loss": 1.2636,
"step": 2216
},
{
"epoch": 0.589627659574468,
"grad_norm": 3.5652413368225098,
"learning_rate": 9.874387811260756e-06,
"loss": 1.241,
"step": 2217
},
{
"epoch": 0.5898936170212766,
"grad_norm": 4.2285614013671875,
"learning_rate": 9.874191833613128e-06,
"loss": 1.1943,
"step": 2218
},
{
"epoch": 0.5901595744680851,
"grad_norm": 4.229702472686768,
"learning_rate": 9.873995705152264e-06,
"loss": 1.382,
"step": 2219
},
{
"epoch": 0.5904255319148937,
"grad_norm": 4.092412948608398,
"learning_rate": 9.873799425884235e-06,
"loss": 1.132,
"step": 2220
},
{
"epoch": 0.5906914893617021,
"grad_norm": 3.6512703895568848,
"learning_rate": 9.873602995815113e-06,
"loss": 1.2022,
"step": 2221
},
{
"epoch": 0.5909574468085106,
"grad_norm": 3.634768009185791,
"learning_rate": 9.873406414950977e-06,
"loss": 1.2932,
"step": 2222
},
{
"epoch": 0.5912234042553192,
"grad_norm": 3.6227974891662598,
"learning_rate": 9.873209683297908e-06,
"loss": 1.2947,
"step": 2223
},
{
"epoch": 0.5914893617021276,
"grad_norm": 3.5124943256378174,
"learning_rate": 9.873012800861996e-06,
"loss": 1.1896,
"step": 2224
},
{
"epoch": 0.5917553191489362,
"grad_norm": 3.759474992752075,
"learning_rate": 9.872815767649329e-06,
"loss": 1.2116,
"step": 2225
},
{
"epoch": 0.5920212765957447,
"grad_norm": 3.7036375999450684,
"learning_rate": 9.872618583666005e-06,
"loss": 1.2293,
"step": 2226
},
{
"epoch": 0.5922872340425532,
"grad_norm": 3.61789608001709,
"learning_rate": 9.872421248918124e-06,
"loss": 1.2121,
"step": 2227
},
{
"epoch": 0.5925531914893617,
"grad_norm": 4.019472122192383,
"learning_rate": 9.872223763411794e-06,
"loss": 1.1467,
"step": 2228
},
{
"epoch": 0.5928191489361702,
"grad_norm": 3.774531364440918,
"learning_rate": 9.872026127153126e-06,
"loss": 1.3685,
"step": 2229
},
{
"epoch": 0.5930851063829787,
"grad_norm": 3.9165661334991455,
"learning_rate": 9.871828340148232e-06,
"loss": 1.1668,
"step": 2230
},
{
"epoch": 0.5933510638297872,
"grad_norm": 3.762282133102417,
"learning_rate": 9.871630402403235e-06,
"loss": 1.2315,
"step": 2231
},
{
"epoch": 0.5936170212765958,
"grad_norm": 3.96540904045105,
"learning_rate": 9.871432313924255e-06,
"loss": 1.3042,
"step": 2232
},
{
"epoch": 0.5938829787234042,
"grad_norm": 4.1440229415893555,
"learning_rate": 9.871234074717424e-06,
"loss": 1.3715,
"step": 2233
},
{
"epoch": 0.5941489361702128,
"grad_norm": 3.7638661861419678,
"learning_rate": 9.871035684788878e-06,
"loss": 1.2619,
"step": 2234
},
{
"epoch": 0.5944148936170213,
"grad_norm": 3.5591323375701904,
"learning_rate": 9.870837144144752e-06,
"loss": 1.1941,
"step": 2235
},
{
"epoch": 0.5946808510638298,
"grad_norm": 4.143522262573242,
"learning_rate": 9.87063845279119e-06,
"loss": 1.1687,
"step": 2236
},
{
"epoch": 0.5949468085106383,
"grad_norm": 4.148569583892822,
"learning_rate": 9.87043961073434e-06,
"loss": 1.4218,
"step": 2237
},
{
"epoch": 0.5952127659574468,
"grad_norm": 3.687147378921509,
"learning_rate": 9.870240617980353e-06,
"loss": 1.1311,
"step": 2238
},
{
"epoch": 0.5954787234042553,
"grad_norm": 3.5179238319396973,
"learning_rate": 9.870041474535388e-06,
"loss": 1.1823,
"step": 2239
},
{
"epoch": 0.5957446808510638,
"grad_norm": 3.844238519668579,
"learning_rate": 9.869842180405607e-06,
"loss": 1.3256,
"step": 2240
},
{
"epoch": 0.5960106382978724,
"grad_norm": 3.9333431720733643,
"learning_rate": 9.869642735597174e-06,
"loss": 1.3545,
"step": 2241
},
{
"epoch": 0.5962765957446808,
"grad_norm": 3.531179666519165,
"learning_rate": 9.869443140116261e-06,
"loss": 1.3254,
"step": 2242
},
{
"epoch": 0.5965425531914894,
"grad_norm": 3.795381546020508,
"learning_rate": 9.869243393969045e-06,
"loss": 1.2744,
"step": 2243
},
{
"epoch": 0.5968085106382979,
"grad_norm": 4.001238822937012,
"learning_rate": 9.869043497161707e-06,
"loss": 1.3585,
"step": 2244
},
{
"epoch": 0.5970744680851063,
"grad_norm": 4.289900302886963,
"learning_rate": 9.868843449700429e-06,
"loss": 1.3628,
"step": 2245
},
{
"epoch": 0.5973404255319149,
"grad_norm": 3.581144332885742,
"learning_rate": 9.868643251591403e-06,
"loss": 1.3021,
"step": 2246
},
{
"epoch": 0.5976063829787234,
"grad_norm": 3.504152536392212,
"learning_rate": 9.868442902840823e-06,
"loss": 1.2073,
"step": 2247
},
{
"epoch": 0.597872340425532,
"grad_norm": 3.648141622543335,
"learning_rate": 9.868242403454886e-06,
"loss": 1.3169,
"step": 2248
},
{
"epoch": 0.5981382978723404,
"grad_norm": 3.544408082962036,
"learning_rate": 9.8680417534398e-06,
"loss": 1.1334,
"step": 2249
},
{
"epoch": 0.598404255319149,
"grad_norm": 3.6868479251861572,
"learning_rate": 9.867840952801768e-06,
"loss": 1.209,
"step": 2250
},
{
"epoch": 0.5986702127659574,
"grad_norm": 3.6805198192596436,
"learning_rate": 9.867640001547007e-06,
"loss": 1.3011,
"step": 2251
},
{
"epoch": 0.5989361702127659,
"grad_norm": 3.646977186203003,
"learning_rate": 9.867438899681734e-06,
"loss": 1.2178,
"step": 2252
},
{
"epoch": 0.5992021276595745,
"grad_norm": 3.4612386226654053,
"learning_rate": 9.867237647212168e-06,
"loss": 1.1646,
"step": 2253
},
{
"epoch": 0.5994680851063829,
"grad_norm": 3.663968324661255,
"learning_rate": 9.867036244144544e-06,
"loss": 1.2337,
"step": 2254
},
{
"epoch": 0.5997340425531915,
"grad_norm": 3.724919080734253,
"learning_rate": 9.866834690485083e-06,
"loss": 1.3467,
"step": 2255
},
{
"epoch": 0.6,
"grad_norm": 3.6140668392181396,
"learning_rate": 9.86663298624003e-06,
"loss": 1.2684,
"step": 2256
},
{
"epoch": 0.6002659574468086,
"grad_norm": 3.805572271347046,
"learning_rate": 9.866431131415621e-06,
"loss": 1.3172,
"step": 2257
},
{
"epoch": 0.600531914893617,
"grad_norm": 3.921037435531616,
"learning_rate": 9.866229126018104e-06,
"loss": 1.1632,
"step": 2258
},
{
"epoch": 0.6007978723404256,
"grad_norm": 4.814824104309082,
"learning_rate": 9.866026970053728e-06,
"loss": 1.371,
"step": 2259
},
{
"epoch": 0.601063829787234,
"grad_norm": 3.8934485912323,
"learning_rate": 9.86582466352875e-06,
"loss": 1.2192,
"step": 2260
},
{
"epoch": 0.6013297872340425,
"grad_norm": 4.167794704437256,
"learning_rate": 9.865622206449428e-06,
"loss": 1.3167,
"step": 2261
},
{
"epoch": 0.6015957446808511,
"grad_norm": 3.916013479232788,
"learning_rate": 9.865419598822025e-06,
"loss": 1.2492,
"step": 2262
},
{
"epoch": 0.6018617021276595,
"grad_norm": 3.5649423599243164,
"learning_rate": 9.865216840652811e-06,
"loss": 1.1833,
"step": 2263
},
{
"epoch": 0.6021276595744681,
"grad_norm": 3.508890151977539,
"learning_rate": 9.865013931948061e-06,
"loss": 1.2527,
"step": 2264
},
{
"epoch": 0.6023936170212766,
"grad_norm": 3.513054132461548,
"learning_rate": 9.864810872714053e-06,
"loss": 1.2032,
"step": 2265
},
{
"epoch": 0.6026595744680852,
"grad_norm": 3.777679443359375,
"learning_rate": 9.864607662957066e-06,
"loss": 1.3355,
"step": 2266
},
{
"epoch": 0.6029255319148936,
"grad_norm": 3.778639316558838,
"learning_rate": 9.864404302683393e-06,
"loss": 1.3697,
"step": 2267
},
{
"epoch": 0.6031914893617021,
"grad_norm": 3.5880136489868164,
"learning_rate": 9.864200791899323e-06,
"loss": 1.2124,
"step": 2268
},
{
"epoch": 0.6034574468085107,
"grad_norm": 3.5101895332336426,
"learning_rate": 9.863997130611153e-06,
"loss": 1.1641,
"step": 2269
},
{
"epoch": 0.6037234042553191,
"grad_norm": 3.5391786098480225,
"learning_rate": 9.863793318825186e-06,
"loss": 1.2167,
"step": 2270
},
{
"epoch": 0.6039893617021277,
"grad_norm": 3.74766206741333,
"learning_rate": 9.863589356547728e-06,
"loss": 1.3565,
"step": 2271
},
{
"epoch": 0.6042553191489362,
"grad_norm": 3.966728925704956,
"learning_rate": 9.863385243785088e-06,
"loss": 1.3416,
"step": 2272
},
{
"epoch": 0.6045212765957447,
"grad_norm": 3.2839200496673584,
"learning_rate": 9.863180980543582e-06,
"loss": 1.1073,
"step": 2273
},
{
"epoch": 0.6047872340425532,
"grad_norm": 3.958099603652954,
"learning_rate": 9.862976566829532e-06,
"loss": 1.356,
"step": 2274
},
{
"epoch": 0.6050531914893617,
"grad_norm": 3.6041507720947266,
"learning_rate": 9.862772002649261e-06,
"loss": 1.4091,
"step": 2275
},
{
"epoch": 0.6053191489361702,
"grad_norm": 3.320826530456543,
"learning_rate": 9.862567288009099e-06,
"loss": 1.196,
"step": 2276
},
{
"epoch": 0.6055851063829787,
"grad_norm": 3.375542163848877,
"learning_rate": 9.862362422915382e-06,
"loss": 1.161,
"step": 2277
},
{
"epoch": 0.6058510638297873,
"grad_norm": 3.680457353591919,
"learning_rate": 9.862157407374446e-06,
"loss": 1.129,
"step": 2278
},
{
"epoch": 0.6061170212765957,
"grad_norm": 3.8363595008850098,
"learning_rate": 9.861952241392633e-06,
"loss": 1.309,
"step": 2279
},
{
"epoch": 0.6063829787234043,
"grad_norm": 3.7582051753997803,
"learning_rate": 9.861746924976297e-06,
"loss": 1.2328,
"step": 2280
},
{
"epoch": 0.6066489361702128,
"grad_norm": 3.5171892642974854,
"learning_rate": 9.861541458131785e-06,
"loss": 1.2098,
"step": 2281
},
{
"epoch": 0.6069148936170212,
"grad_norm": 3.905834197998047,
"learning_rate": 9.861335840865455e-06,
"loss": 1.2909,
"step": 2282
},
{
"epoch": 0.6071808510638298,
"grad_norm": 3.9347522258758545,
"learning_rate": 9.861130073183674e-06,
"loss": 1.265,
"step": 2283
},
{
"epoch": 0.6074468085106383,
"grad_norm": 3.6212542057037354,
"learning_rate": 9.860924155092803e-06,
"loss": 1.3044,
"step": 2284
},
{
"epoch": 0.6077127659574468,
"grad_norm": 3.9703807830810547,
"learning_rate": 9.860718086599217e-06,
"loss": 1.3497,
"step": 2285
},
{
"epoch": 0.6079787234042553,
"grad_norm": 3.94783091545105,
"learning_rate": 9.860511867709289e-06,
"loss": 1.248,
"step": 2286
},
{
"epoch": 0.6082446808510639,
"grad_norm": 4.237410545349121,
"learning_rate": 9.860305498429404e-06,
"loss": 1.3791,
"step": 2287
},
{
"epoch": 0.6085106382978723,
"grad_norm": 3.7259433269500732,
"learning_rate": 9.860098978765942e-06,
"loss": 1.3233,
"step": 2288
},
{
"epoch": 0.6087765957446809,
"grad_norm": 3.8508055210113525,
"learning_rate": 9.859892308725296e-06,
"loss": 1.2324,
"step": 2289
},
{
"epoch": 0.6090425531914894,
"grad_norm": 3.8663196563720703,
"learning_rate": 9.859685488313861e-06,
"loss": 1.2425,
"step": 2290
},
{
"epoch": 0.6093085106382978,
"grad_norm": 4.03026008605957,
"learning_rate": 9.859478517538035e-06,
"loss": 1.2932,
"step": 2291
},
{
"epoch": 0.6095744680851064,
"grad_norm": 3.517122745513916,
"learning_rate": 9.859271396404223e-06,
"loss": 1.1597,
"step": 2292
},
{
"epoch": 0.6098404255319149,
"grad_norm": 3.6704776287078857,
"learning_rate": 9.85906412491883e-06,
"loss": 1.1834,
"step": 2293
},
{
"epoch": 0.6101063829787234,
"grad_norm": 4.267923831939697,
"learning_rate": 9.858856703088276e-06,
"loss": 1.1888,
"step": 2294
},
{
"epoch": 0.6103723404255319,
"grad_norm": 4.178102493286133,
"learning_rate": 9.85864913091897e-06,
"loss": 1.3685,
"step": 2295
},
{
"epoch": 0.6106382978723405,
"grad_norm": 4.176131725311279,
"learning_rate": 9.858441408417345e-06,
"loss": 1.231,
"step": 2296
},
{
"epoch": 0.6109042553191489,
"grad_norm": 3.4884450435638428,
"learning_rate": 9.85823353558982e-06,
"loss": 1.2206,
"step": 2297
},
{
"epoch": 0.6111702127659574,
"grad_norm": 3.8766729831695557,
"learning_rate": 9.85802551244283e-06,
"loss": 1.3035,
"step": 2298
},
{
"epoch": 0.611436170212766,
"grad_norm": 3.5301473140716553,
"learning_rate": 9.857817338982811e-06,
"loss": 1.1712,
"step": 2299
},
{
"epoch": 0.6117021276595744,
"grad_norm": 3.7902379035949707,
"learning_rate": 9.857609015216205e-06,
"loss": 1.1324,
"step": 2300
},
{
"epoch": 0.611968085106383,
"grad_norm": 4.028817176818848,
"learning_rate": 9.857400541149455e-06,
"loss": 1.3142,
"step": 2301
},
{
"epoch": 0.6122340425531915,
"grad_norm": 3.6242549419403076,
"learning_rate": 9.857191916789016e-06,
"loss": 1.2368,
"step": 2302
},
{
"epoch": 0.6125,
"grad_norm": 3.6776719093322754,
"learning_rate": 9.856983142141338e-06,
"loss": 1.3289,
"step": 2303
},
{
"epoch": 0.6127659574468085,
"grad_norm": 3.8104121685028076,
"learning_rate": 9.856774217212886e-06,
"loss": 1.3076,
"step": 2304
},
{
"epoch": 0.613031914893617,
"grad_norm": 3.668893337249756,
"learning_rate": 9.85656514201012e-06,
"loss": 1.2935,
"step": 2305
},
{
"epoch": 0.6132978723404255,
"grad_norm": 3.5787241458892822,
"learning_rate": 9.85635591653951e-06,
"loss": 1.1477,
"step": 2306
},
{
"epoch": 0.613563829787234,
"grad_norm": 3.9113807678222656,
"learning_rate": 9.856146540807531e-06,
"loss": 1.3338,
"step": 2307
},
{
"epoch": 0.6138297872340426,
"grad_norm": 3.6910572052001953,
"learning_rate": 9.85593701482066e-06,
"loss": 1.1302,
"step": 2308
},
{
"epoch": 0.614095744680851,
"grad_norm": 4.1038689613342285,
"learning_rate": 9.855727338585381e-06,
"loss": 1.4519,
"step": 2309
},
{
"epoch": 0.6143617021276596,
"grad_norm": 3.5061099529266357,
"learning_rate": 9.855517512108182e-06,
"loss": 1.2243,
"step": 2310
},
{
"epoch": 0.6146276595744681,
"grad_norm": 3.5231192111968994,
"learning_rate": 9.855307535395553e-06,
"loss": 1.2158,
"step": 2311
},
{
"epoch": 0.6148936170212767,
"grad_norm": 3.8572421073913574,
"learning_rate": 9.855097408453993e-06,
"loss": 1.2392,
"step": 2312
},
{
"epoch": 0.6151595744680851,
"grad_norm": 3.7707557678222656,
"learning_rate": 9.854887131290002e-06,
"loss": 1.2316,
"step": 2313
},
{
"epoch": 0.6154255319148936,
"grad_norm": 3.860130548477173,
"learning_rate": 9.854676703910092e-06,
"loss": 1.2118,
"step": 2314
},
{
"epoch": 0.6156914893617021,
"grad_norm": 3.404811382293701,
"learning_rate": 9.854466126320763e-06,
"loss": 1.1942,
"step": 2315
},
{
"epoch": 0.6159574468085106,
"grad_norm": 3.659116268157959,
"learning_rate": 9.854255398528541e-06,
"loss": 1.2822,
"step": 2316
},
{
"epoch": 0.6162234042553192,
"grad_norm": 3.97190260887146,
"learning_rate": 9.85404452053994e-06,
"loss": 1.3892,
"step": 2317
},
{
"epoch": 0.6164893617021276,
"grad_norm": 3.99293851852417,
"learning_rate": 9.853833492361486e-06,
"loss": 1.2248,
"step": 2318
},
{
"epoch": 0.6167553191489362,
"grad_norm": 3.846611499786377,
"learning_rate": 9.85362231399971e-06,
"loss": 1.3553,
"step": 2319
},
{
"epoch": 0.6170212765957447,
"grad_norm": 3.922665596008301,
"learning_rate": 9.853410985461145e-06,
"loss": 1.2831,
"step": 2320
},
{
"epoch": 0.6172872340425531,
"grad_norm": 3.788879871368408,
"learning_rate": 9.85319950675233e-06,
"loss": 1.3213,
"step": 2321
},
{
"epoch": 0.6175531914893617,
"grad_norm": 3.7415027618408203,
"learning_rate": 9.852987877879807e-06,
"loss": 1.1951,
"step": 2322
},
{
"epoch": 0.6178191489361702,
"grad_norm": 4.016115665435791,
"learning_rate": 9.852776098850128e-06,
"loss": 1.2595,
"step": 2323
},
{
"epoch": 0.6180851063829788,
"grad_norm": 3.5927200317382812,
"learning_rate": 9.85256416966984e-06,
"loss": 1.2103,
"step": 2324
},
{
"epoch": 0.6183510638297872,
"grad_norm": 3.9768147468566895,
"learning_rate": 9.852352090345504e-06,
"loss": 1.3389,
"step": 2325
},
{
"epoch": 0.6186170212765958,
"grad_norm": 3.378852605819702,
"learning_rate": 9.852139860883684e-06,
"loss": 1.1266,
"step": 2326
},
{
"epoch": 0.6188829787234043,
"grad_norm": 4.071725368499756,
"learning_rate": 9.851927481290943e-06,
"loss": 1.4006,
"step": 2327
},
{
"epoch": 0.6191489361702127,
"grad_norm": 3.721118688583374,
"learning_rate": 9.851714951573853e-06,
"loss": 1.2344,
"step": 2328
},
{
"epoch": 0.6194148936170213,
"grad_norm": 3.551180839538574,
"learning_rate": 9.851502271738989e-06,
"loss": 1.3175,
"step": 2329
},
{
"epoch": 0.6196808510638298,
"grad_norm": 3.6764516830444336,
"learning_rate": 9.851289441792934e-06,
"loss": 1.2169,
"step": 2330
},
{
"epoch": 0.6199468085106383,
"grad_norm": 3.8505606651306152,
"learning_rate": 9.851076461742272e-06,
"loss": 1.3586,
"step": 2331
},
{
"epoch": 0.6202127659574468,
"grad_norm": 3.9605445861816406,
"learning_rate": 9.850863331593591e-06,
"loss": 1.2454,
"step": 2332
},
{
"epoch": 0.6204787234042554,
"grad_norm": 4.140010833740234,
"learning_rate": 9.85065005135349e-06,
"loss": 1.4014,
"step": 2333
},
{
"epoch": 0.6207446808510638,
"grad_norm": 4.118074417114258,
"learning_rate": 9.850436621028565e-06,
"loss": 1.2367,
"step": 2334
},
{
"epoch": 0.6210106382978723,
"grad_norm": 3.6424777507781982,
"learning_rate": 9.85022304062542e-06,
"loss": 1.129,
"step": 2335
},
{
"epoch": 0.6212765957446809,
"grad_norm": 3.643145799636841,
"learning_rate": 9.850009310150662e-06,
"loss": 1.3767,
"step": 2336
},
{
"epoch": 0.6215425531914893,
"grad_norm": 3.913959503173828,
"learning_rate": 9.849795429610908e-06,
"loss": 1.1977,
"step": 2337
},
{
"epoch": 0.6218085106382979,
"grad_norm": 3.91186261177063,
"learning_rate": 9.849581399012772e-06,
"loss": 1.2842,
"step": 2338
},
{
"epoch": 0.6220744680851064,
"grad_norm": 3.7167961597442627,
"learning_rate": 9.849367218362879e-06,
"loss": 1.2802,
"step": 2339
},
{
"epoch": 0.6223404255319149,
"grad_norm": 3.5471532344818115,
"learning_rate": 9.849152887667855e-06,
"loss": 1.2785,
"step": 2340
},
{
"epoch": 0.6226063829787234,
"grad_norm": 4.358826637268066,
"learning_rate": 9.84893840693433e-06,
"loss": 1.1696,
"step": 2341
},
{
"epoch": 0.622872340425532,
"grad_norm": 3.869590997695923,
"learning_rate": 9.848723776168942e-06,
"loss": 1.3316,
"step": 2342
},
{
"epoch": 0.6231382978723404,
"grad_norm": 4.493122577667236,
"learning_rate": 9.848508995378333e-06,
"loss": 1.2928,
"step": 2343
},
{
"epoch": 0.6234042553191489,
"grad_norm": 3.808885335922241,
"learning_rate": 9.848294064569146e-06,
"loss": 1.331,
"step": 2344
},
{
"epoch": 0.6236702127659575,
"grad_norm": 3.6614105701446533,
"learning_rate": 9.848078983748032e-06,
"loss": 1.3549,
"step": 2345
},
{
"epoch": 0.6239361702127659,
"grad_norm": 3.5685722827911377,
"learning_rate": 9.847863752921649e-06,
"loss": 1.1914,
"step": 2346
},
{
"epoch": 0.6242021276595745,
"grad_norm": 4.203314781188965,
"learning_rate": 9.847648372096652e-06,
"loss": 1.3369,
"step": 2347
},
{
"epoch": 0.624468085106383,
"grad_norm": 3.762103796005249,
"learning_rate": 9.847432841279707e-06,
"loss": 1.261,
"step": 2348
},
{
"epoch": 0.6247340425531915,
"grad_norm": 4.371121883392334,
"learning_rate": 9.847217160477483e-06,
"loss": 1.3071,
"step": 2349
},
{
"epoch": 0.625,
"grad_norm": 3.928662061691284,
"learning_rate": 9.847001329696653e-06,
"loss": 1.2321,
"step": 2350
},
{
"epoch": 0.6252659574468085,
"grad_norm": 3.7375707626342773,
"learning_rate": 9.846785348943896e-06,
"loss": 1.3022,
"step": 2351
},
{
"epoch": 0.625531914893617,
"grad_norm": 3.684936046600342,
"learning_rate": 9.846569218225892e-06,
"loss": 1.2365,
"step": 2352
},
{
"epoch": 0.6257978723404255,
"grad_norm": 3.5079708099365234,
"learning_rate": 9.846352937549332e-06,
"loss": 1.2328,
"step": 2353
},
{
"epoch": 0.6260638297872341,
"grad_norm": 3.814976692199707,
"learning_rate": 9.846136506920907e-06,
"loss": 1.1824,
"step": 2354
},
{
"epoch": 0.6263297872340425,
"grad_norm": 3.3843934535980225,
"learning_rate": 9.84591992634731e-06,
"loss": 1.0477,
"step": 2355
},
{
"epoch": 0.6265957446808511,
"grad_norm": 3.712428569793701,
"learning_rate": 9.845703195835248e-06,
"loss": 1.2826,
"step": 2356
},
{
"epoch": 0.6268617021276596,
"grad_norm": 3.617882251739502,
"learning_rate": 9.845486315391421e-06,
"loss": 1.2472,
"step": 2357
},
{
"epoch": 0.627127659574468,
"grad_norm": 4.057145595550537,
"learning_rate": 9.845269285022545e-06,
"loss": 1.4144,
"step": 2358
},
{
"epoch": 0.6273936170212766,
"grad_norm": 4.23139762878418,
"learning_rate": 9.845052104735331e-06,
"loss": 1.4445,
"step": 2359
},
{
"epoch": 0.6276595744680851,
"grad_norm": 3.8976731300354004,
"learning_rate": 9.844834774536503e-06,
"loss": 1.2646,
"step": 2360
},
{
"epoch": 0.6279255319148936,
"grad_norm": 3.6036627292633057,
"learning_rate": 9.844617294432781e-06,
"loss": 1.251,
"step": 2361
},
{
"epoch": 0.6281914893617021,
"grad_norm": 3.4059393405914307,
"learning_rate": 9.844399664430896e-06,
"loss": 1.1432,
"step": 2362
},
{
"epoch": 0.6284574468085107,
"grad_norm": 3.6594855785369873,
"learning_rate": 9.844181884537583e-06,
"loss": 1.3047,
"step": 2363
},
{
"epoch": 0.6287234042553191,
"grad_norm": 4.183903217315674,
"learning_rate": 9.843963954759578e-06,
"loss": 1.2951,
"step": 2364
},
{
"epoch": 0.6289893617021277,
"grad_norm": 3.496905565261841,
"learning_rate": 9.843745875103628e-06,
"loss": 1.3087,
"step": 2365
},
{
"epoch": 0.6292553191489362,
"grad_norm": 3.5995302200317383,
"learning_rate": 9.843527645576475e-06,
"loss": 1.2998,
"step": 2366
},
{
"epoch": 0.6295212765957446,
"grad_norm": 3.597393035888672,
"learning_rate": 9.843309266184875e-06,
"loss": 1.2151,
"step": 2367
},
{
"epoch": 0.6297872340425532,
"grad_norm": 3.922405481338501,
"learning_rate": 9.843090736935583e-06,
"loss": 1.4409,
"step": 2368
},
{
"epoch": 0.6300531914893617,
"grad_norm": 3.7593741416931152,
"learning_rate": 9.842872057835363e-06,
"loss": 1.0905,
"step": 2369
},
{
"epoch": 0.6303191489361702,
"grad_norm": 3.570892572402954,
"learning_rate": 9.842653228890979e-06,
"loss": 1.2337,
"step": 2370
},
{
"epoch": 0.6305851063829787,
"grad_norm": 3.2270023822784424,
"learning_rate": 9.842434250109202e-06,
"loss": 0.9824,
"step": 2371
},
{
"epoch": 0.6308510638297873,
"grad_norm": 3.9054601192474365,
"learning_rate": 9.84221512149681e-06,
"loss": 1.3091,
"step": 2372
},
{
"epoch": 0.6311170212765957,
"grad_norm": 3.7820627689361572,
"learning_rate": 9.84199584306058e-06,
"loss": 1.2331,
"step": 2373
},
{
"epoch": 0.6313829787234042,
"grad_norm": 3.407257080078125,
"learning_rate": 9.841776414807297e-06,
"loss": 1.1868,
"step": 2374
},
{
"epoch": 0.6316489361702128,
"grad_norm": 3.471640110015869,
"learning_rate": 9.841556836743752e-06,
"loss": 1.2025,
"step": 2375
},
{
"epoch": 0.6319148936170212,
"grad_norm": 3.824422597885132,
"learning_rate": 9.841337108876739e-06,
"loss": 1.1932,
"step": 2376
},
{
"epoch": 0.6321808510638298,
"grad_norm": 3.6980538368225098,
"learning_rate": 9.841117231213055e-06,
"loss": 1.2374,
"step": 2377
},
{
"epoch": 0.6324468085106383,
"grad_norm": 3.9002277851104736,
"learning_rate": 9.840897203759502e-06,
"loss": 1.3205,
"step": 2378
},
{
"epoch": 0.6327127659574469,
"grad_norm": 3.993248462677002,
"learning_rate": 9.840677026522893e-06,
"loss": 1.1262,
"step": 2379
},
{
"epoch": 0.6329787234042553,
"grad_norm": 3.8742499351501465,
"learning_rate": 9.840456699510038e-06,
"loss": 1.1456,
"step": 2380
},
{
"epoch": 0.6332446808510638,
"grad_norm": 3.772584915161133,
"learning_rate": 9.840236222727752e-06,
"loss": 1.1367,
"step": 2381
},
{
"epoch": 0.6335106382978724,
"grad_norm": 3.7653708457946777,
"learning_rate": 9.840015596182861e-06,
"loss": 1.24,
"step": 2382
},
{
"epoch": 0.6337765957446808,
"grad_norm": 3.4554617404937744,
"learning_rate": 9.839794819882188e-06,
"loss": 1.2708,
"step": 2383
},
{
"epoch": 0.6340425531914894,
"grad_norm": 3.808807611465454,
"learning_rate": 9.839573893832564e-06,
"loss": 1.3985,
"step": 2384
},
{
"epoch": 0.6343085106382979,
"grad_norm": 3.6254007816314697,
"learning_rate": 9.839352818040825e-06,
"loss": 1.3145,
"step": 2385
},
{
"epoch": 0.6345744680851064,
"grad_norm": 3.83559513092041,
"learning_rate": 9.839131592513814e-06,
"loss": 1.2868,
"step": 2386
},
{
"epoch": 0.6348404255319149,
"grad_norm": 3.465432643890381,
"learning_rate": 9.838910217258375e-06,
"loss": 1.213,
"step": 2387
},
{
"epoch": 0.6351063829787233,
"grad_norm": 3.762899160385132,
"learning_rate": 9.838688692281356e-06,
"loss": 1.3678,
"step": 2388
},
{
"epoch": 0.6353723404255319,
"grad_norm": 3.573856830596924,
"learning_rate": 9.83846701758961e-06,
"loss": 1.3181,
"step": 2389
},
{
"epoch": 0.6356382978723404,
"grad_norm": 3.873749256134033,
"learning_rate": 9.838245193189999e-06,
"loss": 1.252,
"step": 2390
},
{
"epoch": 0.635904255319149,
"grad_norm": 3.5495100021362305,
"learning_rate": 9.838023219089386e-06,
"loss": 1.352,
"step": 2391
},
{
"epoch": 0.6361702127659574,
"grad_norm": 3.6257059574127197,
"learning_rate": 9.837801095294639e-06,
"loss": 1.2099,
"step": 2392
},
{
"epoch": 0.636436170212766,
"grad_norm": 3.658745288848877,
"learning_rate": 9.83757882181263e-06,
"loss": 1.2089,
"step": 2393
},
{
"epoch": 0.6367021276595745,
"grad_norm": 3.6948094367980957,
"learning_rate": 9.837356398650235e-06,
"loss": 1.3032,
"step": 2394
},
{
"epoch": 0.636968085106383,
"grad_norm": 3.677865743637085,
"learning_rate": 9.83713382581434e-06,
"loss": 1.2295,
"step": 2395
},
{
"epoch": 0.6372340425531915,
"grad_norm": 3.758213758468628,
"learning_rate": 9.836911103311828e-06,
"loss": 1.2542,
"step": 2396
},
{
"epoch": 0.6375,
"grad_norm": 3.710860252380371,
"learning_rate": 9.836688231149593e-06,
"loss": 1.3331,
"step": 2397
},
{
"epoch": 0.6377659574468085,
"grad_norm": 3.436738967895508,
"learning_rate": 9.836465209334529e-06,
"loss": 1.1318,
"step": 2398
},
{
"epoch": 0.638031914893617,
"grad_norm": 4.398902416229248,
"learning_rate": 9.836242037873536e-06,
"loss": 1.3268,
"step": 2399
},
{
"epoch": 0.6382978723404256,
"grad_norm": 3.483926773071289,
"learning_rate": 9.836018716773522e-06,
"loss": 1.1744,
"step": 2400
},
{
"epoch": 0.638563829787234,
"grad_norm": 3.766038417816162,
"learning_rate": 9.835795246041395e-06,
"loss": 1.1829,
"step": 2401
},
{
"epoch": 0.6388297872340426,
"grad_norm": 3.7989938259124756,
"learning_rate": 9.835571625684068e-06,
"loss": 1.2691,
"step": 2402
},
{
"epoch": 0.6390957446808511,
"grad_norm": 3.6767778396606445,
"learning_rate": 9.835347855708464e-06,
"loss": 1.1456,
"step": 2403
},
{
"epoch": 0.6393617021276595,
"grad_norm": 3.689368963241577,
"learning_rate": 9.835123936121504e-06,
"loss": 1.2714,
"step": 2404
},
{
"epoch": 0.6396276595744681,
"grad_norm": 3.6774284839630127,
"learning_rate": 9.834899866930116e-06,
"loss": 1.1968,
"step": 2405
},
{
"epoch": 0.6398936170212766,
"grad_norm": 3.734713077545166,
"learning_rate": 9.834675648141235e-06,
"loss": 1.4036,
"step": 2406
},
{
"epoch": 0.6401595744680851,
"grad_norm": 3.4915902614593506,
"learning_rate": 9.834451279761796e-06,
"loss": 1.0733,
"step": 2407
},
{
"epoch": 0.6404255319148936,
"grad_norm": 3.5466091632843018,
"learning_rate": 9.834226761798742e-06,
"loss": 1.2197,
"step": 2408
},
{
"epoch": 0.6406914893617022,
"grad_norm": 3.5611202716827393,
"learning_rate": 9.83400209425902e-06,
"loss": 1.092,
"step": 2409
},
{
"epoch": 0.6409574468085106,
"grad_norm": 3.35369610786438,
"learning_rate": 9.833777277149585e-06,
"loss": 1.2385,
"step": 2410
},
{
"epoch": 0.6412234042553191,
"grad_norm": 3.7679550647735596,
"learning_rate": 9.833552310477388e-06,
"loss": 1.0647,
"step": 2411
},
{
"epoch": 0.6414893617021277,
"grad_norm": 3.6990325450897217,
"learning_rate": 9.833327194249392e-06,
"loss": 1.1853,
"step": 2412
},
{
"epoch": 0.6417553191489361,
"grad_norm": 3.6745262145996094,
"learning_rate": 9.833101928472562e-06,
"loss": 1.2038,
"step": 2413
},
{
"epoch": 0.6420212765957447,
"grad_norm": 3.357508897781372,
"learning_rate": 9.832876513153867e-06,
"loss": 1.0274,
"step": 2414
},
{
"epoch": 0.6422872340425532,
"grad_norm": 3.786376953125,
"learning_rate": 9.832650948300284e-06,
"loss": 1.288,
"step": 2415
},
{
"epoch": 0.6425531914893617,
"grad_norm": 3.253251314163208,
"learning_rate": 9.83242523391879e-06,
"loss": 1.0876,
"step": 2416
},
{
"epoch": 0.6428191489361702,
"grad_norm": 3.3168015480041504,
"learning_rate": 9.832199370016371e-06,
"loss": 1.1551,
"step": 2417
},
{
"epoch": 0.6430851063829788,
"grad_norm": 3.8747761249542236,
"learning_rate": 9.831973356600013e-06,
"loss": 1.2343,
"step": 2418
},
{
"epoch": 0.6433510638297872,
"grad_norm": 3.9137704372406006,
"learning_rate": 9.83174719367671e-06,
"loss": 1.1782,
"step": 2419
},
{
"epoch": 0.6436170212765957,
"grad_norm": 3.64943528175354,
"learning_rate": 9.831520881253462e-06,
"loss": 1.0506,
"step": 2420
},
{
"epoch": 0.6438829787234043,
"grad_norm": 3.5648887157440186,
"learning_rate": 9.83129441933727e-06,
"loss": 1.0195,
"step": 2421
},
{
"epoch": 0.6441489361702127,
"grad_norm": 3.6668763160705566,
"learning_rate": 9.83106780793514e-06,
"loss": 1.349,
"step": 2422
},
{
"epoch": 0.6444148936170213,
"grad_norm": 3.6365723609924316,
"learning_rate": 9.830841047054083e-06,
"loss": 1.2105,
"step": 2423
},
{
"epoch": 0.6446808510638298,
"grad_norm": 3.657466411590576,
"learning_rate": 9.830614136701116e-06,
"loss": 1.2453,
"step": 2424
},
{
"epoch": 0.6449468085106383,
"grad_norm": 3.7750251293182373,
"learning_rate": 9.83038707688326e-06,
"loss": 1.2753,
"step": 2425
},
{
"epoch": 0.6452127659574468,
"grad_norm": 3.4032111167907715,
"learning_rate": 9.830159867607543e-06,
"loss": 1.2054,
"step": 2426
},
{
"epoch": 0.6454787234042553,
"grad_norm": 3.546877861022949,
"learning_rate": 9.82993250888099e-06,
"loss": 1.35,
"step": 2427
},
{
"epoch": 0.6457446808510638,
"grad_norm": 3.5076162815093994,
"learning_rate": 9.829705000710642e-06,
"loss": 1.1382,
"step": 2428
},
{
"epoch": 0.6460106382978723,
"grad_norm": 3.955322742462158,
"learning_rate": 9.829477343103533e-06,
"loss": 1.3948,
"step": 2429
},
{
"epoch": 0.6462765957446809,
"grad_norm": 3.5918376445770264,
"learning_rate": 9.82924953606671e-06,
"loss": 1.2271,
"step": 2430
},
{
"epoch": 0.6465425531914893,
"grad_norm": 3.8371551036834717,
"learning_rate": 9.82902157960722e-06,
"loss": 1.2004,
"step": 2431
},
{
"epoch": 0.6468085106382979,
"grad_norm": 3.573141098022461,
"learning_rate": 9.828793473732116e-06,
"loss": 1.2059,
"step": 2432
},
{
"epoch": 0.6470744680851064,
"grad_norm": 3.8021459579467773,
"learning_rate": 9.828565218448457e-06,
"loss": 1.1852,
"step": 2433
},
{
"epoch": 0.6473404255319148,
"grad_norm": 4.022589206695557,
"learning_rate": 9.828336813763308e-06,
"loss": 1.2385,
"step": 2434
},
{
"epoch": 0.6476063829787234,
"grad_norm": 3.364841938018799,
"learning_rate": 9.82810825968373e-06,
"loss": 1.1976,
"step": 2435
},
{
"epoch": 0.6478723404255319,
"grad_norm": 4.046548843383789,
"learning_rate": 9.8278795562168e-06,
"loss": 1.3522,
"step": 2436
},
{
"epoch": 0.6481382978723405,
"grad_norm": 3.795485019683838,
"learning_rate": 9.82765070336959e-06,
"loss": 1.2166,
"step": 2437
},
{
"epoch": 0.6484042553191489,
"grad_norm": 3.8107662200927734,
"learning_rate": 9.827421701149187e-06,
"loss": 1.3138,
"step": 2438
},
{
"epoch": 0.6486702127659575,
"grad_norm": 3.618577241897583,
"learning_rate": 9.82719254956267e-06,
"loss": 1.1677,
"step": 2439
},
{
"epoch": 0.648936170212766,
"grad_norm": 3.680255651473999,
"learning_rate": 9.826963248617133e-06,
"loss": 1.2319,
"step": 2440
},
{
"epoch": 0.6492021276595744,
"grad_norm": 3.6145694255828857,
"learning_rate": 9.82673379831967e-06,
"loss": 1.2276,
"step": 2441
},
{
"epoch": 0.649468085106383,
"grad_norm": 3.643686532974243,
"learning_rate": 9.82650419867738e-06,
"loss": 1.2989,
"step": 2442
},
{
"epoch": 0.6497340425531914,
"grad_norm": 3.774909019470215,
"learning_rate": 9.82627444969737e-06,
"loss": 1.2749,
"step": 2443
},
{
"epoch": 0.65,
"grad_norm": 3.7553470134735107,
"learning_rate": 9.826044551386743e-06,
"loss": 1.0902,
"step": 2444
},
{
"epoch": 0.6502659574468085,
"grad_norm": 3.453191041946411,
"learning_rate": 9.825814503752618e-06,
"loss": 1.2609,
"step": 2445
},
{
"epoch": 0.6505319148936171,
"grad_norm": 3.889417886734009,
"learning_rate": 9.825584306802109e-06,
"loss": 1.2514,
"step": 2446
},
{
"epoch": 0.6507978723404255,
"grad_norm": 3.5073375701904297,
"learning_rate": 9.825353960542342e-06,
"loss": 1.2466,
"step": 2447
},
{
"epoch": 0.6510638297872341,
"grad_norm": 3.4606523513793945,
"learning_rate": 9.825123464980442e-06,
"loss": 1.1156,
"step": 2448
},
{
"epoch": 0.6513297872340426,
"grad_norm": 3.831897497177124,
"learning_rate": 9.82489282012354e-06,
"loss": 1.1323,
"step": 2449
},
{
"epoch": 0.651595744680851,
"grad_norm": 4.391724109649658,
"learning_rate": 9.824662025978774e-06,
"loss": 1.2543,
"step": 2450
},
{
"epoch": 0.6518617021276596,
"grad_norm": 3.8090097904205322,
"learning_rate": 9.824431082553285e-06,
"loss": 1.3592,
"step": 2451
},
{
"epoch": 0.652127659574468,
"grad_norm": 3.706662893295288,
"learning_rate": 9.824199989854217e-06,
"loss": 1.2753,
"step": 2452
},
{
"epoch": 0.6523936170212766,
"grad_norm": 4.826519966125488,
"learning_rate": 9.823968747888722e-06,
"loss": 1.501,
"step": 2453
},
{
"epoch": 0.6526595744680851,
"grad_norm": 3.7181127071380615,
"learning_rate": 9.823737356663956e-06,
"loss": 1.283,
"step": 2454
},
{
"epoch": 0.6529255319148937,
"grad_norm": 3.6020474433898926,
"learning_rate": 9.823505816187076e-06,
"loss": 1.195,
"step": 2455
},
{
"epoch": 0.6531914893617021,
"grad_norm": 3.7805116176605225,
"learning_rate": 9.823274126465245e-06,
"loss": 1.3032,
"step": 2456
},
{
"epoch": 0.6534574468085106,
"grad_norm": 3.6897008419036865,
"learning_rate": 9.823042287505636e-06,
"loss": 1.33,
"step": 2457
},
{
"epoch": 0.6537234042553192,
"grad_norm": 3.6036691665649414,
"learning_rate": 9.82281029931542e-06,
"loss": 1.2454,
"step": 2458
},
{
"epoch": 0.6539893617021276,
"grad_norm": 3.8645083904266357,
"learning_rate": 9.822578161901774e-06,
"loss": 1.4082,
"step": 2459
},
{
"epoch": 0.6542553191489362,
"grad_norm": 3.982588052749634,
"learning_rate": 9.822345875271884e-06,
"loss": 1.2635,
"step": 2460
},
{
"epoch": 0.6545212765957447,
"grad_norm": 3.576320171356201,
"learning_rate": 9.822113439432933e-06,
"loss": 1.3524,
"step": 2461
},
{
"epoch": 0.6547872340425532,
"grad_norm": 3.387544870376587,
"learning_rate": 9.821880854392115e-06,
"loss": 1.2344,
"step": 2462
},
{
"epoch": 0.6550531914893617,
"grad_norm": 3.385258436203003,
"learning_rate": 9.821648120156628e-06,
"loss": 1.2054,
"step": 2463
},
{
"epoch": 0.6553191489361702,
"grad_norm": 3.952305316925049,
"learning_rate": 9.82141523673367e-06,
"loss": 1.153,
"step": 2464
},
{
"epoch": 0.6555851063829787,
"grad_norm": 3.8070571422576904,
"learning_rate": 9.821182204130448e-06,
"loss": 1.3405,
"step": 2465
},
{
"epoch": 0.6558510638297872,
"grad_norm": 3.9651296138763428,
"learning_rate": 9.820949022354174e-06,
"loss": 1.3205,
"step": 2466
},
{
"epoch": 0.6561170212765958,
"grad_norm": 3.980510950088501,
"learning_rate": 9.82071569141206e-06,
"loss": 1.401,
"step": 2467
},
{
"epoch": 0.6563829787234042,
"grad_norm": 4.441346168518066,
"learning_rate": 9.820482211311326e-06,
"loss": 1.3839,
"step": 2468
},
{
"epoch": 0.6566489361702128,
"grad_norm": 3.4150032997131348,
"learning_rate": 9.820248582059197e-06,
"loss": 1.0058,
"step": 2469
},
{
"epoch": 0.6569148936170213,
"grad_norm": 3.4013893604278564,
"learning_rate": 9.820014803662905e-06,
"loss": 1.1612,
"step": 2470
},
{
"epoch": 0.6571808510638298,
"grad_norm": 4.017107009887695,
"learning_rate": 9.819780876129677e-06,
"loss": 1.2295,
"step": 2471
},
{
"epoch": 0.6574468085106383,
"grad_norm": 3.500370979309082,
"learning_rate": 9.819546799466756e-06,
"loss": 1.2573,
"step": 2472
},
{
"epoch": 0.6577127659574468,
"grad_norm": 3.7119557857513428,
"learning_rate": 9.81931257368138e-06,
"loss": 1.1827,
"step": 2473
},
{
"epoch": 0.6579787234042553,
"grad_norm": 4.006588935852051,
"learning_rate": 9.8190781987808e-06,
"loss": 1.3236,
"step": 2474
},
{
"epoch": 0.6582446808510638,
"grad_norm": 3.6574013233184814,
"learning_rate": 9.818843674772268e-06,
"loss": 1.2783,
"step": 2475
},
{
"epoch": 0.6585106382978724,
"grad_norm": 3.4724280834198,
"learning_rate": 9.818609001663038e-06,
"loss": 1.3469,
"step": 2476
},
{
"epoch": 0.6587765957446808,
"grad_norm": 3.3943772315979004,
"learning_rate": 9.818374179460372e-06,
"loss": 1.1934,
"step": 2477
},
{
"epoch": 0.6590425531914894,
"grad_norm": 3.6822094917297363,
"learning_rate": 9.818139208171537e-06,
"loss": 1.3505,
"step": 2478
},
{
"epoch": 0.6593085106382979,
"grad_norm": 3.474010467529297,
"learning_rate": 9.817904087803802e-06,
"loss": 1.1487,
"step": 2479
},
{
"epoch": 0.6595744680851063,
"grad_norm": 3.4429280757904053,
"learning_rate": 9.817668818364441e-06,
"loss": 1.1786,
"step": 2480
},
{
"epoch": 0.6598404255319149,
"grad_norm": 4.096560955047607,
"learning_rate": 9.817433399860736e-06,
"loss": 1.3167,
"step": 2481
},
{
"epoch": 0.6601063829787234,
"grad_norm": 3.4501636028289795,
"learning_rate": 9.817197832299971e-06,
"loss": 1.0416,
"step": 2482
},
{
"epoch": 0.660372340425532,
"grad_norm": 3.7687666416168213,
"learning_rate": 9.816962115689432e-06,
"loss": 1.1121,
"step": 2483
},
{
"epoch": 0.6606382978723404,
"grad_norm": 3.6816604137420654,
"learning_rate": 9.816726250036413e-06,
"loss": 1.2019,
"step": 2484
},
{
"epoch": 0.660904255319149,
"grad_norm": 4.033024787902832,
"learning_rate": 9.816490235348215e-06,
"loss": 1.3078,
"step": 2485
},
{
"epoch": 0.6611702127659574,
"grad_norm": 3.7372167110443115,
"learning_rate": 9.816254071632137e-06,
"loss": 1.4434,
"step": 2486
},
{
"epoch": 0.6614361702127659,
"grad_norm": 3.694561004638672,
"learning_rate": 9.816017758895488e-06,
"loss": 1.2969,
"step": 2487
},
{
"epoch": 0.6617021276595745,
"grad_norm": 4.178577423095703,
"learning_rate": 9.815781297145578e-06,
"loss": 1.3661,
"step": 2488
},
{
"epoch": 0.6619680851063829,
"grad_norm": 3.647728681564331,
"learning_rate": 9.815544686389727e-06,
"loss": 1.1693,
"step": 2489
},
{
"epoch": 0.6622340425531915,
"grad_norm": 3.6795883178710938,
"learning_rate": 9.815307926635252e-06,
"loss": 1.2308,
"step": 2490
},
{
"epoch": 0.6625,
"grad_norm": 3.8441531658172607,
"learning_rate": 9.81507101788948e-06,
"loss": 1.2011,
"step": 2491
},
{
"epoch": 0.6627659574468086,
"grad_norm": 3.512495994567871,
"learning_rate": 9.814833960159744e-06,
"loss": 1.1509,
"step": 2492
},
{
"epoch": 0.663031914893617,
"grad_norm": 3.631899356842041,
"learning_rate": 9.814596753453376e-06,
"loss": 1.0989,
"step": 2493
},
{
"epoch": 0.6632978723404256,
"grad_norm": 3.5272533893585205,
"learning_rate": 9.814359397777716e-06,
"loss": 1.3053,
"step": 2494
},
{
"epoch": 0.663563829787234,
"grad_norm": 3.492922306060791,
"learning_rate": 9.814121893140105e-06,
"loss": 1.2977,
"step": 2495
},
{
"epoch": 0.6638297872340425,
"grad_norm": 3.5858078002929688,
"learning_rate": 9.813884239547898e-06,
"loss": 1.1963,
"step": 2496
},
{
"epoch": 0.6640957446808511,
"grad_norm": 3.4466118812561035,
"learning_rate": 9.813646437008444e-06,
"loss": 1.266,
"step": 2497
},
{
"epoch": 0.6643617021276595,
"grad_norm": 3.682159900665283,
"learning_rate": 9.813408485529103e-06,
"loss": 1.1549,
"step": 2498
},
{
"epoch": 0.6646276595744681,
"grad_norm": 4.358649253845215,
"learning_rate": 9.813170385117235e-06,
"loss": 1.3577,
"step": 2499
},
{
"epoch": 0.6648936170212766,
"grad_norm": 4.059812068939209,
"learning_rate": 9.81293213578021e-06,
"loss": 1.3728,
"step": 2500
},
{
"epoch": 0.6648936170212766,
"eval_loss": 1.2857128381729126,
"eval_runtime": 12.6822,
"eval_samples_per_second": 31.54,
"eval_steps_per_second": 3.943,
"step": 2500
},
{
"epoch": 0.6651595744680852,
"grad_norm": 3.519260883331299,
"learning_rate": 9.812693737525396e-06,
"loss": 1.1743,
"step": 2501
},
{
"epoch": 0.6654255319148936,
"grad_norm": 4.004322052001953,
"learning_rate": 9.812455190360172e-06,
"loss": 1.2847,
"step": 2502
},
{
"epoch": 0.6656914893617021,
"grad_norm": 3.699012517929077,
"learning_rate": 9.81221649429192e-06,
"loss": 1.3645,
"step": 2503
},
{
"epoch": 0.6659574468085107,
"grad_norm": 3.5919108390808105,
"learning_rate": 9.811977649328021e-06,
"loss": 1.1794,
"step": 2504
},
{
"epoch": 0.6662234042553191,
"grad_norm": 3.382624626159668,
"learning_rate": 9.81173865547587e-06,
"loss": 1.2909,
"step": 2505
},
{
"epoch": 0.6664893617021277,
"grad_norm": 3.7188732624053955,
"learning_rate": 9.811499512742861e-06,
"loss": 1.2731,
"step": 2506
},
{
"epoch": 0.6667553191489362,
"grad_norm": 3.5745997428894043,
"learning_rate": 9.811260221136392e-06,
"loss": 1.1994,
"step": 2507
},
{
"epoch": 0.6670212765957447,
"grad_norm": 3.6393473148345947,
"learning_rate": 9.811020780663865e-06,
"loss": 1.2335,
"step": 2508
},
{
"epoch": 0.6672872340425532,
"grad_norm": 3.4967026710510254,
"learning_rate": 9.810781191332692e-06,
"loss": 1.2272,
"step": 2509
},
{
"epoch": 0.6675531914893617,
"grad_norm": 3.826430559158325,
"learning_rate": 9.810541453150286e-06,
"loss": 1.3689,
"step": 2510
},
{
"epoch": 0.6678191489361702,
"grad_norm": 4.058473110198975,
"learning_rate": 9.810301566124063e-06,
"loss": 1.1942,
"step": 2511
},
{
"epoch": 0.6680851063829787,
"grad_norm": 3.5520458221435547,
"learning_rate": 9.810061530261446e-06,
"loss": 1.1599,
"step": 2512
},
{
"epoch": 0.6683510638297873,
"grad_norm": 3.7619452476501465,
"learning_rate": 9.80982134556986e-06,
"loss": 1.2391,
"step": 2513
},
{
"epoch": 0.6686170212765957,
"grad_norm": 3.9400548934936523,
"learning_rate": 9.809581012056743e-06,
"loss": 1.2792,
"step": 2514
},
{
"epoch": 0.6688829787234043,
"grad_norm": 3.3986830711364746,
"learning_rate": 9.809340529729523e-06,
"loss": 1.2333,
"step": 2515
},
{
"epoch": 0.6691489361702128,
"grad_norm": 3.8278701305389404,
"learning_rate": 9.809099898595647e-06,
"loss": 1.2988,
"step": 2516
},
{
"epoch": 0.6694148936170212,
"grad_norm": 3.8813681602478027,
"learning_rate": 9.808859118662558e-06,
"loss": 1.1505,
"step": 2517
},
{
"epoch": 0.6696808510638298,
"grad_norm": 3.5952844619750977,
"learning_rate": 9.808618189937706e-06,
"loss": 1.3804,
"step": 2518
},
{
"epoch": 0.6699468085106383,
"grad_norm": 3.642479181289673,
"learning_rate": 9.808377112428546e-06,
"loss": 1.2918,
"step": 2519
},
{
"epoch": 0.6702127659574468,
"grad_norm": 3.810826301574707,
"learning_rate": 9.808135886142536e-06,
"loss": 1.3684,
"step": 2520
},
{
"epoch": 0.6704787234042553,
"grad_norm": 3.843879222869873,
"learning_rate": 9.807894511087141e-06,
"loss": 1.2815,
"step": 2521
},
{
"epoch": 0.6707446808510639,
"grad_norm": 3.68229341506958,
"learning_rate": 9.807652987269829e-06,
"loss": 1.1894,
"step": 2522
},
{
"epoch": 0.6710106382978723,
"grad_norm": 3.585465669631958,
"learning_rate": 9.807411314698075e-06,
"loss": 1.3078,
"step": 2523
},
{
"epoch": 0.6712765957446809,
"grad_norm": 3.825195074081421,
"learning_rate": 9.807169493379353e-06,
"loss": 1.2117,
"step": 2524
},
{
"epoch": 0.6715425531914894,
"grad_norm": 3.376753091812134,
"learning_rate": 9.806927523321148e-06,
"loss": 1.1575,
"step": 2525
},
{
"epoch": 0.6718085106382978,
"grad_norm": 3.877986431121826,
"learning_rate": 9.806685404530946e-06,
"loss": 1.3773,
"step": 2526
},
{
"epoch": 0.6720744680851064,
"grad_norm": 3.9964683055877686,
"learning_rate": 9.806443137016237e-06,
"loss": 1.2466,
"step": 2527
},
{
"epoch": 0.6723404255319149,
"grad_norm": 3.6897804737091064,
"learning_rate": 9.80620072078452e-06,
"loss": 1.2107,
"step": 2528
},
{
"epoch": 0.6726063829787234,
"grad_norm": 3.921840190887451,
"learning_rate": 9.805958155843294e-06,
"loss": 1.226,
"step": 2529
},
{
"epoch": 0.6728723404255319,
"grad_norm": 3.4277050495147705,
"learning_rate": 9.805715442200065e-06,
"loss": 1.2126,
"step": 2530
},
{
"epoch": 0.6731382978723405,
"grad_norm": 3.841946601867676,
"learning_rate": 9.805472579862342e-06,
"loss": 1.323,
"step": 2531
},
{
"epoch": 0.6734042553191489,
"grad_norm": 3.7039599418640137,
"learning_rate": 9.805229568837637e-06,
"loss": 1.2843,
"step": 2532
},
{
"epoch": 0.6736702127659574,
"grad_norm": 3.5301520824432373,
"learning_rate": 9.804986409133475e-06,
"loss": 1.0612,
"step": 2533
},
{
"epoch": 0.673936170212766,
"grad_norm": 4.042654037475586,
"learning_rate": 9.804743100757375e-06,
"loss": 1.215,
"step": 2534
},
{
"epoch": 0.6742021276595744,
"grad_norm": 3.895273447036743,
"learning_rate": 9.804499643716866e-06,
"loss": 1.4006,
"step": 2535
},
{
"epoch": 0.674468085106383,
"grad_norm": 3.5299017429351807,
"learning_rate": 9.804256038019482e-06,
"loss": 1.3813,
"step": 2536
},
{
"epoch": 0.6747340425531915,
"grad_norm": 3.8434762954711914,
"learning_rate": 9.80401228367276e-06,
"loss": 1.4165,
"step": 2537
},
{
"epoch": 0.675,
"grad_norm": 4.0280256271362305,
"learning_rate": 9.803768380684242e-06,
"loss": 1.3851,
"step": 2538
},
{
"epoch": 0.6752659574468085,
"grad_norm": 3.663043260574341,
"learning_rate": 9.803524329061474e-06,
"loss": 1.3044,
"step": 2539
},
{
"epoch": 0.675531914893617,
"grad_norm": 3.575730562210083,
"learning_rate": 9.803280128812009e-06,
"loss": 1.2849,
"step": 2540
},
{
"epoch": 0.6757978723404255,
"grad_norm": 3.7937097549438477,
"learning_rate": 9.8030357799434e-06,
"loss": 1.2569,
"step": 2541
},
{
"epoch": 0.676063829787234,
"grad_norm": 3.982719898223877,
"learning_rate": 9.80279128246321e-06,
"loss": 1.411,
"step": 2542
},
{
"epoch": 0.6763297872340426,
"grad_norm": 3.825068950653076,
"learning_rate": 9.802546636379001e-06,
"loss": 1.295,
"step": 2543
},
{
"epoch": 0.676595744680851,
"grad_norm": 3.8499345779418945,
"learning_rate": 9.80230184169835e-06,
"loss": 1.282,
"step": 2544
},
{
"epoch": 0.6768617021276596,
"grad_norm": 3.4873030185699463,
"learning_rate": 9.802056898428823e-06,
"loss": 1.2803,
"step": 2545
},
{
"epoch": 0.6771276595744681,
"grad_norm": 3.9438254833221436,
"learning_rate": 9.801811806578001e-06,
"loss": 1.2881,
"step": 2546
},
{
"epoch": 0.6773936170212767,
"grad_norm": 3.392169237136841,
"learning_rate": 9.80156656615347e-06,
"loss": 1.2485,
"step": 2547
},
{
"epoch": 0.6776595744680851,
"grad_norm": 3.8698456287384033,
"learning_rate": 9.801321177162814e-06,
"loss": 1.281,
"step": 2548
},
{
"epoch": 0.6779255319148936,
"grad_norm": 3.8232076168060303,
"learning_rate": 9.801075639613628e-06,
"loss": 1.3045,
"step": 2549
},
{
"epoch": 0.6781914893617021,
"grad_norm": 3.8453428745269775,
"learning_rate": 9.80082995351351e-06,
"loss": 1.2239,
"step": 2550
},
{
"epoch": 0.6784574468085106,
"grad_norm": 3.7375547885894775,
"learning_rate": 9.800584118870063e-06,
"loss": 1.195,
"step": 2551
},
{
"epoch": 0.6787234042553192,
"grad_norm": 3.84708571434021,
"learning_rate": 9.800338135690889e-06,
"loss": 1.1614,
"step": 2552
},
{
"epoch": 0.6789893617021276,
"grad_norm": 3.612217664718628,
"learning_rate": 9.800092003983602e-06,
"loss": 1.2499,
"step": 2553
},
{
"epoch": 0.6792553191489362,
"grad_norm": 3.217289447784424,
"learning_rate": 9.799845723755818e-06,
"loss": 1.1648,
"step": 2554
},
{
"epoch": 0.6795212765957447,
"grad_norm": 4.510238170623779,
"learning_rate": 9.799599295015154e-06,
"loss": 1.2728,
"step": 2555
},
{
"epoch": 0.6797872340425531,
"grad_norm": 4.0085129737854,
"learning_rate": 9.79935271776924e-06,
"loss": 1.3524,
"step": 2556
},
{
"epoch": 0.6800531914893617,
"grad_norm": 3.8481833934783936,
"learning_rate": 9.799105992025699e-06,
"loss": 1.2783,
"step": 2557
},
{
"epoch": 0.6803191489361702,
"grad_norm": 3.901775598526001,
"learning_rate": 9.79885911779217e-06,
"loss": 1.1736,
"step": 2558
},
{
"epoch": 0.6805851063829788,
"grad_norm": 3.864826202392578,
"learning_rate": 9.798612095076291e-06,
"loss": 1.3108,
"step": 2559
},
{
"epoch": 0.6808510638297872,
"grad_norm": 3.7867627143859863,
"learning_rate": 9.798364923885703e-06,
"loss": 1.1626,
"step": 2560
},
{
"epoch": 0.6811170212765958,
"grad_norm": 3.8203864097595215,
"learning_rate": 9.798117604228054e-06,
"loss": 1.2232,
"step": 2561
},
{
"epoch": 0.6813829787234043,
"grad_norm": 3.5479917526245117,
"learning_rate": 9.797870136110998e-06,
"loss": 1.1571,
"step": 2562
},
{
"epoch": 0.6816489361702127,
"grad_norm": 3.782655715942383,
"learning_rate": 9.797622519542193e-06,
"loss": 1.3004,
"step": 2563
},
{
"epoch": 0.6819148936170213,
"grad_norm": 3.477875232696533,
"learning_rate": 9.797374754529297e-06,
"loss": 1.0335,
"step": 2564
},
{
"epoch": 0.6821808510638298,
"grad_norm": 3.8241772651672363,
"learning_rate": 9.797126841079979e-06,
"loss": 1.4163,
"step": 2565
},
{
"epoch": 0.6824468085106383,
"grad_norm": 3.764817476272583,
"learning_rate": 9.796878779201906e-06,
"loss": 1.2243,
"step": 2566
},
{
"epoch": 0.6827127659574468,
"grad_norm": 3.784823417663574,
"learning_rate": 9.796630568902758e-06,
"loss": 1.4082,
"step": 2567
},
{
"epoch": 0.6829787234042554,
"grad_norm": 3.3941454887390137,
"learning_rate": 9.796382210190212e-06,
"loss": 1.0939,
"step": 2568
},
{
"epoch": 0.6832446808510638,
"grad_norm": 3.484823226928711,
"learning_rate": 9.796133703071956e-06,
"loss": 1.2322,
"step": 2569
},
{
"epoch": 0.6835106382978723,
"grad_norm": 3.6055960655212402,
"learning_rate": 9.795885047555673e-06,
"loss": 1.3383,
"step": 2570
},
{
"epoch": 0.6837765957446809,
"grad_norm": 3.7031943798065186,
"learning_rate": 9.795636243649061e-06,
"loss": 1.2987,
"step": 2571
},
{
"epoch": 0.6840425531914893,
"grad_norm": 3.5490245819091797,
"learning_rate": 9.795387291359819e-06,
"loss": 1.291,
"step": 2572
},
{
"epoch": 0.6843085106382979,
"grad_norm": 3.611907958984375,
"learning_rate": 9.795138190695647e-06,
"loss": 1.2693,
"step": 2573
},
{
"epoch": 0.6845744680851064,
"grad_norm": 3.580634832382202,
"learning_rate": 9.794888941664253e-06,
"loss": 1.3336,
"step": 2574
},
{
"epoch": 0.6848404255319149,
"grad_norm": 3.957103967666626,
"learning_rate": 9.794639544273352e-06,
"loss": 1.2077,
"step": 2575
},
{
"epoch": 0.6851063829787234,
"grad_norm": 3.5140933990478516,
"learning_rate": 9.794389998530659e-06,
"loss": 1.2885,
"step": 2576
},
{
"epoch": 0.685372340425532,
"grad_norm": 3.6171066761016846,
"learning_rate": 9.794140304443891e-06,
"loss": 1.2211,
"step": 2577
},
{
"epoch": 0.6856382978723404,
"grad_norm": 3.641486167907715,
"learning_rate": 9.793890462020781e-06,
"loss": 1.0571,
"step": 2578
},
{
"epoch": 0.6859042553191489,
"grad_norm": 3.605208396911621,
"learning_rate": 9.793640471269055e-06,
"loss": 1.1932,
"step": 2579
},
{
"epoch": 0.6861702127659575,
"grad_norm": 3.67253041267395,
"learning_rate": 9.793390332196448e-06,
"loss": 1.1474,
"step": 2580
},
{
"epoch": 0.6864361702127659,
"grad_norm": 4.190906524658203,
"learning_rate": 9.793140044810701e-06,
"loss": 1.2488,
"step": 2581
},
{
"epoch": 0.6867021276595745,
"grad_norm": 4.1439104080200195,
"learning_rate": 9.792889609119558e-06,
"loss": 1.2747,
"step": 2582
},
{
"epoch": 0.686968085106383,
"grad_norm": 3.9002907276153564,
"learning_rate": 9.79263902513077e-06,
"loss": 1.2291,
"step": 2583
},
{
"epoch": 0.6872340425531915,
"grad_norm": 3.6862435340881348,
"learning_rate": 9.792388292852084e-06,
"loss": 1.1637,
"step": 2584
},
{
"epoch": 0.6875,
"grad_norm": 3.789638042449951,
"learning_rate": 9.792137412291265e-06,
"loss": 1.1779,
"step": 2585
},
{
"epoch": 0.6877659574468085,
"grad_norm": 3.5384011268615723,
"learning_rate": 9.791886383456071e-06,
"loss": 1.2701,
"step": 2586
},
{
"epoch": 0.688031914893617,
"grad_norm": 3.6008050441741943,
"learning_rate": 9.79163520635427e-06,
"loss": 1.2479,
"step": 2587
},
{
"epoch": 0.6882978723404255,
"grad_norm": 3.71974515914917,
"learning_rate": 9.791383880993635e-06,
"loss": 1.267,
"step": 2588
},
{
"epoch": 0.6885638297872341,
"grad_norm": 3.5324504375457764,
"learning_rate": 9.791132407381942e-06,
"loss": 1.2725,
"step": 2589
},
{
"epoch": 0.6888297872340425,
"grad_norm": 3.602149724960327,
"learning_rate": 9.790880785526971e-06,
"loss": 1.1551,
"step": 2590
},
{
"epoch": 0.6890957446808511,
"grad_norm": 3.761108160018921,
"learning_rate": 9.790629015436508e-06,
"loss": 1.2654,
"step": 2591
},
{
"epoch": 0.6893617021276596,
"grad_norm": 3.6845576763153076,
"learning_rate": 9.790377097118342e-06,
"loss": 1.1352,
"step": 2592
},
{
"epoch": 0.689627659574468,
"grad_norm": 3.4206063747406006,
"learning_rate": 9.79012503058027e-06,
"loss": 1.1649,
"step": 2593
},
{
"epoch": 0.6898936170212766,
"grad_norm": 3.91064190864563,
"learning_rate": 9.789872815830089e-06,
"loss": 1.2736,
"step": 2594
},
{
"epoch": 0.6901595744680851,
"grad_norm": 3.3683114051818848,
"learning_rate": 9.789620452875605e-06,
"loss": 1.1734,
"step": 2595
},
{
"epoch": 0.6904255319148936,
"grad_norm": 3.797476053237915,
"learning_rate": 9.789367941724623e-06,
"loss": 1.239,
"step": 2596
},
{
"epoch": 0.6906914893617021,
"grad_norm": 3.623358964920044,
"learning_rate": 9.78911528238496e-06,
"loss": 1.2941,
"step": 2597
},
{
"epoch": 0.6909574468085107,
"grad_norm": 4.187454700469971,
"learning_rate": 9.78886247486443e-06,
"loss": 1.3176,
"step": 2598
},
{
"epoch": 0.6912234042553191,
"grad_norm": 4.131342887878418,
"learning_rate": 9.78860951917086e-06,
"loss": 1.3183,
"step": 2599
},
{
"epoch": 0.6914893617021277,
"grad_norm": 3.6273796558380127,
"learning_rate": 9.78835641531207e-06,
"loss": 1.1836,
"step": 2600
},
{
"epoch": 0.6917553191489362,
"grad_norm": 3.8663980960845947,
"learning_rate": 9.788103163295897e-06,
"loss": 1.4566,
"step": 2601
},
{
"epoch": 0.6920212765957446,
"grad_norm": 3.8288991451263428,
"learning_rate": 9.787849763130174e-06,
"loss": 1.2238,
"step": 2602
},
{
"epoch": 0.6922872340425532,
"grad_norm": 4.178062438964844,
"learning_rate": 9.787596214822743e-06,
"loss": 1.399,
"step": 2603
},
{
"epoch": 0.6925531914893617,
"grad_norm": 3.824878215789795,
"learning_rate": 9.787342518381447e-06,
"loss": 1.2654,
"step": 2604
},
{
"epoch": 0.6928191489361702,
"grad_norm": 3.742422103881836,
"learning_rate": 9.787088673814137e-06,
"loss": 1.3921,
"step": 2605
},
{
"epoch": 0.6930851063829787,
"grad_norm": 4.080827713012695,
"learning_rate": 9.78683468112867e-06,
"loss": 1.2525,
"step": 2606
},
{
"epoch": 0.6933510638297873,
"grad_norm": 3.393066883087158,
"learning_rate": 9.7865805403329e-06,
"loss": 1.0471,
"step": 2607
},
{
"epoch": 0.6936170212765957,
"grad_norm": 3.3034181594848633,
"learning_rate": 9.786326251434694e-06,
"loss": 1.1627,
"step": 2608
},
{
"epoch": 0.6938829787234042,
"grad_norm": 3.8288989067077637,
"learning_rate": 9.786071814441918e-06,
"loss": 1.2483,
"step": 2609
},
{
"epoch": 0.6941489361702128,
"grad_norm": 3.4944722652435303,
"learning_rate": 9.785817229362445e-06,
"loss": 1.2921,
"step": 2610
},
{
"epoch": 0.6944148936170212,
"grad_norm": 3.653322219848633,
"learning_rate": 9.785562496204151e-06,
"loss": 1.2367,
"step": 2611
},
{
"epoch": 0.6946808510638298,
"grad_norm": 3.3792853355407715,
"learning_rate": 9.785307614974922e-06,
"loss": 1.1746,
"step": 2612
},
{
"epoch": 0.6949468085106383,
"grad_norm": 3.608031988143921,
"learning_rate": 9.78505258568264e-06,
"loss": 1.2059,
"step": 2613
},
{
"epoch": 0.6952127659574469,
"grad_norm": 4.2280402183532715,
"learning_rate": 9.784797408335195e-06,
"loss": 1.294,
"step": 2614
},
{
"epoch": 0.6954787234042553,
"grad_norm": 3.8257791996002197,
"learning_rate": 9.784542082940488e-06,
"loss": 1.3261,
"step": 2615
},
{
"epoch": 0.6957446808510638,
"grad_norm": 3.9494855403900146,
"learning_rate": 9.784286609506415e-06,
"loss": 1.3776,
"step": 2616
},
{
"epoch": 0.6960106382978724,
"grad_norm": 3.8635013103485107,
"learning_rate": 9.78403098804088e-06,
"loss": 1.3371,
"step": 2617
},
{
"epoch": 0.6962765957446808,
"grad_norm": 3.8114707469940186,
"learning_rate": 9.783775218551796e-06,
"loss": 1.3064,
"step": 2618
},
{
"epoch": 0.6965425531914894,
"grad_norm": 3.8006489276885986,
"learning_rate": 9.783519301047072e-06,
"loss": 1.3864,
"step": 2619
},
{
"epoch": 0.6968085106382979,
"grad_norm": 3.504070997238159,
"learning_rate": 9.783263235534632e-06,
"loss": 1.2172,
"step": 2620
},
{
"epoch": 0.6970744680851064,
"grad_norm": 3.741771936416626,
"learning_rate": 9.783007022022394e-06,
"loss": 1.2375,
"step": 2621
},
{
"epoch": 0.6973404255319149,
"grad_norm": 3.5260889530181885,
"learning_rate": 9.782750660518288e-06,
"loss": 1.4035,
"step": 2622
},
{
"epoch": 0.6976063829787233,
"grad_norm": 3.832963466644287,
"learning_rate": 9.782494151030245e-06,
"loss": 1.2979,
"step": 2623
},
{
"epoch": 0.6978723404255319,
"grad_norm": 3.5783939361572266,
"learning_rate": 9.782237493566202e-06,
"loss": 1.1859,
"step": 2624
},
{
"epoch": 0.6981382978723404,
"grad_norm": 3.677419900894165,
"learning_rate": 9.781980688134102e-06,
"loss": 1.2306,
"step": 2625
},
{
"epoch": 0.698404255319149,
"grad_norm": 3.812321901321411,
"learning_rate": 9.781723734741889e-06,
"loss": 1.3585,
"step": 2626
},
{
"epoch": 0.6986702127659574,
"grad_norm": 3.3270645141601562,
"learning_rate": 9.781466633397512e-06,
"loss": 1.0776,
"step": 2627
},
{
"epoch": 0.698936170212766,
"grad_norm": 3.6559667587280273,
"learning_rate": 9.78120938410893e-06,
"loss": 1.3296,
"step": 2628
},
{
"epoch": 0.6992021276595745,
"grad_norm": 3.707422971725464,
"learning_rate": 9.7809519868841e-06,
"loss": 1.2396,
"step": 2629
},
{
"epoch": 0.699468085106383,
"grad_norm": 3.875147581100464,
"learning_rate": 9.780694441730987e-06,
"loss": 1.4079,
"step": 2630
},
{
"epoch": 0.6997340425531915,
"grad_norm": 4.308002471923828,
"learning_rate": 9.780436748657559e-06,
"loss": 1.3675,
"step": 2631
},
{
"epoch": 0.7,
"grad_norm": 3.6063718795776367,
"learning_rate": 9.780178907671788e-06,
"loss": 1.1953,
"step": 2632
},
{
"epoch": 0.7002659574468085,
"grad_norm": 3.582390308380127,
"learning_rate": 9.779920918781656e-06,
"loss": 1.2841,
"step": 2633
},
{
"epoch": 0.700531914893617,
"grad_norm": 3.8668954372406006,
"learning_rate": 9.779662781995144e-06,
"loss": 1.3806,
"step": 2634
},
{
"epoch": 0.7007978723404256,
"grad_norm": 3.4479143619537354,
"learning_rate": 9.779404497320236e-06,
"loss": 1.3201,
"step": 2635
},
{
"epoch": 0.701063829787234,
"grad_norm": 4.041039943695068,
"learning_rate": 9.779146064764925e-06,
"loss": 1.1912,
"step": 2636
},
{
"epoch": 0.7013297872340426,
"grad_norm": 3.944117307662964,
"learning_rate": 9.77888748433721e-06,
"loss": 1.1603,
"step": 2637
},
{
"epoch": 0.7015957446808511,
"grad_norm": 4.008464336395264,
"learning_rate": 9.77862875604509e-06,
"loss": 1.3612,
"step": 2638
},
{
"epoch": 0.7018617021276595,
"grad_norm": 3.5746493339538574,
"learning_rate": 9.778369879896568e-06,
"loss": 1.3117,
"step": 2639
},
{
"epoch": 0.7021276595744681,
"grad_norm": 4.120686054229736,
"learning_rate": 9.778110855899659e-06,
"loss": 1.2801,
"step": 2640
},
{
"epoch": 0.7023936170212766,
"grad_norm": 3.7582547664642334,
"learning_rate": 9.777851684062371e-06,
"loss": 1.291,
"step": 2641
},
{
"epoch": 0.7026595744680851,
"grad_norm": 3.8033053874969482,
"learning_rate": 9.77759236439273e-06,
"loss": 1.3342,
"step": 2642
},
{
"epoch": 0.7029255319148936,
"grad_norm": 3.712113618850708,
"learning_rate": 9.777332896898754e-06,
"loss": 1.1921,
"step": 2643
},
{
"epoch": 0.7031914893617022,
"grad_norm": 3.1552655696868896,
"learning_rate": 9.777073281588476e-06,
"loss": 1.1407,
"step": 2644
},
{
"epoch": 0.7034574468085106,
"grad_norm": 4.050416946411133,
"learning_rate": 9.776813518469924e-06,
"loss": 1.3787,
"step": 2645
},
{
"epoch": 0.7037234042553191,
"grad_norm": 3.63802170753479,
"learning_rate": 9.77655360755114e-06,
"loss": 1.3203,
"step": 2646
},
{
"epoch": 0.7039893617021277,
"grad_norm": 4.1890482902526855,
"learning_rate": 9.77629354884016e-06,
"loss": 1.3532,
"step": 2647
},
{
"epoch": 0.7042553191489361,
"grad_norm": 4.1286444664001465,
"learning_rate": 9.776033342345038e-06,
"loss": 1.2704,
"step": 2648
},
{
"epoch": 0.7045212765957447,
"grad_norm": 3.4052047729492188,
"learning_rate": 9.77577298807382e-06,
"loss": 1.2537,
"step": 2649
},
{
"epoch": 0.7047872340425532,
"grad_norm": 4.194342136383057,
"learning_rate": 9.775512486034564e-06,
"loss": 1.449,
"step": 2650
},
{
"epoch": 0.7050531914893617,
"grad_norm": 3.945206880569458,
"learning_rate": 9.775251836235327e-06,
"loss": 1.357,
"step": 2651
},
{
"epoch": 0.7053191489361702,
"grad_norm": 3.5744996070861816,
"learning_rate": 9.774991038684177e-06,
"loss": 1.2701,
"step": 2652
},
{
"epoch": 0.7055851063829788,
"grad_norm": 3.9091970920562744,
"learning_rate": 9.774730093389182e-06,
"loss": 1.3401,
"step": 2653
},
{
"epoch": 0.7058510638297872,
"grad_norm": 3.7527072429656982,
"learning_rate": 9.774469000358418e-06,
"loss": 1.2886,
"step": 2654
},
{
"epoch": 0.7061170212765957,
"grad_norm": 3.5021281242370605,
"learning_rate": 9.774207759599961e-06,
"loss": 1.2253,
"step": 2655
},
{
"epoch": 0.7063829787234043,
"grad_norm": 3.725334405899048,
"learning_rate": 9.773946371121894e-06,
"loss": 1.3451,
"step": 2656
},
{
"epoch": 0.7066489361702127,
"grad_norm": 3.3787760734558105,
"learning_rate": 9.773684834932306e-06,
"loss": 1.183,
"step": 2657
},
{
"epoch": 0.7069148936170213,
"grad_norm": 3.956935167312622,
"learning_rate": 9.77342315103929e-06,
"loss": 1.3828,
"step": 2658
},
{
"epoch": 0.7071808510638298,
"grad_norm": 3.7493388652801514,
"learning_rate": 9.77316131945094e-06,
"loss": 1.2192,
"step": 2659
},
{
"epoch": 0.7074468085106383,
"grad_norm": 4.022577285766602,
"learning_rate": 9.772899340175362e-06,
"loss": 1.2509,
"step": 2660
},
{
"epoch": 0.7077127659574468,
"grad_norm": 3.9888761043548584,
"learning_rate": 9.772637213220658e-06,
"loss": 1.3076,
"step": 2661
},
{
"epoch": 0.7079787234042553,
"grad_norm": 3.502845048904419,
"learning_rate": 9.772374938594937e-06,
"loss": 1.4205,
"step": 2662
},
{
"epoch": 0.7082446808510638,
"grad_norm": 3.611692190170288,
"learning_rate": 9.772112516306318e-06,
"loss": 1.2036,
"step": 2663
},
{
"epoch": 0.7085106382978723,
"grad_norm": 3.3075003623962402,
"learning_rate": 9.77184994636292e-06,
"loss": 1.1399,
"step": 2664
},
{
"epoch": 0.7087765957446809,
"grad_norm": 3.6357240676879883,
"learning_rate": 9.771587228772866e-06,
"loss": 1.2438,
"step": 2665
},
{
"epoch": 0.7090425531914893,
"grad_norm": 3.798506259918213,
"learning_rate": 9.771324363544286e-06,
"loss": 1.2793,
"step": 2666
},
{
"epoch": 0.7093085106382979,
"grad_norm": 3.3980555534362793,
"learning_rate": 9.771061350685312e-06,
"loss": 1.2446,
"step": 2667
},
{
"epoch": 0.7095744680851064,
"grad_norm": 3.5380852222442627,
"learning_rate": 9.770798190204083e-06,
"loss": 1.1996,
"step": 2668
},
{
"epoch": 0.7098404255319148,
"grad_norm": 3.93696665763855,
"learning_rate": 9.77053488210874e-06,
"loss": 1.2549,
"step": 2669
},
{
"epoch": 0.7101063829787234,
"grad_norm": 4.042500019073486,
"learning_rate": 9.770271426407432e-06,
"loss": 1.455,
"step": 2670
},
{
"epoch": 0.7103723404255319,
"grad_norm": 3.6526906490325928,
"learning_rate": 9.770007823108309e-06,
"loss": 1.3447,
"step": 2671
},
{
"epoch": 0.7106382978723405,
"grad_norm": 3.8958542346954346,
"learning_rate": 9.76974407221953e-06,
"loss": 1.2542,
"step": 2672
},
{
"epoch": 0.7109042553191489,
"grad_norm": 3.5408430099487305,
"learning_rate": 9.769480173749252e-06,
"loss": 1.3333,
"step": 2673
},
{
"epoch": 0.7111702127659575,
"grad_norm": 3.586918592453003,
"learning_rate": 9.769216127705643e-06,
"loss": 1.2469,
"step": 2674
},
{
"epoch": 0.711436170212766,
"grad_norm": 3.6321678161621094,
"learning_rate": 9.76895193409687e-06,
"loss": 1.3352,
"step": 2675
},
{
"epoch": 0.7117021276595744,
"grad_norm": 3.4352383613586426,
"learning_rate": 9.768687592931111e-06,
"loss": 1.228,
"step": 2676
},
{
"epoch": 0.711968085106383,
"grad_norm": 3.756770610809326,
"learning_rate": 9.768423104216544e-06,
"loss": 1.1776,
"step": 2677
},
{
"epoch": 0.7122340425531914,
"grad_norm": 4.270863056182861,
"learning_rate": 9.76815846796135e-06,
"loss": 1.2372,
"step": 2678
},
{
"epoch": 0.7125,
"grad_norm": 4.0467848777771,
"learning_rate": 9.767893684173722e-06,
"loss": 1.33,
"step": 2679
},
{
"epoch": 0.7127659574468085,
"grad_norm": 3.9330484867095947,
"learning_rate": 9.767628752861848e-06,
"loss": 1.2019,
"step": 2680
},
{
"epoch": 0.7130319148936171,
"grad_norm": 4.011680603027344,
"learning_rate": 9.767363674033928e-06,
"loss": 1.1982,
"step": 2681
},
{
"epoch": 0.7132978723404255,
"grad_norm": 3.5905420780181885,
"learning_rate": 9.767098447698163e-06,
"loss": 1.2441,
"step": 2682
},
{
"epoch": 0.7135638297872341,
"grad_norm": 3.8876521587371826,
"learning_rate": 9.766833073862758e-06,
"loss": 1.3112,
"step": 2683
},
{
"epoch": 0.7138297872340426,
"grad_norm": 3.6759207248687744,
"learning_rate": 9.766567552535928e-06,
"loss": 1.2974,
"step": 2684
},
{
"epoch": 0.714095744680851,
"grad_norm": 3.6160476207733154,
"learning_rate": 9.766301883725884e-06,
"loss": 1.3107,
"step": 2685
},
{
"epoch": 0.7143617021276596,
"grad_norm": 3.9795331954956055,
"learning_rate": 9.766036067440849e-06,
"loss": 1.4063,
"step": 2686
},
{
"epoch": 0.714627659574468,
"grad_norm": 3.899998188018799,
"learning_rate": 9.765770103689045e-06,
"loss": 1.3517,
"step": 2687
},
{
"epoch": 0.7148936170212766,
"grad_norm": 3.501302719116211,
"learning_rate": 9.765503992478704e-06,
"loss": 1.078,
"step": 2688
},
{
"epoch": 0.7151595744680851,
"grad_norm": 3.4490084648132324,
"learning_rate": 9.76523773381806e-06,
"loss": 1.2363,
"step": 2689
},
{
"epoch": 0.7154255319148937,
"grad_norm": 3.773393154144287,
"learning_rate": 9.76497132771535e-06,
"loss": 1.2677,
"step": 2690
},
{
"epoch": 0.7156914893617021,
"grad_norm": 3.2833402156829834,
"learning_rate": 9.764704774178816e-06,
"loss": 1.2409,
"step": 2691
},
{
"epoch": 0.7159574468085106,
"grad_norm": 3.798407793045044,
"learning_rate": 9.764438073216706e-06,
"loss": 1.2375,
"step": 2692
},
{
"epoch": 0.7162234042553192,
"grad_norm": 3.383553981781006,
"learning_rate": 9.764171224837274e-06,
"loss": 1.223,
"step": 2693
},
{
"epoch": 0.7164893617021276,
"grad_norm": 3.781569242477417,
"learning_rate": 9.763904229048775e-06,
"loss": 1.1822,
"step": 2694
},
{
"epoch": 0.7167553191489362,
"grad_norm": 3.862577438354492,
"learning_rate": 9.76363708585947e-06,
"loss": 1.2266,
"step": 2695
},
{
"epoch": 0.7170212765957447,
"grad_norm": 3.4044363498687744,
"learning_rate": 9.763369795277627e-06,
"loss": 1.1887,
"step": 2696
},
{
"epoch": 0.7172872340425532,
"grad_norm": 3.930368185043335,
"learning_rate": 9.763102357311511e-06,
"loss": 1.2911,
"step": 2697
},
{
"epoch": 0.7175531914893617,
"grad_norm": 3.72084379196167,
"learning_rate": 9.762834771969403e-06,
"loss": 1.2693,
"step": 2698
},
{
"epoch": 0.7178191489361702,
"grad_norm": 3.3735997676849365,
"learning_rate": 9.762567039259577e-06,
"loss": 1.2202,
"step": 2699
},
{
"epoch": 0.7180851063829787,
"grad_norm": 3.3215930461883545,
"learning_rate": 9.762299159190322e-06,
"loss": 1.311,
"step": 2700
},
{
"epoch": 0.7183510638297872,
"grad_norm": 3.2667737007141113,
"learning_rate": 9.762031131769923e-06,
"loss": 1.1621,
"step": 2701
},
{
"epoch": 0.7186170212765958,
"grad_norm": 3.8327572345733643,
"learning_rate": 9.761762957006673e-06,
"loss": 1.2764,
"step": 2702
},
{
"epoch": 0.7188829787234042,
"grad_norm": 3.693328857421875,
"learning_rate": 9.761494634908872e-06,
"loss": 1.168,
"step": 2703
},
{
"epoch": 0.7191489361702128,
"grad_norm": 3.7882509231567383,
"learning_rate": 9.761226165484822e-06,
"loss": 1.3076,
"step": 2704
},
{
"epoch": 0.7194148936170213,
"grad_norm": 3.366978645324707,
"learning_rate": 9.760957548742828e-06,
"loss": 1.3628,
"step": 2705
},
{
"epoch": 0.7196808510638298,
"grad_norm": 3.4671497344970703,
"learning_rate": 9.7606887846912e-06,
"loss": 1.2197,
"step": 2706
},
{
"epoch": 0.7199468085106383,
"grad_norm": 4.486639022827148,
"learning_rate": 9.760419873338261e-06,
"loss": 1.1786,
"step": 2707
},
{
"epoch": 0.7202127659574468,
"grad_norm": 3.5285980701446533,
"learning_rate": 9.760150814692321e-06,
"loss": 1.0701,
"step": 2708
},
{
"epoch": 0.7204787234042553,
"grad_norm": 3.4500350952148438,
"learning_rate": 9.759881608761714e-06,
"loss": 1.1768,
"step": 2709
},
{
"epoch": 0.7207446808510638,
"grad_norm": 3.219653606414795,
"learning_rate": 9.759612255554765e-06,
"loss": 1.1413,
"step": 2710
},
{
"epoch": 0.7210106382978724,
"grad_norm": 3.7905290126800537,
"learning_rate": 9.75934275507981e-06,
"loss": 1.3632,
"step": 2711
},
{
"epoch": 0.7212765957446808,
"grad_norm": 3.765892744064331,
"learning_rate": 9.759073107345186e-06,
"loss": 1.3237,
"step": 2712
},
{
"epoch": 0.7215425531914894,
"grad_norm": 3.8589115142822266,
"learning_rate": 9.758803312359236e-06,
"loss": 1.3028,
"step": 2713
},
{
"epoch": 0.7218085106382979,
"grad_norm": 3.688624143600464,
"learning_rate": 9.758533370130308e-06,
"loss": 1.2325,
"step": 2714
},
{
"epoch": 0.7220744680851063,
"grad_norm": 3.397474765777588,
"learning_rate": 9.758263280666757e-06,
"loss": 1.3173,
"step": 2715
},
{
"epoch": 0.7223404255319149,
"grad_norm": 3.9396157264709473,
"learning_rate": 9.757993043976937e-06,
"loss": 1.4517,
"step": 2716
},
{
"epoch": 0.7226063829787234,
"grad_norm": 3.5887930393218994,
"learning_rate": 9.757722660069211e-06,
"loss": 1.1431,
"step": 2717
},
{
"epoch": 0.722872340425532,
"grad_norm": 3.520183563232422,
"learning_rate": 9.757452128951945e-06,
"loss": 1.3442,
"step": 2718
},
{
"epoch": 0.7231382978723404,
"grad_norm": 3.704939365386963,
"learning_rate": 9.757181450633507e-06,
"loss": 1.2257,
"step": 2719
},
{
"epoch": 0.723404255319149,
"grad_norm": 4.201409816741943,
"learning_rate": 9.756910625122276e-06,
"loss": 1.234,
"step": 2720
},
{
"epoch": 0.7236702127659574,
"grad_norm": 3.571162700653076,
"learning_rate": 9.756639652426627e-06,
"loss": 1.195,
"step": 2721
},
{
"epoch": 0.7239361702127659,
"grad_norm": 3.463414192199707,
"learning_rate": 9.75636853255495e-06,
"loss": 1.2494,
"step": 2722
},
{
"epoch": 0.7242021276595745,
"grad_norm": 3.4496824741363525,
"learning_rate": 9.75609726551563e-06,
"loss": 1.1707,
"step": 2723
},
{
"epoch": 0.7244680851063829,
"grad_norm": 3.9885363578796387,
"learning_rate": 9.75582585131706e-06,
"loss": 1.2613,
"step": 2724
},
{
"epoch": 0.7247340425531915,
"grad_norm": 4.085259437561035,
"learning_rate": 9.755554289967638e-06,
"loss": 1.2527,
"step": 2725
},
{
"epoch": 0.725,
"grad_norm": 4.417264938354492,
"learning_rate": 9.755282581475769e-06,
"loss": 1.466,
"step": 2726
},
{
"epoch": 0.7252659574468086,
"grad_norm": 3.954056739807129,
"learning_rate": 9.755010725849857e-06,
"loss": 1.2379,
"step": 2727
},
{
"epoch": 0.725531914893617,
"grad_norm": 3.838103771209717,
"learning_rate": 9.754738723098316e-06,
"loss": 1.1999,
"step": 2728
},
{
"epoch": 0.7257978723404256,
"grad_norm": 4.1355695724487305,
"learning_rate": 9.75446657322956e-06,
"loss": 1.2805,
"step": 2729
},
{
"epoch": 0.726063829787234,
"grad_norm": 4.266016483306885,
"learning_rate": 9.75419427625201e-06,
"loss": 1.274,
"step": 2730
},
{
"epoch": 0.7263297872340425,
"grad_norm": 3.8930816650390625,
"learning_rate": 9.753921832174094e-06,
"loss": 1.3094,
"step": 2731
},
{
"epoch": 0.7265957446808511,
"grad_norm": 3.7425036430358887,
"learning_rate": 9.753649241004238e-06,
"loss": 1.2826,
"step": 2732
},
{
"epoch": 0.7268617021276595,
"grad_norm": 4.708345890045166,
"learning_rate": 9.753376502750878e-06,
"loss": 1.4243,
"step": 2733
},
{
"epoch": 0.7271276595744681,
"grad_norm": 3.6511597633361816,
"learning_rate": 9.753103617422452e-06,
"loss": 1.1892,
"step": 2734
},
{
"epoch": 0.7273936170212766,
"grad_norm": 3.807124376296997,
"learning_rate": 9.752830585027406e-06,
"loss": 1.2767,
"step": 2735
},
{
"epoch": 0.7276595744680852,
"grad_norm": 3.596545457839966,
"learning_rate": 9.752557405574184e-06,
"loss": 1.1901,
"step": 2736
},
{
"epoch": 0.7279255319148936,
"grad_norm": 3.6757147312164307,
"learning_rate": 9.752284079071242e-06,
"loss": 1.4032,
"step": 2737
},
{
"epoch": 0.7281914893617021,
"grad_norm": 3.862985372543335,
"learning_rate": 9.752010605527033e-06,
"loss": 1.1524,
"step": 2738
},
{
"epoch": 0.7284574468085107,
"grad_norm": 3.685128927230835,
"learning_rate": 9.751736984950023e-06,
"loss": 1.1703,
"step": 2739
},
{
"epoch": 0.7287234042553191,
"grad_norm": 3.4319050312042236,
"learning_rate": 9.751463217348675e-06,
"loss": 1.1965,
"step": 2740
},
{
"epoch": 0.7289893617021277,
"grad_norm": 3.4726648330688477,
"learning_rate": 9.751189302731463e-06,
"loss": 1.24,
"step": 2741
},
{
"epoch": 0.7292553191489362,
"grad_norm": 3.4759905338287354,
"learning_rate": 9.750915241106857e-06,
"loss": 1.1663,
"step": 2742
},
{
"epoch": 0.7295212765957447,
"grad_norm": 3.5179250240325928,
"learning_rate": 9.750641032483344e-06,
"loss": 1.1964,
"step": 2743
},
{
"epoch": 0.7297872340425532,
"grad_norm": 3.397850751876831,
"learning_rate": 9.750366676869401e-06,
"loss": 1.159,
"step": 2744
},
{
"epoch": 0.7300531914893617,
"grad_norm": 3.505492687225342,
"learning_rate": 9.75009217427352e-06,
"loss": 1.4271,
"step": 2745
},
{
"epoch": 0.7303191489361702,
"grad_norm": 3.516559362411499,
"learning_rate": 9.749817524704198e-06,
"loss": 1.2119,
"step": 2746
},
{
"epoch": 0.7305851063829787,
"grad_norm": 3.5949020385742188,
"learning_rate": 9.749542728169925e-06,
"loss": 1.1291,
"step": 2747
},
{
"epoch": 0.7308510638297873,
"grad_norm": 3.3480985164642334,
"learning_rate": 9.749267784679211e-06,
"loss": 1.1421,
"step": 2748
},
{
"epoch": 0.7311170212765957,
"grad_norm": 3.4003922939300537,
"learning_rate": 9.74899269424056e-06,
"loss": 1.3106,
"step": 2749
},
{
"epoch": 0.7313829787234043,
"grad_norm": 3.5191762447357178,
"learning_rate": 9.748717456862484e-06,
"loss": 1.1878,
"step": 2750
},
{
"epoch": 0.7316489361702128,
"grad_norm": 3.5664145946502686,
"learning_rate": 9.748442072553496e-06,
"loss": 1.2272,
"step": 2751
},
{
"epoch": 0.7319148936170212,
"grad_norm": 3.928241491317749,
"learning_rate": 9.748166541322124e-06,
"loss": 1.2986,
"step": 2752
},
{
"epoch": 0.7321808510638298,
"grad_norm": 3.8403828144073486,
"learning_rate": 9.747890863176887e-06,
"loss": 1.3132,
"step": 2753
},
{
"epoch": 0.7324468085106383,
"grad_norm": 3.4996137619018555,
"learning_rate": 9.747615038126317e-06,
"loss": 1.3824,
"step": 2754
},
{
"epoch": 0.7327127659574468,
"grad_norm": 3.5281126499176025,
"learning_rate": 9.747339066178947e-06,
"loss": 1.3015,
"step": 2755
},
{
"epoch": 0.7329787234042553,
"grad_norm": 3.466567277908325,
"learning_rate": 9.747062947343318e-06,
"loss": 1.2638,
"step": 2756
},
{
"epoch": 0.7332446808510639,
"grad_norm": 3.8412346839904785,
"learning_rate": 9.746786681627971e-06,
"loss": 1.1944,
"step": 2757
},
{
"epoch": 0.7335106382978723,
"grad_norm": 3.3403968811035156,
"learning_rate": 9.746510269041459e-06,
"loss": 1.215,
"step": 2758
},
{
"epoch": 0.7337765957446809,
"grad_norm": 3.735173225402832,
"learning_rate": 9.746233709592328e-06,
"loss": 1.393,
"step": 2759
},
{
"epoch": 0.7340425531914894,
"grad_norm": 4.095008373260498,
"learning_rate": 9.745957003289138e-06,
"loss": 1.2848,
"step": 2760
},
{
"epoch": 0.7343085106382978,
"grad_norm": 3.8568758964538574,
"learning_rate": 9.745680150140452e-06,
"loss": 1.3195,
"step": 2761
},
{
"epoch": 0.7345744680851064,
"grad_norm": 3.512941360473633,
"learning_rate": 9.745403150154833e-06,
"loss": 1.0682,
"step": 2762
},
{
"epoch": 0.7348404255319149,
"grad_norm": 4.007373332977295,
"learning_rate": 9.745126003340854e-06,
"loss": 1.2665,
"step": 2763
},
{
"epoch": 0.7351063829787234,
"grad_norm": 3.8637166023254395,
"learning_rate": 9.74484870970709e-06,
"loss": 1.4367,
"step": 2764
},
{
"epoch": 0.7353723404255319,
"grad_norm": 3.6544454097747803,
"learning_rate": 9.744571269262122e-06,
"loss": 1.157,
"step": 2765
},
{
"epoch": 0.7356382978723405,
"grad_norm": 3.5814568996429443,
"learning_rate": 9.744293682014532e-06,
"loss": 1.2989,
"step": 2766
},
{
"epoch": 0.7359042553191489,
"grad_norm": 3.59860897064209,
"learning_rate": 9.74401594797291e-06,
"loss": 1.1852,
"step": 2767
},
{
"epoch": 0.7361702127659574,
"grad_norm": 3.694519519805908,
"learning_rate": 9.743738067145849e-06,
"loss": 1.3947,
"step": 2768
},
{
"epoch": 0.736436170212766,
"grad_norm": 3.570734977722168,
"learning_rate": 9.743460039541947e-06,
"loss": 1.3176,
"step": 2769
},
{
"epoch": 0.7367021276595744,
"grad_norm": 3.448857545852661,
"learning_rate": 9.743181865169806e-06,
"loss": 1.2162,
"step": 2770
},
{
"epoch": 0.736968085106383,
"grad_norm": 3.7955188751220703,
"learning_rate": 9.742903544038033e-06,
"loss": 1.2489,
"step": 2771
},
{
"epoch": 0.7372340425531915,
"grad_norm": 3.520260810852051,
"learning_rate": 9.742625076155244e-06,
"loss": 1.2545,
"step": 2772
},
{
"epoch": 0.7375,
"grad_norm": 3.3301799297332764,
"learning_rate": 9.742346461530048e-06,
"loss": 1.0909,
"step": 2773
},
{
"epoch": 0.7377659574468085,
"grad_norm": 3.57509708404541,
"learning_rate": 9.742067700171069e-06,
"loss": 1.2049,
"step": 2774
},
{
"epoch": 0.738031914893617,
"grad_norm": 3.4712679386138916,
"learning_rate": 9.741788792086934e-06,
"loss": 1.1797,
"step": 2775
},
{
"epoch": 0.7382978723404255,
"grad_norm": 3.4553110599517822,
"learning_rate": 9.74150973728627e-06,
"loss": 1.1082,
"step": 2776
},
{
"epoch": 0.738563829787234,
"grad_norm": 3.6550087928771973,
"learning_rate": 9.741230535777712e-06,
"loss": 1.281,
"step": 2777
},
{
"epoch": 0.7388297872340426,
"grad_norm": 3.3699588775634766,
"learning_rate": 9.7409511875699e-06,
"loss": 1.2331,
"step": 2778
},
{
"epoch": 0.739095744680851,
"grad_norm": 3.393129825592041,
"learning_rate": 9.740671692671478e-06,
"loss": 1.1614,
"step": 2779
},
{
"epoch": 0.7393617021276596,
"grad_norm": 3.888546943664551,
"learning_rate": 9.74039205109109e-06,
"loss": 1.3773,
"step": 2780
},
{
"epoch": 0.7396276595744681,
"grad_norm": 3.5572216510772705,
"learning_rate": 9.740112262837391e-06,
"loss": 1.2269,
"step": 2781
},
{
"epoch": 0.7398936170212767,
"grad_norm": 3.7788665294647217,
"learning_rate": 9.73983232791904e-06,
"loss": 1.2385,
"step": 2782
},
{
"epoch": 0.7401595744680851,
"grad_norm": 4.092897891998291,
"learning_rate": 9.739552246344692e-06,
"loss": 1.3396,
"step": 2783
},
{
"epoch": 0.7404255319148936,
"grad_norm": 3.679199457168579,
"learning_rate": 9.73927201812302e-06,
"loss": 1.2957,
"step": 2784
},
{
"epoch": 0.7406914893617021,
"grad_norm": 3.590893030166626,
"learning_rate": 9.738991643262693e-06,
"loss": 1.3364,
"step": 2785
},
{
"epoch": 0.7409574468085106,
"grad_norm": 3.5082991123199463,
"learning_rate": 9.738711121772384e-06,
"loss": 1.1921,
"step": 2786
},
{
"epoch": 0.7412234042553192,
"grad_norm": 3.556530475616455,
"learning_rate": 9.738430453660774e-06,
"loss": 1.2388,
"step": 2787
},
{
"epoch": 0.7414893617021276,
"grad_norm": 4.152648448944092,
"learning_rate": 9.738149638936547e-06,
"loss": 1.3962,
"step": 2788
},
{
"epoch": 0.7417553191489362,
"grad_norm": 3.8726470470428467,
"learning_rate": 9.73786867760839e-06,
"loss": 1.368,
"step": 2789
},
{
"epoch": 0.7420212765957447,
"grad_norm": 3.4200189113616943,
"learning_rate": 9.737587569685e-06,
"loss": 1.3165,
"step": 2790
},
{
"epoch": 0.7422872340425531,
"grad_norm": 3.8217222690582275,
"learning_rate": 9.737306315175072e-06,
"loss": 1.07,
"step": 2791
},
{
"epoch": 0.7425531914893617,
"grad_norm": 4.083987236022949,
"learning_rate": 9.73702491408731e-06,
"loss": 1.2129,
"step": 2792
},
{
"epoch": 0.7428191489361702,
"grad_norm": 3.396623373031616,
"learning_rate": 9.73674336643042e-06,
"loss": 1.1692,
"step": 2793
},
{
"epoch": 0.7430851063829788,
"grad_norm": 3.545069456100464,
"learning_rate": 9.736461672213112e-06,
"loss": 1.2257,
"step": 2794
},
{
"epoch": 0.7433510638297872,
"grad_norm": 3.856208324432373,
"learning_rate": 9.736179831444103e-06,
"loss": 1.4061,
"step": 2795
},
{
"epoch": 0.7436170212765958,
"grad_norm": 3.6652262210845947,
"learning_rate": 9.735897844132116e-06,
"loss": 1.1792,
"step": 2796
},
{
"epoch": 0.7438829787234043,
"grad_norm": 3.402409791946411,
"learning_rate": 9.735615710285873e-06,
"loss": 1.1954,
"step": 2797
},
{
"epoch": 0.7441489361702127,
"grad_norm": 4.120236396789551,
"learning_rate": 9.735333429914103e-06,
"loss": 1.3625,
"step": 2798
},
{
"epoch": 0.7444148936170213,
"grad_norm": 3.873011350631714,
"learning_rate": 9.735051003025543e-06,
"loss": 1.1915,
"step": 2799
},
{
"epoch": 0.7446808510638298,
"grad_norm": 3.4933876991271973,
"learning_rate": 9.73476842962893e-06,
"loss": 1.1695,
"step": 2800
},
{
"epoch": 0.7449468085106383,
"grad_norm": 3.8242671489715576,
"learning_rate": 9.734485709733007e-06,
"loss": 1.2618,
"step": 2801
},
{
"epoch": 0.7452127659574468,
"grad_norm": 3.512907028198242,
"learning_rate": 9.734202843346522e-06,
"loss": 1.1924,
"step": 2802
},
{
"epoch": 0.7454787234042554,
"grad_norm": 4.221972465515137,
"learning_rate": 9.733919830478227e-06,
"loss": 1.2335,
"step": 2803
},
{
"epoch": 0.7457446808510638,
"grad_norm": 3.864529609680176,
"learning_rate": 9.73363667113688e-06,
"loss": 1.3128,
"step": 2804
},
{
"epoch": 0.7460106382978723,
"grad_norm": 4.328346252441406,
"learning_rate": 9.73335336533124e-06,
"loss": 1.3956,
"step": 2805
},
{
"epoch": 0.7462765957446809,
"grad_norm": 3.605314254760742,
"learning_rate": 9.733069913070074e-06,
"loss": 1.1795,
"step": 2806
},
{
"epoch": 0.7465425531914893,
"grad_norm": 4.531727313995361,
"learning_rate": 9.732786314362154e-06,
"loss": 1.3895,
"step": 2807
},
{
"epoch": 0.7468085106382979,
"grad_norm": 3.587550163269043,
"learning_rate": 9.732502569216252e-06,
"loss": 1.289,
"step": 2808
},
{
"epoch": 0.7470744680851064,
"grad_norm": 3.99782133102417,
"learning_rate": 9.73221867764115e-06,
"loss": 1.3014,
"step": 2809
},
{
"epoch": 0.7473404255319149,
"grad_norm": 3.9140994548797607,
"learning_rate": 9.731934639645628e-06,
"loss": 1.2428,
"step": 2810
},
{
"epoch": 0.7476063829787234,
"grad_norm": 3.7804577350616455,
"learning_rate": 9.73165045523848e-06,
"loss": 1.2315,
"step": 2811
},
{
"epoch": 0.747872340425532,
"grad_norm": 4.103899002075195,
"learning_rate": 9.731366124428495e-06,
"loss": 1.4515,
"step": 2812
},
{
"epoch": 0.7481382978723404,
"grad_norm": 4.170511245727539,
"learning_rate": 9.73108164722447e-06,
"loss": 1.3773,
"step": 2813
},
{
"epoch": 0.7484042553191489,
"grad_norm": 3.4937591552734375,
"learning_rate": 9.73079702363521e-06,
"loss": 1.1113,
"step": 2814
},
{
"epoch": 0.7486702127659575,
"grad_norm": 3.6979286670684814,
"learning_rate": 9.730512253669523e-06,
"loss": 1.2525,
"step": 2815
},
{
"epoch": 0.7489361702127659,
"grad_norm": 3.6911709308624268,
"learning_rate": 9.730227337336214e-06,
"loss": 1.2443,
"step": 2816
},
{
"epoch": 0.7492021276595745,
"grad_norm": 3.462308883666992,
"learning_rate": 9.729942274644102e-06,
"loss": 1.1075,
"step": 2817
},
{
"epoch": 0.749468085106383,
"grad_norm": 4.0079240798950195,
"learning_rate": 9.729657065602007e-06,
"loss": 1.2715,
"step": 2818
},
{
"epoch": 0.7497340425531915,
"grad_norm": 3.6619253158569336,
"learning_rate": 9.729371710218755e-06,
"loss": 1.135,
"step": 2819
},
{
"epoch": 0.75,
"grad_norm": 3.3799519538879395,
"learning_rate": 9.729086208503174e-06,
"loss": 1.2331,
"step": 2820
},
{
"epoch": 0.7502659574468085,
"grad_norm": 3.828418493270874,
"learning_rate": 9.728800560464097e-06,
"loss": 1.3006,
"step": 2821
},
{
"epoch": 0.750531914893617,
"grad_norm": 4.1295928955078125,
"learning_rate": 9.728514766110366e-06,
"loss": 1.2404,
"step": 2822
},
{
"epoch": 0.7507978723404255,
"grad_norm": 3.73343825340271,
"learning_rate": 9.728228825450818e-06,
"loss": 1.3261,
"step": 2823
},
{
"epoch": 0.7510638297872341,
"grad_norm": 3.336246967315674,
"learning_rate": 9.727942738494305e-06,
"loss": 1.0928,
"step": 2824
},
{
"epoch": 0.7513297872340425,
"grad_norm": 3.4438130855560303,
"learning_rate": 9.727656505249676e-06,
"loss": 1.2058,
"step": 2825
},
{
"epoch": 0.7515957446808511,
"grad_norm": 3.7546231746673584,
"learning_rate": 9.72737012572579e-06,
"loss": 1.1447,
"step": 2826
},
{
"epoch": 0.7518617021276596,
"grad_norm": 4.008635520935059,
"learning_rate": 9.727083599931506e-06,
"loss": 1.3526,
"step": 2827
},
{
"epoch": 0.752127659574468,
"grad_norm": 4.192075729370117,
"learning_rate": 9.726796927875688e-06,
"loss": 1.3889,
"step": 2828
},
{
"epoch": 0.7523936170212766,
"grad_norm": 3.805386543273926,
"learning_rate": 9.726510109567211e-06,
"loss": 1.3894,
"step": 2829
},
{
"epoch": 0.7526595744680851,
"grad_norm": 3.9009950160980225,
"learning_rate": 9.726223145014946e-06,
"loss": 1.2844,
"step": 2830
},
{
"epoch": 0.7529255319148936,
"grad_norm": 3.870450735092163,
"learning_rate": 9.725936034227771e-06,
"loss": 1.2328,
"step": 2831
},
{
"epoch": 0.7531914893617021,
"grad_norm": 3.5746779441833496,
"learning_rate": 9.725648777214571e-06,
"loss": 1.2661,
"step": 2832
},
{
"epoch": 0.7534574468085107,
"grad_norm": 4.304332733154297,
"learning_rate": 9.725361373984235e-06,
"loss": 1.2722,
"step": 2833
},
{
"epoch": 0.7537234042553191,
"grad_norm": 3.693098783493042,
"learning_rate": 9.725073824545655e-06,
"loss": 1.3476,
"step": 2834
},
{
"epoch": 0.7539893617021277,
"grad_norm": 3.3664565086364746,
"learning_rate": 9.724786128907726e-06,
"loss": 1.2575,
"step": 2835
},
{
"epoch": 0.7542553191489362,
"grad_norm": 3.585892915725708,
"learning_rate": 9.724498287079353e-06,
"loss": 1.3478,
"step": 2836
},
{
"epoch": 0.7545212765957446,
"grad_norm": 3.768718957901001,
"learning_rate": 9.72421029906944e-06,
"loss": 1.2749,
"step": 2837
},
{
"epoch": 0.7547872340425532,
"grad_norm": 3.891233205795288,
"learning_rate": 9.723922164886898e-06,
"loss": 1.3033,
"step": 2838
},
{
"epoch": 0.7550531914893617,
"grad_norm": 3.5751054286956787,
"learning_rate": 9.723633884540643e-06,
"loss": 1.1453,
"step": 2839
},
{
"epoch": 0.7553191489361702,
"grad_norm": 3.516754150390625,
"learning_rate": 9.723345458039595e-06,
"loss": 1.2553,
"step": 2840
},
{
"epoch": 0.7555851063829787,
"grad_norm": 3.76668643951416,
"learning_rate": 9.723056885392677e-06,
"loss": 1.3444,
"step": 2841
},
{
"epoch": 0.7558510638297873,
"grad_norm": 3.9877772331237793,
"learning_rate": 9.722768166608818e-06,
"loss": 1.2582,
"step": 2842
},
{
"epoch": 0.7561170212765957,
"grad_norm": 3.631065607070923,
"learning_rate": 9.72247930169695e-06,
"loss": 1.3652,
"step": 2843
},
{
"epoch": 0.7563829787234042,
"grad_norm": 3.124361515045166,
"learning_rate": 9.722190290666014e-06,
"loss": 0.9727,
"step": 2844
},
{
"epoch": 0.7566489361702128,
"grad_norm": 3.7869699001312256,
"learning_rate": 9.721901133524951e-06,
"loss": 1.3348,
"step": 2845
},
{
"epoch": 0.7569148936170212,
"grad_norm": 3.49450421333313,
"learning_rate": 9.721611830282707e-06,
"loss": 1.2607,
"step": 2846
},
{
"epoch": 0.7571808510638298,
"grad_norm": 4.137457370758057,
"learning_rate": 9.721322380948235e-06,
"loss": 1.2993,
"step": 2847
},
{
"epoch": 0.7574468085106383,
"grad_norm": 3.492685317993164,
"learning_rate": 9.721032785530488e-06,
"loss": 1.3636,
"step": 2848
},
{
"epoch": 0.7577127659574469,
"grad_norm": 3.78635835647583,
"learning_rate": 9.72074304403843e-06,
"loss": 1.3039,
"step": 2849
},
{
"epoch": 0.7579787234042553,
"grad_norm": 3.5052456855773926,
"learning_rate": 9.720453156481023e-06,
"loss": 1.1737,
"step": 2850
},
{
"epoch": 0.7582446808510638,
"grad_norm": 3.5687224864959717,
"learning_rate": 9.72016312286724e-06,
"loss": 1.3378,
"step": 2851
},
{
"epoch": 0.7585106382978724,
"grad_norm": 3.2821710109710693,
"learning_rate": 9.71987294320605e-06,
"loss": 1.0614,
"step": 2852
},
{
"epoch": 0.7587765957446808,
"grad_norm": 3.9896838665008545,
"learning_rate": 9.719582617506434e-06,
"loss": 1.4842,
"step": 2853
},
{
"epoch": 0.7590425531914894,
"grad_norm": 3.674095392227173,
"learning_rate": 9.719292145777377e-06,
"loss": 1.2268,
"step": 2854
},
{
"epoch": 0.7593085106382979,
"grad_norm": 3.586404800415039,
"learning_rate": 9.719001528027863e-06,
"loss": 1.3219,
"step": 2855
},
{
"epoch": 0.7595744680851064,
"grad_norm": 3.734853744506836,
"learning_rate": 9.718710764266888e-06,
"loss": 1.2469,
"step": 2856
},
{
"epoch": 0.7598404255319149,
"grad_norm": 3.4392611980438232,
"learning_rate": 9.718419854503444e-06,
"loss": 1.1928,
"step": 2857
},
{
"epoch": 0.7601063829787233,
"grad_norm": 3.7639527320861816,
"learning_rate": 9.718128798746537e-06,
"loss": 1.2995,
"step": 2858
},
{
"epoch": 0.7603723404255319,
"grad_norm": 3.564790964126587,
"learning_rate": 9.717837597005169e-06,
"loss": 1.2086,
"step": 2859
},
{
"epoch": 0.7606382978723404,
"grad_norm": 3.9883244037628174,
"learning_rate": 9.71754624928835e-06,
"loss": 1.2138,
"step": 2860
},
{
"epoch": 0.760904255319149,
"grad_norm": 3.823289632797241,
"learning_rate": 9.717254755605097e-06,
"loss": 1.2225,
"step": 2861
},
{
"epoch": 0.7611702127659574,
"grad_norm": 3.4945852756500244,
"learning_rate": 9.716963115964427e-06,
"loss": 1.26,
"step": 2862
},
{
"epoch": 0.761436170212766,
"grad_norm": 3.7626545429229736,
"learning_rate": 9.716671330375366e-06,
"loss": 1.2424,
"step": 2863
},
{
"epoch": 0.7617021276595745,
"grad_norm": 3.789428949356079,
"learning_rate": 9.71637939884694e-06,
"loss": 1.3538,
"step": 2864
},
{
"epoch": 0.761968085106383,
"grad_norm": 3.781531810760498,
"learning_rate": 9.716087321388184e-06,
"loss": 1.2693,
"step": 2865
},
{
"epoch": 0.7622340425531915,
"grad_norm": 3.184601306915283,
"learning_rate": 9.715795098008132e-06,
"loss": 1.0477,
"step": 2866
},
{
"epoch": 0.7625,
"grad_norm": 3.636810302734375,
"learning_rate": 9.715502728715827e-06,
"loss": 1.2691,
"step": 2867
},
{
"epoch": 0.7627659574468085,
"grad_norm": 4.0694122314453125,
"learning_rate": 9.715210213520317e-06,
"loss": 1.3419,
"step": 2868
},
{
"epoch": 0.763031914893617,
"grad_norm": 3.9551241397857666,
"learning_rate": 9.714917552430652e-06,
"loss": 1.2398,
"step": 2869
},
{
"epoch": 0.7632978723404256,
"grad_norm": 3.7696473598480225,
"learning_rate": 9.714624745455885e-06,
"loss": 1.2691,
"step": 2870
},
{
"epoch": 0.763563829787234,
"grad_norm": 3.726793050765991,
"learning_rate": 9.71433179260508e-06,
"loss": 1.2308,
"step": 2871
},
{
"epoch": 0.7638297872340426,
"grad_norm": 3.6226067543029785,
"learning_rate": 9.714038693887298e-06,
"loss": 1.3653,
"step": 2872
},
{
"epoch": 0.7640957446808511,
"grad_norm": 3.4948949813842773,
"learning_rate": 9.713745449311606e-06,
"loss": 1.2048,
"step": 2873
},
{
"epoch": 0.7643617021276595,
"grad_norm": 3.3849282264709473,
"learning_rate": 9.713452058887084e-06,
"loss": 1.1664,
"step": 2874
},
{
"epoch": 0.7646276595744681,
"grad_norm": 3.9506824016571045,
"learning_rate": 9.713158522622804e-06,
"loss": 1.4175,
"step": 2875
},
{
"epoch": 0.7648936170212766,
"grad_norm": 3.5069642066955566,
"learning_rate": 9.71286484052785e-06,
"loss": 1.2298,
"step": 2876
},
{
"epoch": 0.7651595744680851,
"grad_norm": 3.5655500888824463,
"learning_rate": 9.71257101261131e-06,
"loss": 1.1717,
"step": 2877
},
{
"epoch": 0.7654255319148936,
"grad_norm": 3.450375556945801,
"learning_rate": 9.712277038882274e-06,
"loss": 1.1573,
"step": 2878
},
{
"epoch": 0.7656914893617022,
"grad_norm": 3.849936008453369,
"learning_rate": 9.711982919349839e-06,
"loss": 1.1671,
"step": 2879
},
{
"epoch": 0.7659574468085106,
"grad_norm": 3.557499647140503,
"learning_rate": 9.711688654023105e-06,
"loss": 1.2369,
"step": 2880
},
{
"epoch": 0.7662234042553191,
"grad_norm": 4.1276326179504395,
"learning_rate": 9.711394242911177e-06,
"loss": 1.2304,
"step": 2881
},
{
"epoch": 0.7664893617021277,
"grad_norm": 3.553694725036621,
"learning_rate": 9.711099686023161e-06,
"loss": 1.285,
"step": 2882
},
{
"epoch": 0.7667553191489361,
"grad_norm": 3.484138250350952,
"learning_rate": 9.710804983368177e-06,
"loss": 1.2578,
"step": 2883
},
{
"epoch": 0.7670212765957447,
"grad_norm": 3.855220317840576,
"learning_rate": 9.71051013495534e-06,
"loss": 1.2213,
"step": 2884
},
{
"epoch": 0.7672872340425532,
"grad_norm": 3.9998855590820312,
"learning_rate": 9.710215140793774e-06,
"loss": 1.231,
"step": 2885
},
{
"epoch": 0.7675531914893617,
"grad_norm": 3.568758487701416,
"learning_rate": 9.709920000892605e-06,
"loss": 1.1779,
"step": 2886
},
{
"epoch": 0.7678191489361702,
"grad_norm": 3.5209362506866455,
"learning_rate": 9.709624715260965e-06,
"loss": 1.0908,
"step": 2887
},
{
"epoch": 0.7680851063829788,
"grad_norm": 3.783108949661255,
"learning_rate": 9.709329283907993e-06,
"loss": 1.3374,
"step": 2888
},
{
"epoch": 0.7683510638297872,
"grad_norm": 3.672305107116699,
"learning_rate": 9.70903370684283e-06,
"loss": 1.2719,
"step": 2889
},
{
"epoch": 0.7686170212765957,
"grad_norm": 3.9783568382263184,
"learning_rate": 9.708737984074616e-06,
"loss": 1.2343,
"step": 2890
},
{
"epoch": 0.7688829787234043,
"grad_norm": 3.6471900939941406,
"learning_rate": 9.708442115612508e-06,
"loss": 1.1384,
"step": 2891
},
{
"epoch": 0.7691489361702127,
"grad_norm": 3.8330166339874268,
"learning_rate": 9.708146101465657e-06,
"loss": 1.3178,
"step": 2892
},
{
"epoch": 0.7694148936170213,
"grad_norm": 3.224055290222168,
"learning_rate": 9.707849941643222e-06,
"loss": 1.087,
"step": 2893
},
{
"epoch": 0.7696808510638298,
"grad_norm": 4.061996936798096,
"learning_rate": 9.707553636154366e-06,
"loss": 1.4389,
"step": 2894
},
{
"epoch": 0.7699468085106383,
"grad_norm": 3.7000250816345215,
"learning_rate": 9.707257185008259e-06,
"loss": 1.2383,
"step": 2895
},
{
"epoch": 0.7702127659574468,
"grad_norm": 3.3188624382019043,
"learning_rate": 9.706960588214072e-06,
"loss": 1.1835,
"step": 2896
},
{
"epoch": 0.7704787234042553,
"grad_norm": 3.68198299407959,
"learning_rate": 9.706663845780984e-06,
"loss": 1.2511,
"step": 2897
},
{
"epoch": 0.7707446808510638,
"grad_norm": 3.831139326095581,
"learning_rate": 9.706366957718174e-06,
"loss": 1.3409,
"step": 2898
},
{
"epoch": 0.7710106382978723,
"grad_norm": 3.3753414154052734,
"learning_rate": 9.70606992403483e-06,
"loss": 1.1988,
"step": 2899
},
{
"epoch": 0.7712765957446809,
"grad_norm": 3.3466532230377197,
"learning_rate": 9.705772744740142e-06,
"loss": 1.1079,
"step": 2900
},
{
"epoch": 0.7715425531914893,
"grad_norm": 3.39589524269104,
"learning_rate": 9.705475419843304e-06,
"loss": 1.2094,
"step": 2901
},
{
"epoch": 0.7718085106382979,
"grad_norm": 3.5272488594055176,
"learning_rate": 9.705177949353516e-06,
"loss": 1.2466,
"step": 2902
},
{
"epoch": 0.7720744680851064,
"grad_norm": 3.9202656745910645,
"learning_rate": 9.704880333279985e-06,
"loss": 1.2347,
"step": 2903
},
{
"epoch": 0.7723404255319148,
"grad_norm": 3.421706199645996,
"learning_rate": 9.704582571631915e-06,
"loss": 1.1643,
"step": 2904
},
{
"epoch": 0.7726063829787234,
"grad_norm": 3.8939504623413086,
"learning_rate": 9.704284664418521e-06,
"loss": 1.4996,
"step": 2905
},
{
"epoch": 0.7728723404255319,
"grad_norm": 3.362236976623535,
"learning_rate": 9.703986611649024e-06,
"loss": 1.2661,
"step": 2906
},
{
"epoch": 0.7731382978723405,
"grad_norm": 3.2896718978881836,
"learning_rate": 9.70368841333264e-06,
"loss": 1.0865,
"step": 2907
},
{
"epoch": 0.7734042553191489,
"grad_norm": 3.662534475326538,
"learning_rate": 9.7033900694786e-06,
"loss": 1.223,
"step": 2908
},
{
"epoch": 0.7736702127659575,
"grad_norm": 3.7135627269744873,
"learning_rate": 9.703091580096132e-06,
"loss": 1.4123,
"step": 2909
},
{
"epoch": 0.773936170212766,
"grad_norm": 3.431130886077881,
"learning_rate": 9.702792945194475e-06,
"loss": 1.139,
"step": 2910
},
{
"epoch": 0.7742021276595744,
"grad_norm": 4.038398742675781,
"learning_rate": 9.702494164782866e-06,
"loss": 1.3352,
"step": 2911
},
{
"epoch": 0.774468085106383,
"grad_norm": 3.5457537174224854,
"learning_rate": 9.702195238870552e-06,
"loss": 1.2472,
"step": 2912
},
{
"epoch": 0.7747340425531914,
"grad_norm": 3.9684653282165527,
"learning_rate": 9.70189616746678e-06,
"loss": 1.2834,
"step": 2913
},
{
"epoch": 0.775,
"grad_norm": 3.520798683166504,
"learning_rate": 9.701596950580807e-06,
"loss": 1.1989,
"step": 2914
},
{
"epoch": 0.7752659574468085,
"grad_norm": 3.4203343391418457,
"learning_rate": 9.701297588221888e-06,
"loss": 1.2368,
"step": 2915
},
{
"epoch": 0.7755319148936171,
"grad_norm": 3.5501503944396973,
"learning_rate": 9.700998080399287e-06,
"loss": 1.2317,
"step": 2916
},
{
"epoch": 0.7757978723404255,
"grad_norm": 3.5603249073028564,
"learning_rate": 9.700698427122269e-06,
"loss": 1.2071,
"step": 2917
},
{
"epoch": 0.7760638297872341,
"grad_norm": 3.5951790809631348,
"learning_rate": 9.700398628400109e-06,
"loss": 1.1681,
"step": 2918
},
{
"epoch": 0.7763297872340426,
"grad_norm": 3.6561312675476074,
"learning_rate": 9.700098684242082e-06,
"loss": 1.3097,
"step": 2919
},
{
"epoch": 0.776595744680851,
"grad_norm": 3.628885269165039,
"learning_rate": 9.699798594657464e-06,
"loss": 1.2199,
"step": 2920
},
{
"epoch": 0.7768617021276596,
"grad_norm": 3.6864166259765625,
"learning_rate": 9.699498359655548e-06,
"loss": 1.2123,
"step": 2921
},
{
"epoch": 0.777127659574468,
"grad_norm": 4.034405708312988,
"learning_rate": 9.699197979245617e-06,
"loss": 1.3019,
"step": 2922
},
{
"epoch": 0.7773936170212766,
"grad_norm": 3.9352498054504395,
"learning_rate": 9.69889745343697e-06,
"loss": 1.4196,
"step": 2923
},
{
"epoch": 0.7776595744680851,
"grad_norm": 3.983980894088745,
"learning_rate": 9.698596782238904e-06,
"loss": 1.1829,
"step": 2924
},
{
"epoch": 0.7779255319148937,
"grad_norm": 3.4715261459350586,
"learning_rate": 9.698295965660721e-06,
"loss": 1.144,
"step": 2925
},
{
"epoch": 0.7781914893617021,
"grad_norm": 3.7768967151641846,
"learning_rate": 9.69799500371173e-06,
"loss": 1.2891,
"step": 2926
},
{
"epoch": 0.7784574468085106,
"grad_norm": 3.628307580947876,
"learning_rate": 9.697693896401239e-06,
"loss": 1.2956,
"step": 2927
},
{
"epoch": 0.7787234042553192,
"grad_norm": 3.601635456085205,
"learning_rate": 9.697392643738571e-06,
"loss": 1.2924,
"step": 2928
},
{
"epoch": 0.7789893617021276,
"grad_norm": 3.6882519721984863,
"learning_rate": 9.697091245733043e-06,
"loss": 1.2887,
"step": 2929
},
{
"epoch": 0.7792553191489362,
"grad_norm": 3.7858314514160156,
"learning_rate": 9.696789702393982e-06,
"loss": 1.3439,
"step": 2930
},
{
"epoch": 0.7795212765957447,
"grad_norm": 3.6974260807037354,
"learning_rate": 9.696488013730717e-06,
"loss": 1.2487,
"step": 2931
},
{
"epoch": 0.7797872340425532,
"grad_norm": 3.5106611251831055,
"learning_rate": 9.696186179752587e-06,
"loss": 1.1533,
"step": 2932
},
{
"epoch": 0.7800531914893617,
"grad_norm": 3.440690279006958,
"learning_rate": 9.695884200468923e-06,
"loss": 1.1004,
"step": 2933
},
{
"epoch": 0.7803191489361702,
"grad_norm": 3.43935227394104,
"learning_rate": 9.695582075889077e-06,
"loss": 1.192,
"step": 2934
},
{
"epoch": 0.7805851063829787,
"grad_norm": 3.6551554203033447,
"learning_rate": 9.695279806022391e-06,
"loss": 1.2693,
"step": 2935
},
{
"epoch": 0.7808510638297872,
"grad_norm": 3.6879799365997314,
"learning_rate": 9.694977390878219e-06,
"loss": 1.3101,
"step": 2936
},
{
"epoch": 0.7811170212765958,
"grad_norm": 3.6642568111419678,
"learning_rate": 9.69467483046592e-06,
"loss": 1.3313,
"step": 2937
},
{
"epoch": 0.7813829787234042,
"grad_norm": 3.6739001274108887,
"learning_rate": 9.694372124794855e-06,
"loss": 1.175,
"step": 2938
},
{
"epoch": 0.7816489361702128,
"grad_norm": 3.346895933151245,
"learning_rate": 9.69406927387439e-06,
"loss": 1.135,
"step": 2939
},
{
"epoch": 0.7819148936170213,
"grad_norm": 3.605050563812256,
"learning_rate": 9.693766277713893e-06,
"loss": 1.2365,
"step": 2940
},
{
"epoch": 0.7821808510638298,
"grad_norm": 3.56868839263916,
"learning_rate": 9.693463136322743e-06,
"loss": 1.2756,
"step": 2941
},
{
"epoch": 0.7824468085106383,
"grad_norm": 3.4643678665161133,
"learning_rate": 9.693159849710317e-06,
"loss": 1.1344,
"step": 2942
},
{
"epoch": 0.7827127659574468,
"grad_norm": 3.7843425273895264,
"learning_rate": 9.692856417885998e-06,
"loss": 1.2301,
"step": 2943
},
{
"epoch": 0.7829787234042553,
"grad_norm": 3.7226831912994385,
"learning_rate": 9.69255284085918e-06,
"loss": 1.2124,
"step": 2944
},
{
"epoch": 0.7832446808510638,
"grad_norm": 3.5860259532928467,
"learning_rate": 9.69224911863925e-06,
"loss": 1.2237,
"step": 2945
},
{
"epoch": 0.7835106382978724,
"grad_norm": 3.68369722366333,
"learning_rate": 9.691945251235608e-06,
"loss": 1.3566,
"step": 2946
},
{
"epoch": 0.7837765957446808,
"grad_norm": 3.778324842453003,
"learning_rate": 9.691641238657655e-06,
"loss": 1.2369,
"step": 2947
},
{
"epoch": 0.7840425531914894,
"grad_norm": 3.4326350688934326,
"learning_rate": 9.6913370809148e-06,
"loss": 1.0766,
"step": 2948
},
{
"epoch": 0.7843085106382979,
"grad_norm": 3.609269380569458,
"learning_rate": 9.691032778016452e-06,
"loss": 1.228,
"step": 2949
},
{
"epoch": 0.7845744680851063,
"grad_norm": 3.3350110054016113,
"learning_rate": 9.690728329972025e-06,
"loss": 1.1658,
"step": 2950
},
{
"epoch": 0.7848404255319149,
"grad_norm": 3.53971004486084,
"learning_rate": 9.690423736790944e-06,
"loss": 1.2674,
"step": 2951
},
{
"epoch": 0.7851063829787234,
"grad_norm": 3.3145904541015625,
"learning_rate": 9.690118998482628e-06,
"loss": 1.2601,
"step": 2952
},
{
"epoch": 0.785372340425532,
"grad_norm": 3.7415387630462646,
"learning_rate": 9.689814115056509e-06,
"loss": 1.3693,
"step": 2953
},
{
"epoch": 0.7856382978723404,
"grad_norm": 3.2443130016326904,
"learning_rate": 9.689509086522019e-06,
"loss": 1.1516,
"step": 2954
},
{
"epoch": 0.785904255319149,
"grad_norm": 3.4239816665649414,
"learning_rate": 9.689203912888597e-06,
"loss": 1.2722,
"step": 2955
},
{
"epoch": 0.7861702127659574,
"grad_norm": 3.5822324752807617,
"learning_rate": 9.688898594165685e-06,
"loss": 1.2253,
"step": 2956
},
{
"epoch": 0.7864361702127659,
"grad_norm": 3.2302675247192383,
"learning_rate": 9.688593130362731e-06,
"loss": 1.1031,
"step": 2957
},
{
"epoch": 0.7867021276595745,
"grad_norm": 3.6517271995544434,
"learning_rate": 9.688287521489184e-06,
"loss": 1.2459,
"step": 2958
},
{
"epoch": 0.7869680851063829,
"grad_norm": 3.772766351699829,
"learning_rate": 9.687981767554502e-06,
"loss": 1.2623,
"step": 2959
},
{
"epoch": 0.7872340425531915,
"grad_norm": 3.646852731704712,
"learning_rate": 9.687675868568145e-06,
"loss": 1.2951,
"step": 2960
},
{
"epoch": 0.7875,
"grad_norm": 3.738582134246826,
"learning_rate": 9.687369824539577e-06,
"loss": 1.3321,
"step": 2961
},
{
"epoch": 0.7877659574468086,
"grad_norm": 3.6618778705596924,
"learning_rate": 9.687063635478269e-06,
"loss": 1.3527,
"step": 2962
},
{
"epoch": 0.788031914893617,
"grad_norm": 3.6133735179901123,
"learning_rate": 9.686757301393693e-06,
"loss": 1.2852,
"step": 2963
},
{
"epoch": 0.7882978723404256,
"grad_norm": 3.7590041160583496,
"learning_rate": 9.686450822295327e-06,
"loss": 1.2057,
"step": 2964
},
{
"epoch": 0.788563829787234,
"grad_norm": 3.4455080032348633,
"learning_rate": 9.686144198192658e-06,
"loss": 1.2478,
"step": 2965
},
{
"epoch": 0.7888297872340425,
"grad_norm": 3.4166572093963623,
"learning_rate": 9.685837429095169e-06,
"loss": 1.2585,
"step": 2966
},
{
"epoch": 0.7890957446808511,
"grad_norm": 3.322124719619751,
"learning_rate": 9.685530515012352e-06,
"loss": 1.2452,
"step": 2967
},
{
"epoch": 0.7893617021276595,
"grad_norm": 3.493075132369995,
"learning_rate": 9.685223455953703e-06,
"loss": 1.1951,
"step": 2968
},
{
"epoch": 0.7896276595744681,
"grad_norm": 3.7366654872894287,
"learning_rate": 9.684916251928727e-06,
"loss": 1.4098,
"step": 2969
},
{
"epoch": 0.7898936170212766,
"grad_norm": 3.846484899520874,
"learning_rate": 9.684608902946926e-06,
"loss": 1.2726,
"step": 2970
},
{
"epoch": 0.7901595744680852,
"grad_norm": 3.382856607437134,
"learning_rate": 9.684301409017808e-06,
"loss": 1.2072,
"step": 2971
},
{
"epoch": 0.7904255319148936,
"grad_norm": 3.600064277648926,
"learning_rate": 9.68399377015089e-06,
"loss": 1.2991,
"step": 2972
},
{
"epoch": 0.7906914893617021,
"grad_norm": 3.4890823364257812,
"learning_rate": 9.683685986355692e-06,
"loss": 1.303,
"step": 2973
},
{
"epoch": 0.7909574468085107,
"grad_norm": 3.2720248699188232,
"learning_rate": 9.683378057641735e-06,
"loss": 1.305,
"step": 2974
},
{
"epoch": 0.7912234042553191,
"grad_norm": 3.3121964931488037,
"learning_rate": 9.683069984018545e-06,
"loss": 1.228,
"step": 2975
},
{
"epoch": 0.7914893617021277,
"grad_norm": 3.5907375812530518,
"learning_rate": 9.682761765495657e-06,
"loss": 1.3374,
"step": 2976
},
{
"epoch": 0.7917553191489362,
"grad_norm": 3.518444538116455,
"learning_rate": 9.682453402082607e-06,
"loss": 1.0759,
"step": 2977
},
{
"epoch": 0.7920212765957447,
"grad_norm": 3.7533528804779053,
"learning_rate": 9.682144893788934e-06,
"loss": 1.2666,
"step": 2978
},
{
"epoch": 0.7922872340425532,
"grad_norm": 3.877476453781128,
"learning_rate": 9.681836240624187e-06,
"loss": 1.2371,
"step": 2979
},
{
"epoch": 0.7925531914893617,
"grad_norm": 3.945760488510132,
"learning_rate": 9.681527442597916e-06,
"loss": 1.282,
"step": 2980
},
{
"epoch": 0.7928191489361702,
"grad_norm": 3.585514783859253,
"learning_rate": 9.681218499719673e-06,
"loss": 1.3038,
"step": 2981
},
{
"epoch": 0.7930851063829787,
"grad_norm": 4.198021411895752,
"learning_rate": 9.680909411999018e-06,
"loss": 1.4758,
"step": 2982
},
{
"epoch": 0.7933510638297873,
"grad_norm": 3.670048713684082,
"learning_rate": 9.680600179445514e-06,
"loss": 1.2579,
"step": 2983
},
{
"epoch": 0.7936170212765957,
"grad_norm": 3.6147031784057617,
"learning_rate": 9.68029080206873e-06,
"loss": 1.2565,
"step": 2984
},
{
"epoch": 0.7938829787234043,
"grad_norm": 3.589110851287842,
"learning_rate": 9.67998127987824e-06,
"loss": 1.2516,
"step": 2985
},
{
"epoch": 0.7941489361702128,
"grad_norm": 3.5315637588500977,
"learning_rate": 9.679671612883615e-06,
"loss": 1.2206,
"step": 2986
},
{
"epoch": 0.7944148936170212,
"grad_norm": 3.6465420722961426,
"learning_rate": 9.679361801094445e-06,
"loss": 1.2784,
"step": 2987
},
{
"epoch": 0.7946808510638298,
"grad_norm": 3.6671435832977295,
"learning_rate": 9.679051844520308e-06,
"loss": 1.4118,
"step": 2988
},
{
"epoch": 0.7949468085106383,
"grad_norm": 3.479151725769043,
"learning_rate": 9.6787417431708e-06,
"loss": 1.303,
"step": 2989
},
{
"epoch": 0.7952127659574468,
"grad_norm": 3.694517135620117,
"learning_rate": 9.678431497055515e-06,
"loss": 1.1658,
"step": 2990
},
{
"epoch": 0.7954787234042553,
"grad_norm": 3.453770637512207,
"learning_rate": 9.67812110618405e-06,
"loss": 1.2784,
"step": 2991
},
{
"epoch": 0.7957446808510639,
"grad_norm": 3.926161527633667,
"learning_rate": 9.677810570566011e-06,
"loss": 1.2926,
"step": 2992
},
{
"epoch": 0.7960106382978723,
"grad_norm": 3.6100566387176514,
"learning_rate": 9.677499890211005e-06,
"loss": 1.2504,
"step": 2993
},
{
"epoch": 0.7962765957446809,
"grad_norm": 3.496819019317627,
"learning_rate": 9.677189065128646e-06,
"loss": 1.1922,
"step": 2994
},
{
"epoch": 0.7965425531914894,
"grad_norm": 3.4073357582092285,
"learning_rate": 9.676878095328547e-06,
"loss": 1.1934,
"step": 2995
},
{
"epoch": 0.7968085106382978,
"grad_norm": 3.5559115409851074,
"learning_rate": 9.676566980820338e-06,
"loss": 1.3128,
"step": 2996
},
{
"epoch": 0.7970744680851064,
"grad_norm": 3.844743013381958,
"learning_rate": 9.676255721613639e-06,
"loss": 1.2881,
"step": 2997
},
{
"epoch": 0.7973404255319149,
"grad_norm": 3.2858474254608154,
"learning_rate": 9.675944317718083e-06,
"loss": 1.2103,
"step": 2998
},
{
"epoch": 0.7976063829787234,
"grad_norm": 3.7412915229797363,
"learning_rate": 9.675632769143303e-06,
"loss": 1.2254,
"step": 2999
},
{
"epoch": 0.7978723404255319,
"grad_norm": 4.140746116638184,
"learning_rate": 9.67532107589894e-06,
"loss": 1.2933,
"step": 3000
},
{
"epoch": 0.7978723404255319,
"eval_loss": 1.2683638334274292,
"eval_runtime": 12.6307,
"eval_samples_per_second": 31.669,
"eval_steps_per_second": 3.959,
"step": 3000
},
{
"epoch": 0.7981382978723405,
"grad_norm": 3.8456828594207764,
"learning_rate": 9.67500923799464e-06,
"loss": 1.3237,
"step": 3001
},
{
"epoch": 0.7984042553191489,
"grad_norm": 3.4592676162719727,
"learning_rate": 9.67469725544005e-06,
"loss": 1.0598,
"step": 3002
},
{
"epoch": 0.7986702127659574,
"grad_norm": 3.729926586151123,
"learning_rate": 9.674385128244823e-06,
"loss": 1.2681,
"step": 3003
},
{
"epoch": 0.798936170212766,
"grad_norm": 3.4208433628082275,
"learning_rate": 9.674072856418616e-06,
"loss": 1.3245,
"step": 3004
},
{
"epoch": 0.7992021276595744,
"grad_norm": 3.511957883834839,
"learning_rate": 9.673760439971091e-06,
"loss": 1.1623,
"step": 3005
},
{
"epoch": 0.799468085106383,
"grad_norm": 3.794137477874756,
"learning_rate": 9.673447878911916e-06,
"loss": 1.1303,
"step": 3006
},
{
"epoch": 0.7997340425531915,
"grad_norm": 3.826404571533203,
"learning_rate": 9.673135173250763e-06,
"loss": 1.3698,
"step": 3007
},
{
"epoch": 0.8,
"grad_norm": 3.5505003929138184,
"learning_rate": 9.672822322997305e-06,
"loss": 1.257,
"step": 3008
},
{
"epoch": 0.8002659574468085,
"grad_norm": 3.616678237915039,
"learning_rate": 9.672509328161222e-06,
"loss": 1.263,
"step": 3009
},
{
"epoch": 0.800531914893617,
"grad_norm": 3.5338237285614014,
"learning_rate": 9.672196188752201e-06,
"loss": 1.2328,
"step": 3010
},
{
"epoch": 0.8007978723404255,
"grad_norm": 3.4037692546844482,
"learning_rate": 9.671882904779927e-06,
"loss": 1.1843,
"step": 3011
},
{
"epoch": 0.801063829787234,
"grad_norm": 3.918245315551758,
"learning_rate": 9.671569476254096e-06,
"loss": 1.3486,
"step": 3012
},
{
"epoch": 0.8013297872340426,
"grad_norm": 3.5351336002349854,
"learning_rate": 9.671255903184405e-06,
"loss": 1.3272,
"step": 3013
},
{
"epoch": 0.801595744680851,
"grad_norm": 3.9071462154388428,
"learning_rate": 9.670942185580557e-06,
"loss": 1.1649,
"step": 3014
},
{
"epoch": 0.8018617021276596,
"grad_norm": 3.493410110473633,
"learning_rate": 9.670628323452259e-06,
"loss": 1.1651,
"step": 3015
},
{
"epoch": 0.8021276595744681,
"grad_norm": 3.2986040115356445,
"learning_rate": 9.670314316809222e-06,
"loss": 1.2718,
"step": 3016
},
{
"epoch": 0.8023936170212767,
"grad_norm": 3.4360411167144775,
"learning_rate": 9.67000016566116e-06,
"loss": 1.1393,
"step": 3017
},
{
"epoch": 0.8026595744680851,
"grad_norm": 3.690444231033325,
"learning_rate": 9.669685870017795e-06,
"loss": 1.1887,
"step": 3018
},
{
"epoch": 0.8029255319148936,
"grad_norm": 3.58248233795166,
"learning_rate": 9.669371429888852e-06,
"loss": 1.3714,
"step": 3019
},
{
"epoch": 0.8031914893617021,
"grad_norm": 3.723407745361328,
"learning_rate": 9.66905684528406e-06,
"loss": 1.2999,
"step": 3020
},
{
"epoch": 0.8034574468085106,
"grad_norm": 3.7996089458465576,
"learning_rate": 9.66874211621315e-06,
"loss": 1.3091,
"step": 3021
},
{
"epoch": 0.8037234042553192,
"grad_norm": 3.741523265838623,
"learning_rate": 9.668427242685864e-06,
"loss": 1.261,
"step": 3022
},
{
"epoch": 0.8039893617021276,
"grad_norm": 3.6952426433563232,
"learning_rate": 9.668112224711941e-06,
"loss": 1.3148,
"step": 3023
},
{
"epoch": 0.8042553191489362,
"grad_norm": 3.728320837020874,
"learning_rate": 9.667797062301133e-06,
"loss": 1.2188,
"step": 3024
},
{
"epoch": 0.8045212765957447,
"grad_norm": 3.7836687564849854,
"learning_rate": 9.667481755463183e-06,
"loss": 1.3981,
"step": 3025
},
{
"epoch": 0.8047872340425531,
"grad_norm": 3.308515787124634,
"learning_rate": 9.667166304207856e-06,
"loss": 1.2107,
"step": 3026
},
{
"epoch": 0.8050531914893617,
"grad_norm": 3.5682644844055176,
"learning_rate": 9.666850708544907e-06,
"loss": 1.2288,
"step": 3027
},
{
"epoch": 0.8053191489361702,
"grad_norm": 3.817530632019043,
"learning_rate": 9.666534968484105e-06,
"loss": 1.2821,
"step": 3028
},
{
"epoch": 0.8055851063829788,
"grad_norm": 3.1704676151275635,
"learning_rate": 9.666219084035215e-06,
"loss": 1.1683,
"step": 3029
},
{
"epoch": 0.8058510638297872,
"grad_norm": 3.884427547454834,
"learning_rate": 9.665903055208013e-06,
"loss": 1.3448,
"step": 3030
},
{
"epoch": 0.8061170212765958,
"grad_norm": 3.8523178100585938,
"learning_rate": 9.665586882012278e-06,
"loss": 1.1827,
"step": 3031
},
{
"epoch": 0.8063829787234043,
"grad_norm": 3.217390298843384,
"learning_rate": 9.66527056445779e-06,
"loss": 1.1782,
"step": 3032
},
{
"epoch": 0.8066489361702127,
"grad_norm": 3.484069585800171,
"learning_rate": 9.66495410255434e-06,
"loss": 1.2279,
"step": 3033
},
{
"epoch": 0.8069148936170213,
"grad_norm": 3.62542724609375,
"learning_rate": 9.664637496311717e-06,
"loss": 1.232,
"step": 3034
},
{
"epoch": 0.8071808510638298,
"grad_norm": 3.6373066902160645,
"learning_rate": 9.664320745739717e-06,
"loss": 1.2463,
"step": 3035
},
{
"epoch": 0.8074468085106383,
"grad_norm": 3.3646364212036133,
"learning_rate": 9.664003850848142e-06,
"loss": 1.1543,
"step": 3036
},
{
"epoch": 0.8077127659574468,
"grad_norm": 3.772383689880371,
"learning_rate": 9.663686811646798e-06,
"loss": 1.3646,
"step": 3037
},
{
"epoch": 0.8079787234042554,
"grad_norm": 3.8896496295928955,
"learning_rate": 9.663369628145493e-06,
"loss": 1.2321,
"step": 3038
},
{
"epoch": 0.8082446808510638,
"grad_norm": 4.038544654846191,
"learning_rate": 9.66305230035404e-06,
"loss": 1.2345,
"step": 3039
},
{
"epoch": 0.8085106382978723,
"grad_norm": 3.7592129707336426,
"learning_rate": 9.662734828282258e-06,
"loss": 1.2879,
"step": 3040
},
{
"epoch": 0.8087765957446809,
"grad_norm": 3.3927769660949707,
"learning_rate": 9.662417211939974e-06,
"loss": 1.2495,
"step": 3041
},
{
"epoch": 0.8090425531914893,
"grad_norm": 3.7398223876953125,
"learning_rate": 9.662099451337009e-06,
"loss": 1.2328,
"step": 3042
},
{
"epoch": 0.8093085106382979,
"grad_norm": 3.697510004043579,
"learning_rate": 9.6617815464832e-06,
"loss": 1.2306,
"step": 3043
},
{
"epoch": 0.8095744680851064,
"grad_norm": 3.362252712249756,
"learning_rate": 9.66146349738838e-06,
"loss": 1.2598,
"step": 3044
},
{
"epoch": 0.8098404255319149,
"grad_norm": 3.629018783569336,
"learning_rate": 9.661145304062391e-06,
"loss": 1.2364,
"step": 3045
},
{
"epoch": 0.8101063829787234,
"grad_norm": 3.6889262199401855,
"learning_rate": 9.66082696651508e-06,
"loss": 1.2122,
"step": 3046
},
{
"epoch": 0.810372340425532,
"grad_norm": 3.6210176944732666,
"learning_rate": 9.660508484756295e-06,
"loss": 1.2425,
"step": 3047
},
{
"epoch": 0.8106382978723404,
"grad_norm": 3.52443528175354,
"learning_rate": 9.66018985879589e-06,
"loss": 1.1755,
"step": 3048
},
{
"epoch": 0.8109042553191489,
"grad_norm": 3.6943182945251465,
"learning_rate": 9.659871088643724e-06,
"loss": 1.2033,
"step": 3049
},
{
"epoch": 0.8111702127659575,
"grad_norm": 3.6708784103393555,
"learning_rate": 9.65955217430966e-06,
"loss": 1.2418,
"step": 3050
},
{
"epoch": 0.8114361702127659,
"grad_norm": 3.3263115882873535,
"learning_rate": 9.659233115803565e-06,
"loss": 1.133,
"step": 3051
},
{
"epoch": 0.8117021276595745,
"grad_norm": 3.9797048568725586,
"learning_rate": 9.658913913135314e-06,
"loss": 1.2549,
"step": 3052
},
{
"epoch": 0.811968085106383,
"grad_norm": 3.505920648574829,
"learning_rate": 9.658594566314781e-06,
"loss": 1.3769,
"step": 3053
},
{
"epoch": 0.8122340425531915,
"grad_norm": 3.466444492340088,
"learning_rate": 9.658275075351846e-06,
"loss": 1.2394,
"step": 3054
},
{
"epoch": 0.8125,
"grad_norm": 3.4919936656951904,
"learning_rate": 9.657955440256396e-06,
"loss": 1.1807,
"step": 3055
},
{
"epoch": 0.8127659574468085,
"grad_norm": 3.8641278743743896,
"learning_rate": 9.65763566103832e-06,
"loss": 1.2532,
"step": 3056
},
{
"epoch": 0.813031914893617,
"grad_norm": 3.5937435626983643,
"learning_rate": 9.657315737707514e-06,
"loss": 1.2234,
"step": 3057
},
{
"epoch": 0.8132978723404255,
"grad_norm": 3.8876571655273438,
"learning_rate": 9.656995670273877e-06,
"loss": 1.2057,
"step": 3058
},
{
"epoch": 0.8135638297872341,
"grad_norm": 3.532804012298584,
"learning_rate": 9.656675458747308e-06,
"loss": 1.2109,
"step": 3059
},
{
"epoch": 0.8138297872340425,
"grad_norm": 3.421060800552368,
"learning_rate": 9.65635510313772e-06,
"loss": 1.2677,
"step": 3060
},
{
"epoch": 0.8140957446808511,
"grad_norm": 3.599653720855713,
"learning_rate": 9.656034603455022e-06,
"loss": 1.2561,
"step": 3061
},
{
"epoch": 0.8143617021276596,
"grad_norm": 3.297154664993286,
"learning_rate": 9.655713959709133e-06,
"loss": 1.1693,
"step": 3062
},
{
"epoch": 0.814627659574468,
"grad_norm": 3.678478240966797,
"learning_rate": 9.65539317190997e-06,
"loss": 1.2403,
"step": 3063
},
{
"epoch": 0.8148936170212766,
"grad_norm": 3.6876394748687744,
"learning_rate": 9.655072240067464e-06,
"loss": 1.2774,
"step": 3064
},
{
"epoch": 0.8151595744680851,
"grad_norm": 3.6876394748687744,
"learning_rate": 9.65475116419154e-06,
"loss": 1.1866,
"step": 3065
},
{
"epoch": 0.8154255319148936,
"grad_norm": 4.459439277648926,
"learning_rate": 9.654429944292136e-06,
"loss": 1.255,
"step": 3066
},
{
"epoch": 0.8156914893617021,
"grad_norm": 3.636715888977051,
"learning_rate": 9.65410858037919e-06,
"loss": 1.4368,
"step": 3067
},
{
"epoch": 0.8159574468085107,
"grad_norm": 3.7368946075439453,
"learning_rate": 9.653787072462644e-06,
"loss": 1.3039,
"step": 3068
},
{
"epoch": 0.8162234042553191,
"grad_norm": 3.32794451713562,
"learning_rate": 9.653465420552445e-06,
"loss": 1.1366,
"step": 3069
},
{
"epoch": 0.8164893617021277,
"grad_norm": 3.3161087036132812,
"learning_rate": 9.65314362465855e-06,
"loss": 1.0602,
"step": 3070
},
{
"epoch": 0.8167553191489362,
"grad_norm": 3.6150729656219482,
"learning_rate": 9.652821684790912e-06,
"loss": 1.3939,
"step": 3071
},
{
"epoch": 0.8170212765957446,
"grad_norm": 3.7740049362182617,
"learning_rate": 9.652499600959493e-06,
"loss": 1.3626,
"step": 3072
},
{
"epoch": 0.8172872340425532,
"grad_norm": 3.8331871032714844,
"learning_rate": 9.65217737317426e-06,
"loss": 1.3151,
"step": 3073
},
{
"epoch": 0.8175531914893617,
"grad_norm": 3.3269927501678467,
"learning_rate": 9.65185500144518e-06,
"loss": 1.1879,
"step": 3074
},
{
"epoch": 0.8178191489361702,
"grad_norm": 3.318422555923462,
"learning_rate": 9.651532485782231e-06,
"loss": 1.2128,
"step": 3075
},
{
"epoch": 0.8180851063829787,
"grad_norm": 3.8798575401306152,
"learning_rate": 9.65120982619539e-06,
"loss": 1.2097,
"step": 3076
},
{
"epoch": 0.8183510638297873,
"grad_norm": 3.538886785507202,
"learning_rate": 9.650887022694639e-06,
"loss": 1.2558,
"step": 3077
},
{
"epoch": 0.8186170212765957,
"grad_norm": 3.8403117656707764,
"learning_rate": 9.65056407528997e-06,
"loss": 1.4618,
"step": 3078
},
{
"epoch": 0.8188829787234042,
"grad_norm": 3.731025218963623,
"learning_rate": 9.650240983991372e-06,
"loss": 1.2627,
"step": 3079
},
{
"epoch": 0.8191489361702128,
"grad_norm": 3.7986326217651367,
"learning_rate": 9.649917748808844e-06,
"loss": 1.2213,
"step": 3080
},
{
"epoch": 0.8194148936170212,
"grad_norm": 3.556394577026367,
"learning_rate": 9.649594369752384e-06,
"loss": 1.2093,
"step": 3081
},
{
"epoch": 0.8196808510638298,
"grad_norm": 3.989525318145752,
"learning_rate": 9.649270846832001e-06,
"loss": 1.4164,
"step": 3082
},
{
"epoch": 0.8199468085106383,
"grad_norm": 3.6029410362243652,
"learning_rate": 9.648947180057705e-06,
"loss": 1.315,
"step": 3083
},
{
"epoch": 0.8202127659574469,
"grad_norm": 3.677532196044922,
"learning_rate": 9.648623369439509e-06,
"loss": 1.3006,
"step": 3084
},
{
"epoch": 0.8204787234042553,
"grad_norm": 3.241009473800659,
"learning_rate": 9.648299414987434e-06,
"loss": 1.1637,
"step": 3085
},
{
"epoch": 0.8207446808510638,
"grad_norm": 3.470125198364258,
"learning_rate": 9.647975316711502e-06,
"loss": 1.1894,
"step": 3086
},
{
"epoch": 0.8210106382978724,
"grad_norm": 3.6613218784332275,
"learning_rate": 9.647651074621741e-06,
"loss": 1.2222,
"step": 3087
},
{
"epoch": 0.8212765957446808,
"grad_norm": 3.4483370780944824,
"learning_rate": 9.647326688728184e-06,
"loss": 1.1142,
"step": 3088
},
{
"epoch": 0.8215425531914894,
"grad_norm": 3.830843687057495,
"learning_rate": 9.647002159040868e-06,
"loss": 1.2923,
"step": 3089
},
{
"epoch": 0.8218085106382979,
"grad_norm": 3.445209264755249,
"learning_rate": 9.646677485569834e-06,
"loss": 1.2042,
"step": 3090
},
{
"epoch": 0.8220744680851064,
"grad_norm": 3.818505048751831,
"learning_rate": 9.646352668325128e-06,
"loss": 1.3102,
"step": 3091
},
{
"epoch": 0.8223404255319149,
"grad_norm": 3.4437718391418457,
"learning_rate": 9.646027707316798e-06,
"loss": 1.1836,
"step": 3092
},
{
"epoch": 0.8226063829787233,
"grad_norm": 3.690908670425415,
"learning_rate": 9.645702602554902e-06,
"loss": 1.1375,
"step": 3093
},
{
"epoch": 0.8228723404255319,
"grad_norm": 4.1998209953308105,
"learning_rate": 9.645377354049499e-06,
"loss": 1.3336,
"step": 3094
},
{
"epoch": 0.8231382978723404,
"grad_norm": 3.559067487716675,
"learning_rate": 9.64505196181065e-06,
"loss": 1.1967,
"step": 3095
},
{
"epoch": 0.823404255319149,
"grad_norm": 3.657874584197998,
"learning_rate": 9.644726425848425e-06,
"loss": 1.2603,
"step": 3096
},
{
"epoch": 0.8236702127659574,
"grad_norm": 3.2679355144500732,
"learning_rate": 9.644400746172896e-06,
"loss": 1.177,
"step": 3097
},
{
"epoch": 0.823936170212766,
"grad_norm": 3.9587206840515137,
"learning_rate": 9.644074922794139e-06,
"loss": 1.2768,
"step": 3098
},
{
"epoch": 0.8242021276595745,
"grad_norm": 3.2773869037628174,
"learning_rate": 9.643748955722238e-06,
"loss": 1.2397,
"step": 3099
},
{
"epoch": 0.824468085106383,
"grad_norm": 3.796388864517212,
"learning_rate": 9.643422844967274e-06,
"loss": 1.3281,
"step": 3100
},
{
"epoch": 0.8247340425531915,
"grad_norm": 3.6081080436706543,
"learning_rate": 9.643096590539343e-06,
"loss": 1.1514,
"step": 3101
},
{
"epoch": 0.825,
"grad_norm": 3.6461782455444336,
"learning_rate": 9.642770192448537e-06,
"loss": 1.3713,
"step": 3102
},
{
"epoch": 0.8252659574468085,
"grad_norm": 3.731442451477051,
"learning_rate": 9.642443650704954e-06,
"loss": 1.3621,
"step": 3103
},
{
"epoch": 0.825531914893617,
"grad_norm": 3.8544721603393555,
"learning_rate": 9.642116965318697e-06,
"loss": 1.2699,
"step": 3104
},
{
"epoch": 0.8257978723404256,
"grad_norm": 3.6057963371276855,
"learning_rate": 9.641790136299877e-06,
"loss": 1.1425,
"step": 3105
},
{
"epoch": 0.826063829787234,
"grad_norm": 3.618706226348877,
"learning_rate": 9.641463163658606e-06,
"loss": 1.309,
"step": 3106
},
{
"epoch": 0.8263297872340426,
"grad_norm": 3.2677018642425537,
"learning_rate": 9.641136047405e-06,
"loss": 1.221,
"step": 3107
},
{
"epoch": 0.8265957446808511,
"grad_norm": 3.311882734298706,
"learning_rate": 9.64080878754918e-06,
"loss": 1.2231,
"step": 3108
},
{
"epoch": 0.8268617021276595,
"grad_norm": 3.435105562210083,
"learning_rate": 9.640481384101273e-06,
"loss": 1.3697,
"step": 3109
},
{
"epoch": 0.8271276595744681,
"grad_norm": 3.77473783493042,
"learning_rate": 9.640153837071407e-06,
"loss": 1.4063,
"step": 3110
},
{
"epoch": 0.8273936170212766,
"grad_norm": 3.6035094261169434,
"learning_rate": 9.63982614646972e-06,
"loss": 1.3273,
"step": 3111
},
{
"epoch": 0.8276595744680851,
"grad_norm": 3.4138381481170654,
"learning_rate": 9.639498312306348e-06,
"loss": 1.1646,
"step": 3112
},
{
"epoch": 0.8279255319148936,
"grad_norm": 3.638125419616699,
"learning_rate": 9.639170334591437e-06,
"loss": 1.3288,
"step": 3113
},
{
"epoch": 0.8281914893617022,
"grad_norm": 3.917206287384033,
"learning_rate": 9.638842213335132e-06,
"loss": 1.3541,
"step": 3114
},
{
"epoch": 0.8284574468085106,
"grad_norm": 4.120351314544678,
"learning_rate": 9.63851394854759e-06,
"loss": 1.3473,
"step": 3115
},
{
"epoch": 0.8287234042553191,
"grad_norm": 3.6400179862976074,
"learning_rate": 9.638185540238963e-06,
"loss": 1.3199,
"step": 3116
},
{
"epoch": 0.8289893617021277,
"grad_norm": 3.4678385257720947,
"learning_rate": 9.637856988419413e-06,
"loss": 1.3348,
"step": 3117
},
{
"epoch": 0.8292553191489361,
"grad_norm": 3.490227460861206,
"learning_rate": 9.637528293099111e-06,
"loss": 1.2041,
"step": 3118
},
{
"epoch": 0.8295212765957447,
"grad_norm": 3.3085920810699463,
"learning_rate": 9.637199454288222e-06,
"loss": 1.2509,
"step": 3119
},
{
"epoch": 0.8297872340425532,
"grad_norm": 3.5364296436309814,
"learning_rate": 9.636870471996923e-06,
"loss": 1.3302,
"step": 3120
},
{
"epoch": 0.8300531914893617,
"grad_norm": 3.952470302581787,
"learning_rate": 9.636541346235392e-06,
"loss": 1.3387,
"step": 3121
},
{
"epoch": 0.8303191489361702,
"grad_norm": 3.678920269012451,
"learning_rate": 9.636212077013812e-06,
"loss": 1.2225,
"step": 3122
},
{
"epoch": 0.8305851063829788,
"grad_norm": 3.4960269927978516,
"learning_rate": 9.635882664342373e-06,
"loss": 1.1883,
"step": 3123
},
{
"epoch": 0.8308510638297872,
"grad_norm": 3.1453335285186768,
"learning_rate": 9.635553108231266e-06,
"loss": 1.0471,
"step": 3124
},
{
"epoch": 0.8311170212765957,
"grad_norm": 3.6323747634887695,
"learning_rate": 9.635223408690688e-06,
"loss": 1.1595,
"step": 3125
},
{
"epoch": 0.8313829787234043,
"grad_norm": 3.2408368587493896,
"learning_rate": 9.634893565730841e-06,
"loss": 1.2454,
"step": 3126
},
{
"epoch": 0.8316489361702127,
"grad_norm": 3.628117322921753,
"learning_rate": 9.63456357936193e-06,
"loss": 1.3161,
"step": 3127
},
{
"epoch": 0.8319148936170213,
"grad_norm": 3.896415948867798,
"learning_rate": 9.634233449594165e-06,
"loss": 1.29,
"step": 3128
},
{
"epoch": 0.8321808510638298,
"grad_norm": 3.3425135612487793,
"learning_rate": 9.63390317643776e-06,
"loss": 1.0845,
"step": 3129
},
{
"epoch": 0.8324468085106383,
"grad_norm": 3.593471050262451,
"learning_rate": 9.633572759902936e-06,
"loss": 1.1751,
"step": 3130
},
{
"epoch": 0.8327127659574468,
"grad_norm": 3.8105530738830566,
"learning_rate": 9.633242199999916e-06,
"loss": 1.2935,
"step": 3131
},
{
"epoch": 0.8329787234042553,
"grad_norm": 3.5633177757263184,
"learning_rate": 9.632911496738927e-06,
"loss": 1.2376,
"step": 3132
},
{
"epoch": 0.8332446808510638,
"grad_norm": 3.5305428504943848,
"learning_rate": 9.632580650130201e-06,
"loss": 1.2905,
"step": 3133
},
{
"epoch": 0.8335106382978723,
"grad_norm": 3.328059196472168,
"learning_rate": 9.632249660183977e-06,
"loss": 1.2773,
"step": 3134
},
{
"epoch": 0.8337765957446809,
"grad_norm": 3.8208043575286865,
"learning_rate": 9.631918526910493e-06,
"loss": 1.2472,
"step": 3135
},
{
"epoch": 0.8340425531914893,
"grad_norm": 3.6366043090820312,
"learning_rate": 9.631587250319998e-06,
"loss": 1.1361,
"step": 3136
},
{
"epoch": 0.8343085106382979,
"grad_norm": 3.3834152221679688,
"learning_rate": 9.631255830422739e-06,
"loss": 1.2766,
"step": 3137
},
{
"epoch": 0.8345744680851064,
"grad_norm": 3.6326873302459717,
"learning_rate": 9.630924267228973e-06,
"loss": 1.2792,
"step": 3138
},
{
"epoch": 0.8348404255319148,
"grad_norm": 3.720566749572754,
"learning_rate": 9.630592560748957e-06,
"loss": 1.113,
"step": 3139
},
{
"epoch": 0.8351063829787234,
"grad_norm": 3.732006549835205,
"learning_rate": 9.630260710992956e-06,
"loss": 1.1235,
"step": 3140
},
{
"epoch": 0.8353723404255319,
"grad_norm": 3.3565263748168945,
"learning_rate": 9.629928717971237e-06,
"loss": 1.1881,
"step": 3141
},
{
"epoch": 0.8356382978723405,
"grad_norm": 3.7368946075439453,
"learning_rate": 9.629596581694072e-06,
"loss": 1.2955,
"step": 3142
},
{
"epoch": 0.8359042553191489,
"grad_norm": 3.77895188331604,
"learning_rate": 9.629264302171739e-06,
"loss": 1.2691,
"step": 3143
},
{
"epoch": 0.8361702127659575,
"grad_norm": 3.6195473670959473,
"learning_rate": 9.628931879414519e-06,
"loss": 1.125,
"step": 3144
},
{
"epoch": 0.836436170212766,
"grad_norm": 3.4380621910095215,
"learning_rate": 9.628599313432694e-06,
"loss": 1.2379,
"step": 3145
},
{
"epoch": 0.8367021276595744,
"grad_norm": 3.972651958465576,
"learning_rate": 9.628266604236558e-06,
"loss": 1.2316,
"step": 3146
},
{
"epoch": 0.836968085106383,
"grad_norm": 3.770378351211548,
"learning_rate": 9.627933751836405e-06,
"loss": 1.4091,
"step": 3147
},
{
"epoch": 0.8372340425531914,
"grad_norm": 3.359567165374756,
"learning_rate": 9.627600756242532e-06,
"loss": 1.076,
"step": 3148
},
{
"epoch": 0.8375,
"grad_norm": 3.5449929237365723,
"learning_rate": 9.627267617465243e-06,
"loss": 1.1785,
"step": 3149
},
{
"epoch": 0.8377659574468085,
"grad_norm": 3.8262412548065186,
"learning_rate": 9.626934335514847e-06,
"loss": 1.1613,
"step": 3150
},
{
"epoch": 0.8380319148936171,
"grad_norm": 3.5842607021331787,
"learning_rate": 9.626600910401656e-06,
"loss": 1.4153,
"step": 3151
},
{
"epoch": 0.8382978723404255,
"grad_norm": 3.2474827766418457,
"learning_rate": 9.626267342135983e-06,
"loss": 1.1652,
"step": 3152
},
{
"epoch": 0.8385638297872341,
"grad_norm": 3.3414809703826904,
"learning_rate": 9.625933630728153e-06,
"loss": 1.062,
"step": 3153
},
{
"epoch": 0.8388297872340426,
"grad_norm": 3.496842384338379,
"learning_rate": 9.62559977618849e-06,
"loss": 1.255,
"step": 3154
},
{
"epoch": 0.839095744680851,
"grad_norm": 3.2567241191864014,
"learning_rate": 9.625265778527325e-06,
"loss": 1.1378,
"step": 3155
},
{
"epoch": 0.8393617021276596,
"grad_norm": 3.720892906188965,
"learning_rate": 9.62493163775499e-06,
"loss": 1.4717,
"step": 3156
},
{
"epoch": 0.839627659574468,
"grad_norm": 3.342963695526123,
"learning_rate": 9.624597353881827e-06,
"loss": 1.2974,
"step": 3157
},
{
"epoch": 0.8398936170212766,
"grad_norm": 3.3030459880828857,
"learning_rate": 9.624262926918174e-06,
"loss": 1.1823,
"step": 3158
},
{
"epoch": 0.8401595744680851,
"grad_norm": 3.4827306270599365,
"learning_rate": 9.623928356874384e-06,
"loss": 1.2282,
"step": 3159
},
{
"epoch": 0.8404255319148937,
"grad_norm": 3.247631311416626,
"learning_rate": 9.623593643760805e-06,
"loss": 1.2173,
"step": 3160
},
{
"epoch": 0.8406914893617021,
"grad_norm": 3.571974515914917,
"learning_rate": 9.623258787587795e-06,
"loss": 1.2277,
"step": 3161
},
{
"epoch": 0.8409574468085106,
"grad_norm": 3.5363829135894775,
"learning_rate": 9.622923788365716e-06,
"loss": 1.2212,
"step": 3162
},
{
"epoch": 0.8412234042553192,
"grad_norm": 3.816324234008789,
"learning_rate": 9.622588646104934e-06,
"loss": 1.3759,
"step": 3163
},
{
"epoch": 0.8414893617021276,
"grad_norm": 3.8033061027526855,
"learning_rate": 9.622253360815814e-06,
"loss": 1.1493,
"step": 3164
},
{
"epoch": 0.8417553191489362,
"grad_norm": 3.7425754070281982,
"learning_rate": 9.621917932508733e-06,
"loss": 1.1964,
"step": 3165
},
{
"epoch": 0.8420212765957447,
"grad_norm": 3.4991588592529297,
"learning_rate": 9.62158236119407e-06,
"loss": 1.2337,
"step": 3166
},
{
"epoch": 0.8422872340425532,
"grad_norm": 3.450436592102051,
"learning_rate": 9.621246646882209e-06,
"loss": 1.1413,
"step": 3167
},
{
"epoch": 0.8425531914893617,
"grad_norm": 3.449032783508301,
"learning_rate": 9.620910789583534e-06,
"loss": 1.269,
"step": 3168
},
{
"epoch": 0.8428191489361702,
"grad_norm": 3.609985589981079,
"learning_rate": 9.62057478930844e-06,
"loss": 1.2008,
"step": 3169
},
{
"epoch": 0.8430851063829787,
"grad_norm": 3.5072379112243652,
"learning_rate": 9.620238646067322e-06,
"loss": 1.2176,
"step": 3170
},
{
"epoch": 0.8433510638297872,
"grad_norm": 3.481480836868286,
"learning_rate": 9.619902359870579e-06,
"loss": 1.2152,
"step": 3171
},
{
"epoch": 0.8436170212765958,
"grad_norm": 3.640972852706909,
"learning_rate": 9.619565930728618e-06,
"loss": 1.4143,
"step": 3172
},
{
"epoch": 0.8438829787234042,
"grad_norm": 3.5323524475097656,
"learning_rate": 9.61922935865185e-06,
"loss": 1.1856,
"step": 3173
},
{
"epoch": 0.8441489361702128,
"grad_norm": 3.837163209915161,
"learning_rate": 9.618892643650686e-06,
"loss": 1.243,
"step": 3174
},
{
"epoch": 0.8444148936170213,
"grad_norm": 3.702387809753418,
"learning_rate": 9.618555785735546e-06,
"loss": 1.1177,
"step": 3175
},
{
"epoch": 0.8446808510638298,
"grad_norm": 3.696453094482422,
"learning_rate": 9.618218784916851e-06,
"loss": 1.2794,
"step": 3176
},
{
"epoch": 0.8449468085106383,
"grad_norm": 3.467315435409546,
"learning_rate": 9.617881641205032e-06,
"loss": 1.1261,
"step": 3177
},
{
"epoch": 0.8452127659574468,
"grad_norm": 3.392866849899292,
"learning_rate": 9.617544354610516e-06,
"loss": 1.3169,
"step": 3178
},
{
"epoch": 0.8454787234042553,
"grad_norm": 3.4695167541503906,
"learning_rate": 9.617206925143742e-06,
"loss": 1.3706,
"step": 3179
},
{
"epoch": 0.8457446808510638,
"grad_norm": 3.658966064453125,
"learning_rate": 9.61686935281515e-06,
"loss": 1.289,
"step": 3180
},
{
"epoch": 0.8460106382978724,
"grad_norm": 3.779771327972412,
"learning_rate": 9.616531637635183e-06,
"loss": 1.2999,
"step": 3181
},
{
"epoch": 0.8462765957446808,
"grad_norm": 3.8787152767181396,
"learning_rate": 9.616193779614294e-06,
"loss": 1.2876,
"step": 3182
},
{
"epoch": 0.8465425531914894,
"grad_norm": 3.5529751777648926,
"learning_rate": 9.615855778762933e-06,
"loss": 1.2511,
"step": 3183
},
{
"epoch": 0.8468085106382979,
"grad_norm": 4.681981563568115,
"learning_rate": 9.61551763509156e-06,
"loss": 1.3139,
"step": 3184
},
{
"epoch": 0.8470744680851063,
"grad_norm": 3.130150556564331,
"learning_rate": 9.615179348610638e-06,
"loss": 1.1744,
"step": 3185
},
{
"epoch": 0.8473404255319149,
"grad_norm": 3.374901056289673,
"learning_rate": 9.614840919330632e-06,
"loss": 1.0669,
"step": 3186
},
{
"epoch": 0.8476063829787234,
"grad_norm": 3.805163621902466,
"learning_rate": 9.614502347262015e-06,
"loss": 1.3958,
"step": 3187
},
{
"epoch": 0.847872340425532,
"grad_norm": 3.173311948776245,
"learning_rate": 9.614163632415265e-06,
"loss": 1.2402,
"step": 3188
},
{
"epoch": 0.8481382978723404,
"grad_norm": 3.7105321884155273,
"learning_rate": 9.613824774800857e-06,
"loss": 1.2364,
"step": 3189
},
{
"epoch": 0.848404255319149,
"grad_norm": 3.5191519260406494,
"learning_rate": 9.613485774429279e-06,
"loss": 1.3238,
"step": 3190
},
{
"epoch": 0.8486702127659574,
"grad_norm": 3.2969210147857666,
"learning_rate": 9.613146631311018e-06,
"loss": 1.2284,
"step": 3191
},
{
"epoch": 0.8489361702127659,
"grad_norm": 3.6637449264526367,
"learning_rate": 9.612807345456571e-06,
"loss": 1.1128,
"step": 3192
},
{
"epoch": 0.8492021276595745,
"grad_norm": 3.9408974647521973,
"learning_rate": 9.612467916876434e-06,
"loss": 1.171,
"step": 3193
},
{
"epoch": 0.8494680851063829,
"grad_norm": 3.3598899841308594,
"learning_rate": 9.612128345581108e-06,
"loss": 1.1941,
"step": 3194
},
{
"epoch": 0.8497340425531915,
"grad_norm": 3.5474600791931152,
"learning_rate": 9.6117886315811e-06,
"loss": 1.1679,
"step": 3195
},
{
"epoch": 0.85,
"grad_norm": 3.9404945373535156,
"learning_rate": 9.611448774886925e-06,
"loss": 1.3117,
"step": 3196
},
{
"epoch": 0.8502659574468086,
"grad_norm": 3.389488935470581,
"learning_rate": 9.611108775509093e-06,
"loss": 1.1708,
"step": 3197
},
{
"epoch": 0.850531914893617,
"grad_norm": 3.5706136226654053,
"learning_rate": 9.610768633458127e-06,
"loss": 1.249,
"step": 3198
},
{
"epoch": 0.8507978723404256,
"grad_norm": 3.899035930633545,
"learning_rate": 9.610428348744552e-06,
"loss": 1.2828,
"step": 3199
},
{
"epoch": 0.851063829787234,
"grad_norm": 3.648972511291504,
"learning_rate": 9.610087921378895e-06,
"loss": 1.2152,
"step": 3200
},
{
"epoch": 0.8513297872340425,
"grad_norm": 3.762350559234619,
"learning_rate": 9.60974735137169e-06,
"loss": 1.3663,
"step": 3201
},
{
"epoch": 0.8515957446808511,
"grad_norm": 3.8155291080474854,
"learning_rate": 9.609406638733474e-06,
"loss": 1.1777,
"step": 3202
},
{
"epoch": 0.8518617021276595,
"grad_norm": 3.5268514156341553,
"learning_rate": 9.609065783474792e-06,
"loss": 1.2634,
"step": 3203
},
{
"epoch": 0.8521276595744681,
"grad_norm": 3.3057730197906494,
"learning_rate": 9.608724785606186e-06,
"loss": 1.2208,
"step": 3204
},
{
"epoch": 0.8523936170212766,
"grad_norm": 3.9648935794830322,
"learning_rate": 9.60838364513821e-06,
"loss": 1.2936,
"step": 3205
},
{
"epoch": 0.8526595744680852,
"grad_norm": 3.8742856979370117,
"learning_rate": 9.608042362081418e-06,
"loss": 1.298,
"step": 3206
},
{
"epoch": 0.8529255319148936,
"grad_norm": 3.845383644104004,
"learning_rate": 9.60770093644637e-06,
"loss": 1.2274,
"step": 3207
},
{
"epoch": 0.8531914893617021,
"grad_norm": 3.532756805419922,
"learning_rate": 9.60735936824363e-06,
"loss": 1.339,
"step": 3208
},
{
"epoch": 0.8534574468085107,
"grad_norm": 3.7821319103240967,
"learning_rate": 9.607017657483768e-06,
"loss": 1.3414,
"step": 3209
},
{
"epoch": 0.8537234042553191,
"grad_norm": 3.5962960720062256,
"learning_rate": 9.606675804177355e-06,
"loss": 1.1815,
"step": 3210
},
{
"epoch": 0.8539893617021277,
"grad_norm": 3.8669700622558594,
"learning_rate": 9.606333808334966e-06,
"loss": 1.2821,
"step": 3211
},
{
"epoch": 0.8542553191489362,
"grad_norm": 3.288717269897461,
"learning_rate": 9.605991669967189e-06,
"loss": 1.1532,
"step": 3212
},
{
"epoch": 0.8545212765957447,
"grad_norm": 3.445049285888672,
"learning_rate": 9.605649389084605e-06,
"loss": 1.2534,
"step": 3213
},
{
"epoch": 0.8547872340425532,
"grad_norm": 3.075615644454956,
"learning_rate": 9.605306965697809e-06,
"loss": 1.0243,
"step": 3214
},
{
"epoch": 0.8550531914893617,
"grad_norm": 3.6676225662231445,
"learning_rate": 9.604964399817392e-06,
"loss": 1.2927,
"step": 3215
},
{
"epoch": 0.8553191489361702,
"grad_norm": 3.4644627571105957,
"learning_rate": 9.604621691453954e-06,
"loss": 1.2167,
"step": 3216
},
{
"epoch": 0.8555851063829787,
"grad_norm": 3.3108158111572266,
"learning_rate": 9.6042788406181e-06,
"loss": 1.2437,
"step": 3217
},
{
"epoch": 0.8558510638297873,
"grad_norm": 3.634568929672241,
"learning_rate": 9.603935847320437e-06,
"loss": 1.2587,
"step": 3218
},
{
"epoch": 0.8561170212765957,
"grad_norm": 3.472355365753174,
"learning_rate": 9.603592711571581e-06,
"loss": 1.1544,
"step": 3219
},
{
"epoch": 0.8563829787234043,
"grad_norm": 3.7467241287231445,
"learning_rate": 9.603249433382145e-06,
"loss": 1.1884,
"step": 3220
},
{
"epoch": 0.8566489361702128,
"grad_norm": 4.016312599182129,
"learning_rate": 9.60290601276275e-06,
"loss": 1.2884,
"step": 3221
},
{
"epoch": 0.8569148936170212,
"grad_norm": 3.432687282562256,
"learning_rate": 9.602562449724027e-06,
"loss": 1.2495,
"step": 3222
},
{
"epoch": 0.8571808510638298,
"grad_norm": 3.466148614883423,
"learning_rate": 9.6022187442766e-06,
"loss": 1.0967,
"step": 3223
},
{
"epoch": 0.8574468085106383,
"grad_norm": 3.7120723724365234,
"learning_rate": 9.60187489643111e-06,
"loss": 1.1666,
"step": 3224
},
{
"epoch": 0.8577127659574468,
"grad_norm": 3.6994261741638184,
"learning_rate": 9.60153090619819e-06,
"loss": 1.3106,
"step": 3225
},
{
"epoch": 0.8579787234042553,
"grad_norm": 3.481760025024414,
"learning_rate": 9.601186773588486e-06,
"loss": 1.2581,
"step": 3226
},
{
"epoch": 0.8582446808510639,
"grad_norm": 3.5702121257781982,
"learning_rate": 9.600842498612647e-06,
"loss": 1.3228,
"step": 3227
},
{
"epoch": 0.8585106382978723,
"grad_norm": 4.04725980758667,
"learning_rate": 9.600498081281324e-06,
"loss": 1.2431,
"step": 3228
},
{
"epoch": 0.8587765957446809,
"grad_norm": 3.632622480392456,
"learning_rate": 9.600153521605176e-06,
"loss": 1.1693,
"step": 3229
},
{
"epoch": 0.8590425531914894,
"grad_norm": 3.6271767616271973,
"learning_rate": 9.59980881959486e-06,
"loss": 1.2398,
"step": 3230
},
{
"epoch": 0.8593085106382978,
"grad_norm": 3.3347911834716797,
"learning_rate": 9.599463975261042e-06,
"loss": 1.1603,
"step": 3231
},
{
"epoch": 0.8595744680851064,
"grad_norm": 3.6934587955474854,
"learning_rate": 9.599118988614396e-06,
"loss": 1.305,
"step": 3232
},
{
"epoch": 0.8598404255319149,
"grad_norm": 3.461353063583374,
"learning_rate": 9.598773859665593e-06,
"loss": 1.2013,
"step": 3233
},
{
"epoch": 0.8601063829787234,
"grad_norm": 3.2839810848236084,
"learning_rate": 9.598428588425312e-06,
"loss": 1.1208,
"step": 3234
},
{
"epoch": 0.8603723404255319,
"grad_norm": 3.599320650100708,
"learning_rate": 9.598083174904235e-06,
"loss": 1.4372,
"step": 3235
},
{
"epoch": 0.8606382978723405,
"grad_norm": 3.540738105773926,
"learning_rate": 9.597737619113055e-06,
"loss": 1.0961,
"step": 3236
},
{
"epoch": 0.8609042553191489,
"grad_norm": 3.327744722366333,
"learning_rate": 9.597391921062457e-06,
"loss": 1.2087,
"step": 3237
},
{
"epoch": 0.8611702127659574,
"grad_norm": 3.619152545928955,
"learning_rate": 9.59704608076314e-06,
"loss": 1.3197,
"step": 3238
},
{
"epoch": 0.861436170212766,
"grad_norm": 3.381136178970337,
"learning_rate": 9.596700098225806e-06,
"loss": 1.258,
"step": 3239
},
{
"epoch": 0.8617021276595744,
"grad_norm": 3.6447596549987793,
"learning_rate": 9.59635397346116e-06,
"loss": 1.1877,
"step": 3240
},
{
"epoch": 0.861968085106383,
"grad_norm": 4.12053918838501,
"learning_rate": 9.596007706479908e-06,
"loss": 1.3712,
"step": 3241
},
{
"epoch": 0.8622340425531915,
"grad_norm": 3.1644914150238037,
"learning_rate": 9.595661297292768e-06,
"loss": 1.079,
"step": 3242
},
{
"epoch": 0.8625,
"grad_norm": 4.086709022521973,
"learning_rate": 9.595314745910455e-06,
"loss": 1.2766,
"step": 3243
},
{
"epoch": 0.8627659574468085,
"grad_norm": 4.086410999298096,
"learning_rate": 9.594968052343697e-06,
"loss": 1.2103,
"step": 3244
},
{
"epoch": 0.863031914893617,
"grad_norm": 3.550549030303955,
"learning_rate": 9.594621216603215e-06,
"loss": 1.3625,
"step": 3245
},
{
"epoch": 0.8632978723404255,
"grad_norm": 3.555739402770996,
"learning_rate": 9.594274238699744e-06,
"loss": 1.2163,
"step": 3246
},
{
"epoch": 0.863563829787234,
"grad_norm": 3.2902424335479736,
"learning_rate": 9.593927118644017e-06,
"loss": 0.9849,
"step": 3247
},
{
"epoch": 0.8638297872340426,
"grad_norm": 3.554675579071045,
"learning_rate": 9.593579856446778e-06,
"loss": 1.1437,
"step": 3248
},
{
"epoch": 0.864095744680851,
"grad_norm": 3.3788020610809326,
"learning_rate": 9.59323245211877e-06,
"loss": 1.2336,
"step": 3249
},
{
"epoch": 0.8643617021276596,
"grad_norm": 3.4318618774414062,
"learning_rate": 9.592884905670742e-06,
"loss": 1.2021,
"step": 3250
},
{
"epoch": 0.8646276595744681,
"grad_norm": 3.5366907119750977,
"learning_rate": 9.592537217113446e-06,
"loss": 1.3365,
"step": 3251
},
{
"epoch": 0.8648936170212767,
"grad_norm": 3.7782368659973145,
"learning_rate": 9.592189386457645e-06,
"loss": 1.3855,
"step": 3252
},
{
"epoch": 0.8651595744680851,
"grad_norm": 3.480111837387085,
"learning_rate": 9.591841413714094e-06,
"loss": 1.2029,
"step": 3253
},
{
"epoch": 0.8654255319148936,
"grad_norm": 3.305756092071533,
"learning_rate": 9.591493298893567e-06,
"loss": 1.1172,
"step": 3254
},
{
"epoch": 0.8656914893617021,
"grad_norm": 3.342085361480713,
"learning_rate": 9.591145042006829e-06,
"loss": 1.0662,
"step": 3255
},
{
"epoch": 0.8659574468085106,
"grad_norm": 3.6532325744628906,
"learning_rate": 9.590796643064658e-06,
"loss": 1.2083,
"step": 3256
},
{
"epoch": 0.8662234042553192,
"grad_norm": 3.8469889163970947,
"learning_rate": 9.590448102077835e-06,
"loss": 1.1185,
"step": 3257
},
{
"epoch": 0.8664893617021276,
"grad_norm": 3.6516644954681396,
"learning_rate": 9.590099419057142e-06,
"loss": 1.314,
"step": 3258
},
{
"epoch": 0.8667553191489362,
"grad_norm": 3.6090152263641357,
"learning_rate": 9.58975059401337e-06,
"loss": 1.2411,
"step": 3259
},
{
"epoch": 0.8670212765957447,
"grad_norm": 3.436042308807373,
"learning_rate": 9.589401626957309e-06,
"loss": 1.3095,
"step": 3260
},
{
"epoch": 0.8672872340425531,
"grad_norm": 3.2654285430908203,
"learning_rate": 9.589052517899759e-06,
"loss": 1.1265,
"step": 3261
},
{
"epoch": 0.8675531914893617,
"grad_norm": 3.6885263919830322,
"learning_rate": 9.588703266851523e-06,
"loss": 1.2568,
"step": 3262
},
{
"epoch": 0.8678191489361702,
"grad_norm": 3.9233293533325195,
"learning_rate": 9.588353873823404e-06,
"loss": 1.2273,
"step": 3263
},
{
"epoch": 0.8680851063829788,
"grad_norm": 3.254892349243164,
"learning_rate": 9.588004338826213e-06,
"loss": 1.0894,
"step": 3264
},
{
"epoch": 0.8683510638297872,
"grad_norm": 3.3320047855377197,
"learning_rate": 9.58765466187077e-06,
"loss": 1.3296,
"step": 3265
},
{
"epoch": 0.8686170212765958,
"grad_norm": 3.730386972427368,
"learning_rate": 9.587304842967887e-06,
"loss": 1.3909,
"step": 3266
},
{
"epoch": 0.8688829787234043,
"grad_norm": 3.557739734649658,
"learning_rate": 9.586954882128391e-06,
"loss": 1.2858,
"step": 3267
},
{
"epoch": 0.8691489361702127,
"grad_norm": 3.292858362197876,
"learning_rate": 9.58660477936311e-06,
"loss": 1.2351,
"step": 3268
},
{
"epoch": 0.8694148936170213,
"grad_norm": 3.87530255317688,
"learning_rate": 9.58625453468288e-06,
"loss": 1.1993,
"step": 3269
},
{
"epoch": 0.8696808510638298,
"grad_norm": 3.5502493381500244,
"learning_rate": 9.585904148098532e-06,
"loss": 1.2225,
"step": 3270
},
{
"epoch": 0.8699468085106383,
"grad_norm": 3.9256691932678223,
"learning_rate": 9.585553619620913e-06,
"loss": 1.4114,
"step": 3271
},
{
"epoch": 0.8702127659574468,
"grad_norm": 3.4120373725891113,
"learning_rate": 9.585202949260866e-06,
"loss": 1.1049,
"step": 3272
},
{
"epoch": 0.8704787234042554,
"grad_norm": 3.6664795875549316,
"learning_rate": 9.58485213702924e-06,
"loss": 1.1906,
"step": 3273
},
{
"epoch": 0.8707446808510638,
"grad_norm": 3.315964460372925,
"learning_rate": 9.584501182936891e-06,
"loss": 1.1104,
"step": 3274
},
{
"epoch": 0.8710106382978723,
"grad_norm": 3.3911890983581543,
"learning_rate": 9.584150086994678e-06,
"loss": 1.1979,
"step": 3275
},
{
"epoch": 0.8712765957446809,
"grad_norm": 3.3415443897247314,
"learning_rate": 9.583798849213467e-06,
"loss": 1.2044,
"step": 3276
},
{
"epoch": 0.8715425531914893,
"grad_norm": 3.4745638370513916,
"learning_rate": 9.58344746960412e-06,
"loss": 1.2126,
"step": 3277
},
{
"epoch": 0.8718085106382979,
"grad_norm": 3.358224868774414,
"learning_rate": 9.58309594817751e-06,
"loss": 1.2591,
"step": 3278
},
{
"epoch": 0.8720744680851064,
"grad_norm": 3.607102155685425,
"learning_rate": 9.582744284944519e-06,
"loss": 1.2529,
"step": 3279
},
{
"epoch": 0.8723404255319149,
"grad_norm": 3.4642441272735596,
"learning_rate": 9.582392479916023e-06,
"loss": 1.1749,
"step": 3280
},
{
"epoch": 0.8726063829787234,
"grad_norm": 3.5729122161865234,
"learning_rate": 9.582040533102908e-06,
"loss": 1.3488,
"step": 3281
},
{
"epoch": 0.872872340425532,
"grad_norm": 3.499811887741089,
"learning_rate": 9.581688444516064e-06,
"loss": 1.1714,
"step": 3282
},
{
"epoch": 0.8731382978723404,
"grad_norm": 3.7235212326049805,
"learning_rate": 9.581336214166386e-06,
"loss": 1.2336,
"step": 3283
},
{
"epoch": 0.8734042553191489,
"grad_norm": 3.3966002464294434,
"learning_rate": 9.580983842064772e-06,
"loss": 1.2197,
"step": 3284
},
{
"epoch": 0.8736702127659575,
"grad_norm": 3.7711052894592285,
"learning_rate": 9.580631328222124e-06,
"loss": 1.3275,
"step": 3285
},
{
"epoch": 0.8739361702127659,
"grad_norm": 3.6308035850524902,
"learning_rate": 9.58027867264935e-06,
"loss": 1.1036,
"step": 3286
},
{
"epoch": 0.8742021276595745,
"grad_norm": 3.5871105194091797,
"learning_rate": 9.579925875357361e-06,
"loss": 1.2099,
"step": 3287
},
{
"epoch": 0.874468085106383,
"grad_norm": 3.3607616424560547,
"learning_rate": 9.579572936357073e-06,
"loss": 1.3576,
"step": 3288
},
{
"epoch": 0.8747340425531915,
"grad_norm": 3.5098683834075928,
"learning_rate": 9.579219855659407e-06,
"loss": 1.1218,
"step": 3289
},
{
"epoch": 0.875,
"grad_norm": 3.2693376541137695,
"learning_rate": 9.578866633275289e-06,
"loss": 1.2022,
"step": 3290
},
{
"epoch": 0.8752659574468085,
"grad_norm": 3.9929087162017822,
"learning_rate": 9.578513269215643e-06,
"loss": 1.2267,
"step": 3291
},
{
"epoch": 0.875531914893617,
"grad_norm": 3.7925865650177,
"learning_rate": 9.578159763491408e-06,
"loss": 1.3087,
"step": 3292
},
{
"epoch": 0.8757978723404255,
"grad_norm": 3.5196733474731445,
"learning_rate": 9.577806116113519e-06,
"loss": 1.2655,
"step": 3293
},
{
"epoch": 0.8760638297872341,
"grad_norm": 3.529148578643799,
"learning_rate": 9.57745232709292e-06,
"loss": 1.1591,
"step": 3294
},
{
"epoch": 0.8763297872340425,
"grad_norm": 3.423691987991333,
"learning_rate": 9.577098396440557e-06,
"loss": 1.2312,
"step": 3295
},
{
"epoch": 0.8765957446808511,
"grad_norm": 3.6896872520446777,
"learning_rate": 9.57674432416738e-06,
"loss": 1.3319,
"step": 3296
},
{
"epoch": 0.8768617021276596,
"grad_norm": 3.2412073612213135,
"learning_rate": 9.576390110284343e-06,
"loss": 1.1944,
"step": 3297
},
{
"epoch": 0.877127659574468,
"grad_norm": 3.716688871383667,
"learning_rate": 9.576035754802411e-06,
"loss": 1.1713,
"step": 3298
},
{
"epoch": 0.8773936170212766,
"grad_norm": 3.721823215484619,
"learning_rate": 9.575681257732546e-06,
"loss": 1.2639,
"step": 3299
},
{
"epoch": 0.8776595744680851,
"grad_norm": 3.4668095111846924,
"learning_rate": 9.575326619085713e-06,
"loss": 1.2198,
"step": 3300
},
{
"epoch": 0.8779255319148936,
"grad_norm": 3.647254467010498,
"learning_rate": 9.574971838872889e-06,
"loss": 1.2587,
"step": 3301
},
{
"epoch": 0.8781914893617021,
"grad_norm": 3.563108205795288,
"learning_rate": 9.574616917105049e-06,
"loss": 1.2173,
"step": 3302
},
{
"epoch": 0.8784574468085107,
"grad_norm": 5.121861457824707,
"learning_rate": 9.574261853793176e-06,
"loss": 1.2889,
"step": 3303
},
{
"epoch": 0.8787234042553191,
"grad_norm": 3.9446914196014404,
"learning_rate": 9.573906648948256e-06,
"loss": 1.4498,
"step": 3304
},
{
"epoch": 0.8789893617021277,
"grad_norm": 3.368877649307251,
"learning_rate": 9.573551302581279e-06,
"loss": 1.1592,
"step": 3305
},
{
"epoch": 0.8792553191489362,
"grad_norm": 3.4360673427581787,
"learning_rate": 9.57319581470324e-06,
"loss": 1.2784,
"step": 3306
},
{
"epoch": 0.8795212765957446,
"grad_norm": 3.9499571323394775,
"learning_rate": 9.572840185325139e-06,
"loss": 1.2127,
"step": 3307
},
{
"epoch": 0.8797872340425532,
"grad_norm": 3.3917598724365234,
"learning_rate": 9.572484414457976e-06,
"loss": 1.1193,
"step": 3308
},
{
"epoch": 0.8800531914893617,
"grad_norm": 3.3946712017059326,
"learning_rate": 9.572128502112765e-06,
"loss": 1.2026,
"step": 3309
},
{
"epoch": 0.8803191489361702,
"grad_norm": 3.7101964950561523,
"learning_rate": 9.571772448300514e-06,
"loss": 1.2095,
"step": 3310
},
{
"epoch": 0.8805851063829787,
"grad_norm": 3.727922201156616,
"learning_rate": 9.571416253032241e-06,
"loss": 1.4194,
"step": 3311
},
{
"epoch": 0.8808510638297873,
"grad_norm": 3.457578182220459,
"learning_rate": 9.571059916318967e-06,
"loss": 1.26,
"step": 3312
},
{
"epoch": 0.8811170212765957,
"grad_norm": 3.6214683055877686,
"learning_rate": 9.570703438171717e-06,
"loss": 1.3319,
"step": 3313
},
{
"epoch": 0.8813829787234042,
"grad_norm": 3.4604907035827637,
"learning_rate": 9.570346818601522e-06,
"loss": 1.1988,
"step": 3314
},
{
"epoch": 0.8816489361702128,
"grad_norm": 3.6304855346679688,
"learning_rate": 9.569990057619414e-06,
"loss": 1.3127,
"step": 3315
},
{
"epoch": 0.8819148936170212,
"grad_norm": 3.6774277687072754,
"learning_rate": 9.569633155236436e-06,
"loss": 1.1874,
"step": 3316
},
{
"epoch": 0.8821808510638298,
"grad_norm": 3.3065695762634277,
"learning_rate": 9.569276111463626e-06,
"loss": 1.2098,
"step": 3317
},
{
"epoch": 0.8824468085106383,
"grad_norm": 3.712066650390625,
"learning_rate": 9.568918926312033e-06,
"loss": 1.2148,
"step": 3318
},
{
"epoch": 0.8827127659574469,
"grad_norm": 3.215933084487915,
"learning_rate": 9.568561599792709e-06,
"loss": 1.2424,
"step": 3319
},
{
"epoch": 0.8829787234042553,
"grad_norm": 3.317523717880249,
"learning_rate": 9.568204131916712e-06,
"loss": 1.1701,
"step": 3320
},
{
"epoch": 0.8832446808510638,
"grad_norm": 4.0422749519348145,
"learning_rate": 9.5678465226951e-06,
"loss": 1.3527,
"step": 3321
},
{
"epoch": 0.8835106382978724,
"grad_norm": 3.700969934463501,
"learning_rate": 9.56748877213894e-06,
"loss": 1.243,
"step": 3322
},
{
"epoch": 0.8837765957446808,
"grad_norm": 3.6172409057617188,
"learning_rate": 9.567130880259296e-06,
"loss": 1.3409,
"step": 3323
},
{
"epoch": 0.8840425531914894,
"grad_norm": 3.587956190109253,
"learning_rate": 9.56677284706725e-06,
"loss": 1.327,
"step": 3324
},
{
"epoch": 0.8843085106382979,
"grad_norm": 3.8839058876037598,
"learning_rate": 9.566414672573873e-06,
"loss": 1.2556,
"step": 3325
},
{
"epoch": 0.8845744680851064,
"grad_norm": 3.610464572906494,
"learning_rate": 9.56605635679025e-06,
"loss": 1.2233,
"step": 3326
},
{
"epoch": 0.8848404255319149,
"grad_norm": 3.350374221801758,
"learning_rate": 9.565697899727466e-06,
"loss": 1.1454,
"step": 3327
},
{
"epoch": 0.8851063829787233,
"grad_norm": 3.175729513168335,
"learning_rate": 9.565339301396616e-06,
"loss": 1.1474,
"step": 3328
},
{
"epoch": 0.8853723404255319,
"grad_norm": 3.39150333404541,
"learning_rate": 9.564980561808793e-06,
"loss": 1.1578,
"step": 3329
},
{
"epoch": 0.8856382978723404,
"grad_norm": 4.003450393676758,
"learning_rate": 9.564621680975095e-06,
"loss": 1.3537,
"step": 3330
},
{
"epoch": 0.885904255319149,
"grad_norm": 3.366062879562378,
"learning_rate": 9.564262658906628e-06,
"loss": 1.2119,
"step": 3331
},
{
"epoch": 0.8861702127659574,
"grad_norm": 4.014388084411621,
"learning_rate": 9.563903495614503e-06,
"loss": 1.3046,
"step": 3332
},
{
"epoch": 0.886436170212766,
"grad_norm": 3.3641979694366455,
"learning_rate": 9.563544191109828e-06,
"loss": 1.1204,
"step": 3333
},
{
"epoch": 0.8867021276595745,
"grad_norm": 3.584113836288452,
"learning_rate": 9.563184745403725e-06,
"loss": 1.1223,
"step": 3334
},
{
"epoch": 0.886968085106383,
"grad_norm": 3.905111312866211,
"learning_rate": 9.562825158507311e-06,
"loss": 1.2031,
"step": 3335
},
{
"epoch": 0.8872340425531915,
"grad_norm": 3.787869453430176,
"learning_rate": 9.562465430431716e-06,
"loss": 1.1798,
"step": 3336
},
{
"epoch": 0.8875,
"grad_norm": 3.336646795272827,
"learning_rate": 9.562105561188069e-06,
"loss": 1.0405,
"step": 3337
},
{
"epoch": 0.8877659574468085,
"grad_norm": 3.7780652046203613,
"learning_rate": 9.561745550787504e-06,
"loss": 1.1147,
"step": 3338
},
{
"epoch": 0.888031914893617,
"grad_norm": 3.8940999507904053,
"learning_rate": 9.561385399241164e-06,
"loss": 1.371,
"step": 3339
},
{
"epoch": 0.8882978723404256,
"grad_norm": 3.7703256607055664,
"learning_rate": 9.561025106560184e-06,
"loss": 1.2073,
"step": 3340
},
{
"epoch": 0.888563829787234,
"grad_norm": 3.8208539485931396,
"learning_rate": 9.560664672755721e-06,
"loss": 1.3914,
"step": 3341
},
{
"epoch": 0.8888297872340426,
"grad_norm": 3.8787341117858887,
"learning_rate": 9.560304097838922e-06,
"loss": 1.2999,
"step": 3342
},
{
"epoch": 0.8890957446808511,
"grad_norm": 3.4178457260131836,
"learning_rate": 9.559943381820947e-06,
"loss": 1.2978,
"step": 3343
},
{
"epoch": 0.8893617021276595,
"grad_norm": 3.7168829441070557,
"learning_rate": 9.559582524712953e-06,
"loss": 1.2428,
"step": 3344
},
{
"epoch": 0.8896276595744681,
"grad_norm": 3.8447728157043457,
"learning_rate": 9.55922152652611e-06,
"loss": 1.3121,
"step": 3345
},
{
"epoch": 0.8898936170212766,
"grad_norm": 3.5572218894958496,
"learning_rate": 9.558860387271583e-06,
"loss": 1.3853,
"step": 3346
},
{
"epoch": 0.8901595744680851,
"grad_norm": 3.461214780807495,
"learning_rate": 9.558499106960548e-06,
"loss": 1.2634,
"step": 3347
},
{
"epoch": 0.8904255319148936,
"grad_norm": 3.4366822242736816,
"learning_rate": 9.558137685604184e-06,
"loss": 1.322,
"step": 3348
},
{
"epoch": 0.8906914893617022,
"grad_norm": 3.7072808742523193,
"learning_rate": 9.557776123213673e-06,
"loss": 1.2393,
"step": 3349
},
{
"epoch": 0.8909574468085106,
"grad_norm": 3.6192643642425537,
"learning_rate": 9.557414419800204e-06,
"loss": 1.2106,
"step": 3350
},
{
"epoch": 0.8912234042553191,
"grad_norm": 3.3502161502838135,
"learning_rate": 9.557052575374967e-06,
"loss": 1.1333,
"step": 3351
},
{
"epoch": 0.8914893617021277,
"grad_norm": 3.4909167289733887,
"learning_rate": 9.556690589949158e-06,
"loss": 1.2107,
"step": 3352
},
{
"epoch": 0.8917553191489361,
"grad_norm": 3.3816614151000977,
"learning_rate": 9.556328463533976e-06,
"loss": 1.217,
"step": 3353
},
{
"epoch": 0.8920212765957447,
"grad_norm": 3.6492433547973633,
"learning_rate": 9.55596619614063e-06,
"loss": 1.1954,
"step": 3354
},
{
"epoch": 0.8922872340425532,
"grad_norm": 3.4829185009002686,
"learning_rate": 9.555603787780321e-06,
"loss": 1.1374,
"step": 3355
},
{
"epoch": 0.8925531914893617,
"grad_norm": 3.2989566326141357,
"learning_rate": 9.555241238464271e-06,
"loss": 1.2678,
"step": 3356
},
{
"epoch": 0.8928191489361702,
"grad_norm": 3.325765609741211,
"learning_rate": 9.554878548203695e-06,
"loss": 1.1352,
"step": 3357
},
{
"epoch": 0.8930851063829788,
"grad_norm": 3.680143356323242,
"learning_rate": 9.55451571700981e-06,
"loss": 1.1376,
"step": 3358
},
{
"epoch": 0.8933510638297872,
"grad_norm": 3.4539363384246826,
"learning_rate": 9.554152744893848e-06,
"loss": 1.2099,
"step": 3359
},
{
"epoch": 0.8936170212765957,
"grad_norm": 3.541053295135498,
"learning_rate": 9.553789631867039e-06,
"loss": 1.2115,
"step": 3360
},
{
"epoch": 0.8938829787234043,
"grad_norm": 3.2321863174438477,
"learning_rate": 9.553426377940618e-06,
"loss": 1.2008,
"step": 3361
},
{
"epoch": 0.8941489361702127,
"grad_norm": 4.26365852355957,
"learning_rate": 9.553062983125822e-06,
"loss": 1.3757,
"step": 3362
},
{
"epoch": 0.8944148936170213,
"grad_norm": 3.7996468544006348,
"learning_rate": 9.552699447433899e-06,
"loss": 1.3071,
"step": 3363
},
{
"epoch": 0.8946808510638298,
"grad_norm": 3.2904140949249268,
"learning_rate": 9.552335770876094e-06,
"loss": 1.0914,
"step": 3364
},
{
"epoch": 0.8949468085106383,
"grad_norm": 3.48201584815979,
"learning_rate": 9.551971953463659e-06,
"loss": 1.1438,
"step": 3365
},
{
"epoch": 0.8952127659574468,
"grad_norm": 3.721348285675049,
"learning_rate": 9.551607995207854e-06,
"loss": 1.1116,
"step": 3366
},
{
"epoch": 0.8954787234042553,
"grad_norm": 3.6480965614318848,
"learning_rate": 9.551243896119938e-06,
"loss": 1.1571,
"step": 3367
},
{
"epoch": 0.8957446808510638,
"grad_norm": 3.7615323066711426,
"learning_rate": 9.550879656211179e-06,
"loss": 1.4653,
"step": 3368
},
{
"epoch": 0.8960106382978723,
"grad_norm": 3.1234636306762695,
"learning_rate": 9.550515275492843e-06,
"loss": 1.1518,
"step": 3369
},
{
"epoch": 0.8962765957446809,
"grad_norm": 3.5595285892486572,
"learning_rate": 9.550150753976209e-06,
"loss": 1.213,
"step": 3370
},
{
"epoch": 0.8965425531914893,
"grad_norm": 3.4824399948120117,
"learning_rate": 9.549786091672553e-06,
"loss": 1.1228,
"step": 3371
},
{
"epoch": 0.8968085106382979,
"grad_norm": 3.6110517978668213,
"learning_rate": 9.549421288593157e-06,
"loss": 1.3169,
"step": 3372
},
{
"epoch": 0.8970744680851064,
"grad_norm": 4.197827339172363,
"learning_rate": 9.549056344749312e-06,
"loss": 1.4542,
"step": 3373
},
{
"epoch": 0.8973404255319148,
"grad_norm": 3.3921542167663574,
"learning_rate": 9.548691260152308e-06,
"loss": 1.236,
"step": 3374
},
{
"epoch": 0.8976063829787234,
"grad_norm": 3.5142951011657715,
"learning_rate": 9.54832603481344e-06,
"loss": 1.2546,
"step": 3375
},
{
"epoch": 0.8978723404255319,
"grad_norm": 3.390557050704956,
"learning_rate": 9.547960668744009e-06,
"loss": 1.2041,
"step": 3376
},
{
"epoch": 0.8981382978723405,
"grad_norm": 3.5497653484344482,
"learning_rate": 9.547595161955321e-06,
"loss": 1.2139,
"step": 3377
},
{
"epoch": 0.8984042553191489,
"grad_norm": 3.379268169403076,
"learning_rate": 9.547229514458684e-06,
"loss": 1.1503,
"step": 3378
},
{
"epoch": 0.8986702127659575,
"grad_norm": 3.826500177383423,
"learning_rate": 9.546863726265414e-06,
"loss": 1.2808,
"step": 3379
},
{
"epoch": 0.898936170212766,
"grad_norm": 3.121777296066284,
"learning_rate": 9.546497797386824e-06,
"loss": 1.1966,
"step": 3380
},
{
"epoch": 0.8992021276595744,
"grad_norm": 3.6707565784454346,
"learning_rate": 9.546131727834242e-06,
"loss": 1.33,
"step": 3381
},
{
"epoch": 0.899468085106383,
"grad_norm": 3.555612325668335,
"learning_rate": 9.545765517618992e-06,
"loss": 1.1858,
"step": 3382
},
{
"epoch": 0.8997340425531914,
"grad_norm": 3.481360912322998,
"learning_rate": 9.545399166752402e-06,
"loss": 1.4109,
"step": 3383
},
{
"epoch": 0.9,
"grad_norm": 3.1930184364318848,
"learning_rate": 9.545032675245814e-06,
"loss": 1.1161,
"step": 3384
},
{
"epoch": 0.9002659574468085,
"grad_norm": 3.5262556076049805,
"learning_rate": 9.544666043110562e-06,
"loss": 1.2255,
"step": 3385
},
{
"epoch": 0.9005319148936171,
"grad_norm": 3.4826877117156982,
"learning_rate": 9.544299270357992e-06,
"loss": 1.2001,
"step": 3386
},
{
"epoch": 0.9007978723404255,
"grad_norm": 3.602201223373413,
"learning_rate": 9.543932356999452e-06,
"loss": 1.2133,
"step": 3387
},
{
"epoch": 0.9010638297872341,
"grad_norm": 3.6607158184051514,
"learning_rate": 9.543565303046297e-06,
"loss": 1.1962,
"step": 3388
},
{
"epoch": 0.9013297872340426,
"grad_norm": 3.664412260055542,
"learning_rate": 9.543198108509879e-06,
"loss": 1.2857,
"step": 3389
},
{
"epoch": 0.901595744680851,
"grad_norm": 3.5442616939544678,
"learning_rate": 9.542830773401564e-06,
"loss": 1.2096,
"step": 3390
},
{
"epoch": 0.9018617021276596,
"grad_norm": 4.058464527130127,
"learning_rate": 9.542463297732716e-06,
"loss": 1.4371,
"step": 3391
},
{
"epoch": 0.902127659574468,
"grad_norm": 3.6064326763153076,
"learning_rate": 9.542095681514708e-06,
"loss": 1.2809,
"step": 3392
},
{
"epoch": 0.9023936170212766,
"grad_norm": 3.585545301437378,
"learning_rate": 9.541727924758907e-06,
"loss": 1.3174,
"step": 3393
},
{
"epoch": 0.9026595744680851,
"grad_norm": 3.465228319168091,
"learning_rate": 9.5413600274767e-06,
"loss": 1.2042,
"step": 3394
},
{
"epoch": 0.9029255319148937,
"grad_norm": 3.581475019454956,
"learning_rate": 9.540991989679468e-06,
"loss": 1.3837,
"step": 3395
},
{
"epoch": 0.9031914893617021,
"grad_norm": 3.4275171756744385,
"learning_rate": 9.540623811378597e-06,
"loss": 1.209,
"step": 3396
},
{
"epoch": 0.9034574468085106,
"grad_norm": 3.159125328063965,
"learning_rate": 9.540255492585478e-06,
"loss": 1.2519,
"step": 3397
},
{
"epoch": 0.9037234042553192,
"grad_norm": 3.7644615173339844,
"learning_rate": 9.53988703331151e-06,
"loss": 1.2965,
"step": 3398
},
{
"epoch": 0.9039893617021276,
"grad_norm": 3.519270896911621,
"learning_rate": 9.53951843356809e-06,
"loss": 1.2125,
"step": 3399
},
{
"epoch": 0.9042553191489362,
"grad_norm": 3.7408711910247803,
"learning_rate": 9.539149693366628e-06,
"loss": 1.3432,
"step": 3400
},
{
"epoch": 0.9045212765957447,
"grad_norm": 3.343994617462158,
"learning_rate": 9.538780812718527e-06,
"loss": 1.2149,
"step": 3401
},
{
"epoch": 0.9047872340425532,
"grad_norm": 3.3215134143829346,
"learning_rate": 9.538411791635205e-06,
"loss": 1.2844,
"step": 3402
},
{
"epoch": 0.9050531914893617,
"grad_norm": 3.9590845108032227,
"learning_rate": 9.53804263012808e-06,
"loss": 1.289,
"step": 3403
},
{
"epoch": 0.9053191489361702,
"grad_norm": 3.299415349960327,
"learning_rate": 9.537673328208572e-06,
"loss": 1.0875,
"step": 3404
},
{
"epoch": 0.9055851063829787,
"grad_norm": 3.5640780925750732,
"learning_rate": 9.53730388588811e-06,
"loss": 1.2735,
"step": 3405
},
{
"epoch": 0.9058510638297872,
"grad_norm": 3.2300360202789307,
"learning_rate": 9.536934303178123e-06,
"loss": 1.3574,
"step": 3406
},
{
"epoch": 0.9061170212765958,
"grad_norm": 3.6983630657196045,
"learning_rate": 9.536564580090046e-06,
"loss": 1.2751,
"step": 3407
},
{
"epoch": 0.9063829787234042,
"grad_norm": 3.740288257598877,
"learning_rate": 9.536194716635322e-06,
"loss": 1.25,
"step": 3408
},
{
"epoch": 0.9066489361702128,
"grad_norm": 3.6063649654388428,
"learning_rate": 9.535824712825393e-06,
"loss": 1.1656,
"step": 3409
},
{
"epoch": 0.9069148936170213,
"grad_norm": 3.738442897796631,
"learning_rate": 9.535454568671705e-06,
"loss": 1.3204,
"step": 3410
},
{
"epoch": 0.9071808510638298,
"grad_norm": 3.7406976222991943,
"learning_rate": 9.535084284185714e-06,
"loss": 1.2681,
"step": 3411
},
{
"epoch": 0.9074468085106383,
"grad_norm": 3.7773613929748535,
"learning_rate": 9.534713859378875e-06,
"loss": 1.2303,
"step": 3412
},
{
"epoch": 0.9077127659574468,
"grad_norm": 3.531691312789917,
"learning_rate": 9.53434329426265e-06,
"loss": 1.1495,
"step": 3413
},
{
"epoch": 0.9079787234042553,
"grad_norm": 3.730365514755249,
"learning_rate": 9.533972588848507e-06,
"loss": 1.1998,
"step": 3414
},
{
"epoch": 0.9082446808510638,
"grad_norm": 4.04153299331665,
"learning_rate": 9.533601743147911e-06,
"loss": 1.2527,
"step": 3415
},
{
"epoch": 0.9085106382978724,
"grad_norm": 3.547910451889038,
"learning_rate": 9.53323075717234e-06,
"loss": 1.3033,
"step": 3416
},
{
"epoch": 0.9087765957446808,
"grad_norm": 3.444802761077881,
"learning_rate": 9.532859630933276e-06,
"loss": 1.2513,
"step": 3417
},
{
"epoch": 0.9090425531914894,
"grad_norm": 3.7553112506866455,
"learning_rate": 9.532488364442195e-06,
"loss": 1.1689,
"step": 3418
},
{
"epoch": 0.9093085106382979,
"grad_norm": 3.748389959335327,
"learning_rate": 9.532116957710587e-06,
"loss": 1.2341,
"step": 3419
},
{
"epoch": 0.9095744680851063,
"grad_norm": 3.5497937202453613,
"learning_rate": 9.531745410749946e-06,
"loss": 1.198,
"step": 3420
},
{
"epoch": 0.9098404255319149,
"grad_norm": 3.540468692779541,
"learning_rate": 9.531373723571765e-06,
"loss": 1.3774,
"step": 3421
},
{
"epoch": 0.9101063829787234,
"grad_norm": 3.332838535308838,
"learning_rate": 9.531001896187548e-06,
"loss": 1.3205,
"step": 3422
},
{
"epoch": 0.910372340425532,
"grad_norm": 3.7700576782226562,
"learning_rate": 9.530629928608797e-06,
"loss": 1.0956,
"step": 3423
},
{
"epoch": 0.9106382978723404,
"grad_norm": 3.387652635574341,
"learning_rate": 9.530257820847022e-06,
"loss": 1.1835,
"step": 3424
},
{
"epoch": 0.910904255319149,
"grad_norm": 3.9318602085113525,
"learning_rate": 9.529885572913735e-06,
"loss": 1.3197,
"step": 3425
},
{
"epoch": 0.9111702127659574,
"grad_norm": 3.158997058868408,
"learning_rate": 9.529513184820458e-06,
"loss": 1.2074,
"step": 3426
},
{
"epoch": 0.9114361702127659,
"grad_norm": 3.5039327144622803,
"learning_rate": 9.529140656578707e-06,
"loss": 1.3652,
"step": 3427
},
{
"epoch": 0.9117021276595745,
"grad_norm": 3.682145118713379,
"learning_rate": 9.528767988200015e-06,
"loss": 1.1703,
"step": 3428
},
{
"epoch": 0.9119680851063829,
"grad_norm": 3.6255364418029785,
"learning_rate": 9.528395179695907e-06,
"loss": 1.269,
"step": 3429
},
{
"epoch": 0.9122340425531915,
"grad_norm": 3.666750907897949,
"learning_rate": 9.528022231077921e-06,
"loss": 1.4003,
"step": 3430
},
{
"epoch": 0.9125,
"grad_norm": 3.167771816253662,
"learning_rate": 9.527649142357596e-06,
"loss": 1.1409,
"step": 3431
},
{
"epoch": 0.9127659574468086,
"grad_norm": 3.6556570529937744,
"learning_rate": 9.527275913546475e-06,
"loss": 1.3847,
"step": 3432
},
{
"epoch": 0.913031914893617,
"grad_norm": 3.794574737548828,
"learning_rate": 9.526902544656108e-06,
"loss": 1.3673,
"step": 3433
},
{
"epoch": 0.9132978723404256,
"grad_norm": 3.597594976425171,
"learning_rate": 9.526529035698046e-06,
"loss": 1.068,
"step": 3434
},
{
"epoch": 0.913563829787234,
"grad_norm": 3.1316208839416504,
"learning_rate": 9.526155386683848e-06,
"loss": 1.1379,
"step": 3435
},
{
"epoch": 0.9138297872340425,
"grad_norm": 3.3742425441741943,
"learning_rate": 9.525781597625073e-06,
"loss": 1.2233,
"step": 3436
},
{
"epoch": 0.9140957446808511,
"grad_norm": 3.6747100353240967,
"learning_rate": 9.525407668533286e-06,
"loss": 1.3035,
"step": 3437
},
{
"epoch": 0.9143617021276595,
"grad_norm": 3.4809205532073975,
"learning_rate": 9.525033599420058e-06,
"loss": 1.1033,
"step": 3438
},
{
"epoch": 0.9146276595744681,
"grad_norm": 3.575571298599243,
"learning_rate": 9.524659390296961e-06,
"loss": 1.222,
"step": 3439
},
{
"epoch": 0.9148936170212766,
"grad_norm": 3.502336263656616,
"learning_rate": 9.524285041175578e-06,
"loss": 1.1575,
"step": 3440
},
{
"epoch": 0.9151595744680852,
"grad_norm": 3.6172244548797607,
"learning_rate": 9.523910552067489e-06,
"loss": 1.1852,
"step": 3441
},
{
"epoch": 0.9154255319148936,
"grad_norm": 3.6247096061706543,
"learning_rate": 9.523535922984281e-06,
"loss": 1.4405,
"step": 3442
},
{
"epoch": 0.9156914893617021,
"grad_norm": 3.5026776790618896,
"learning_rate": 9.523161153937546e-06,
"loss": 1.2206,
"step": 3443
},
{
"epoch": 0.9159574468085107,
"grad_norm": 3.7139501571655273,
"learning_rate": 9.522786244938877e-06,
"loss": 1.3555,
"step": 3444
},
{
"epoch": 0.9162234042553191,
"grad_norm": 3.3043665885925293,
"learning_rate": 9.522411195999879e-06,
"loss": 1.0747,
"step": 3445
},
{
"epoch": 0.9164893617021277,
"grad_norm": 3.3844451904296875,
"learning_rate": 9.522036007132154e-06,
"loss": 1.2419,
"step": 3446
},
{
"epoch": 0.9167553191489362,
"grad_norm": 3.499330520629883,
"learning_rate": 9.521660678347311e-06,
"loss": 1.2287,
"step": 3447
},
{
"epoch": 0.9170212765957447,
"grad_norm": 3.4153192043304443,
"learning_rate": 9.521285209656964e-06,
"loss": 1.2425,
"step": 3448
},
{
"epoch": 0.9172872340425532,
"grad_norm": 3.838230848312378,
"learning_rate": 9.520909601072726e-06,
"loss": 1.2476,
"step": 3449
},
{
"epoch": 0.9175531914893617,
"grad_norm": 3.879303455352783,
"learning_rate": 9.520533852606226e-06,
"loss": 1.2743,
"step": 3450
},
{
"epoch": 0.9178191489361702,
"grad_norm": 3.2687835693359375,
"learning_rate": 9.520157964269083e-06,
"loss": 1.0722,
"step": 3451
},
{
"epoch": 0.9180851063829787,
"grad_norm": 3.6070616245269775,
"learning_rate": 9.519781936072933e-06,
"loss": 1.2863,
"step": 3452
},
{
"epoch": 0.9183510638297873,
"grad_norm": 3.410642623901367,
"learning_rate": 9.519405768029408e-06,
"loss": 1.2184,
"step": 3453
},
{
"epoch": 0.9186170212765957,
"grad_norm": 3.642425775527954,
"learning_rate": 9.519029460150148e-06,
"loss": 1.2836,
"step": 3454
},
{
"epoch": 0.9188829787234043,
"grad_norm": 3.6479597091674805,
"learning_rate": 9.518653012446794e-06,
"loss": 1.3349,
"step": 3455
},
{
"epoch": 0.9191489361702128,
"grad_norm": 3.2941248416900635,
"learning_rate": 9.518276424931e-06,
"loss": 1.1445,
"step": 3456
},
{
"epoch": 0.9194148936170212,
"grad_norm": 3.3414933681488037,
"learning_rate": 9.51789969761441e-06,
"loss": 1.3321,
"step": 3457
},
{
"epoch": 0.9196808510638298,
"grad_norm": 3.39167857170105,
"learning_rate": 9.517522830508685e-06,
"loss": 1.222,
"step": 3458
},
{
"epoch": 0.9199468085106383,
"grad_norm": 3.520202875137329,
"learning_rate": 9.517145823625485e-06,
"loss": 1.2299,
"step": 3459
},
{
"epoch": 0.9202127659574468,
"grad_norm": 3.953166961669922,
"learning_rate": 9.516768676976476e-06,
"loss": 1.3692,
"step": 3460
},
{
"epoch": 0.9204787234042553,
"grad_norm": 3.654834032058716,
"learning_rate": 9.516391390573326e-06,
"loss": 1.1788,
"step": 3461
},
{
"epoch": 0.9207446808510639,
"grad_norm": 4.268529415130615,
"learning_rate": 9.516013964427708e-06,
"loss": 1.3661,
"step": 3462
},
{
"epoch": 0.9210106382978723,
"grad_norm": 3.7426726818084717,
"learning_rate": 9.515636398551302e-06,
"loss": 1.3322,
"step": 3463
},
{
"epoch": 0.9212765957446809,
"grad_norm": 3.7757678031921387,
"learning_rate": 9.515258692955788e-06,
"loss": 1.2663,
"step": 3464
},
{
"epoch": 0.9215425531914894,
"grad_norm": 3.2425293922424316,
"learning_rate": 9.514880847652855e-06,
"loss": 1.1537,
"step": 3465
},
{
"epoch": 0.9218085106382978,
"grad_norm": 3.891484498977661,
"learning_rate": 9.514502862654192e-06,
"loss": 1.3394,
"step": 3466
},
{
"epoch": 0.9220744680851064,
"grad_norm": 3.499422788619995,
"learning_rate": 9.514124737971495e-06,
"loss": 1.3386,
"step": 3467
},
{
"epoch": 0.9223404255319149,
"grad_norm": 3.8201444149017334,
"learning_rate": 9.513746473616466e-06,
"loss": 1.2374,
"step": 3468
},
{
"epoch": 0.9226063829787234,
"grad_norm": 3.488330841064453,
"learning_rate": 9.513368069600806e-06,
"loss": 1.1239,
"step": 3469
},
{
"epoch": 0.9228723404255319,
"grad_norm": 3.2124156951904297,
"learning_rate": 9.512989525936223e-06,
"loss": 1.2058,
"step": 3470
},
{
"epoch": 0.9231382978723405,
"grad_norm": 3.4447717666625977,
"learning_rate": 9.512610842634432e-06,
"loss": 1.1785,
"step": 3471
},
{
"epoch": 0.9234042553191489,
"grad_norm": 3.3703794479370117,
"learning_rate": 9.512232019707148e-06,
"loss": 1.3696,
"step": 3472
},
{
"epoch": 0.9236702127659574,
"grad_norm": 3.2821013927459717,
"learning_rate": 9.511853057166094e-06,
"loss": 1.181,
"step": 3473
},
{
"epoch": 0.923936170212766,
"grad_norm": 3.2314436435699463,
"learning_rate": 9.511473955022992e-06,
"loss": 1.2571,
"step": 3474
},
{
"epoch": 0.9242021276595744,
"grad_norm": 3.635651111602783,
"learning_rate": 9.511094713289575e-06,
"loss": 1.2779,
"step": 3475
},
{
"epoch": 0.924468085106383,
"grad_norm": 3.7356226444244385,
"learning_rate": 9.510715331977579e-06,
"loss": 1.3406,
"step": 3476
},
{
"epoch": 0.9247340425531915,
"grad_norm": 3.5567257404327393,
"learning_rate": 9.510335811098737e-06,
"loss": 1.2792,
"step": 3477
},
{
"epoch": 0.925,
"grad_norm": 3.603287696838379,
"learning_rate": 9.509956150664796e-06,
"loss": 1.1966,
"step": 3478
},
{
"epoch": 0.9252659574468085,
"grad_norm": 3.915576219558716,
"learning_rate": 9.509576350687502e-06,
"loss": 1.2955,
"step": 3479
},
{
"epoch": 0.925531914893617,
"grad_norm": 3.7345378398895264,
"learning_rate": 9.509196411178605e-06,
"loss": 1.1994,
"step": 3480
},
{
"epoch": 0.9257978723404255,
"grad_norm": 3.4640583992004395,
"learning_rate": 9.508816332149862e-06,
"loss": 1.1937,
"step": 3481
},
{
"epoch": 0.926063829787234,
"grad_norm": 3.5885074138641357,
"learning_rate": 9.508436113613036e-06,
"loss": 1.2895,
"step": 3482
},
{
"epoch": 0.9263297872340426,
"grad_norm": 3.241925001144409,
"learning_rate": 9.508055755579886e-06,
"loss": 1.1693,
"step": 3483
},
{
"epoch": 0.926595744680851,
"grad_norm": 3.664020538330078,
"learning_rate": 9.507675258062183e-06,
"loss": 1.2333,
"step": 3484
},
{
"epoch": 0.9268617021276596,
"grad_norm": 3.365907669067383,
"learning_rate": 9.507294621071702e-06,
"loss": 1.1572,
"step": 3485
},
{
"epoch": 0.9271276595744681,
"grad_norm": 3.634084939956665,
"learning_rate": 9.506913844620217e-06,
"loss": 1.1676,
"step": 3486
},
{
"epoch": 0.9273936170212767,
"grad_norm": 3.2822062969207764,
"learning_rate": 9.506532928719514e-06,
"loss": 1.2271,
"step": 3487
},
{
"epoch": 0.9276595744680851,
"grad_norm": 3.920335292816162,
"learning_rate": 9.506151873381376e-06,
"loss": 1.3218,
"step": 3488
},
{
"epoch": 0.9279255319148936,
"grad_norm": 3.8373231887817383,
"learning_rate": 9.505770678617592e-06,
"loss": 1.2391,
"step": 3489
},
{
"epoch": 0.9281914893617021,
"grad_norm": 3.5426108837127686,
"learning_rate": 9.50538934443996e-06,
"loss": 1.2676,
"step": 3490
},
{
"epoch": 0.9284574468085106,
"grad_norm": 3.550251007080078,
"learning_rate": 9.505007870860276e-06,
"loss": 1.2651,
"step": 3491
},
{
"epoch": 0.9287234042553192,
"grad_norm": 3.3801169395446777,
"learning_rate": 9.504626257890345e-06,
"loss": 1.1764,
"step": 3492
},
{
"epoch": 0.9289893617021276,
"grad_norm": 4.002630233764648,
"learning_rate": 9.504244505541974e-06,
"loss": 1.2602,
"step": 3493
},
{
"epoch": 0.9292553191489362,
"grad_norm": 3.6300952434539795,
"learning_rate": 9.503862613826976e-06,
"loss": 1.1864,
"step": 3494
},
{
"epoch": 0.9295212765957447,
"grad_norm": 3.574536085128784,
"learning_rate": 9.503480582757163e-06,
"loss": 1.3364,
"step": 3495
},
{
"epoch": 0.9297872340425531,
"grad_norm": 3.6244354248046875,
"learning_rate": 9.50309841234436e-06,
"loss": 1.1998,
"step": 3496
},
{
"epoch": 0.9300531914893617,
"grad_norm": 3.826706886291504,
"learning_rate": 9.502716102600393e-06,
"loss": 1.1791,
"step": 3497
},
{
"epoch": 0.9303191489361702,
"grad_norm": 3.3346476554870605,
"learning_rate": 9.502333653537085e-06,
"loss": 1.1943,
"step": 3498
},
{
"epoch": 0.9305851063829788,
"grad_norm": 3.4599905014038086,
"learning_rate": 9.501951065166276e-06,
"loss": 1.2966,
"step": 3499
},
{
"epoch": 0.9308510638297872,
"grad_norm": 3.6470425128936768,
"learning_rate": 9.501568337499798e-06,
"loss": 1.2633,
"step": 3500
},
{
"epoch": 0.9308510638297872,
"eval_loss": 1.2690000534057617,
"eval_runtime": 12.8787,
"eval_samples_per_second": 31.059,
"eval_steps_per_second": 3.882,
"step": 3500
},
{
"epoch": 0.9311170212765958,
"grad_norm": 3.7849044799804688,
"learning_rate": 9.501185470549496e-06,
"loss": 1.2158,
"step": 3501
},
{
"epoch": 0.9313829787234043,
"grad_norm": 3.3262534141540527,
"learning_rate": 9.500802464327217e-06,
"loss": 1.2429,
"step": 3502
},
{
"epoch": 0.9316489361702127,
"grad_norm": 3.458172559738159,
"learning_rate": 9.500419318844811e-06,
"loss": 1.2177,
"step": 3503
},
{
"epoch": 0.9319148936170213,
"grad_norm": 3.7243428230285645,
"learning_rate": 9.500036034114132e-06,
"loss": 1.2877,
"step": 3504
},
{
"epoch": 0.9321808510638298,
"grad_norm": 3.6194655895233154,
"learning_rate": 9.49965261014704e-06,
"loss": 1.3507,
"step": 3505
},
{
"epoch": 0.9324468085106383,
"grad_norm": 3.4799468517303467,
"learning_rate": 9.499269046955398e-06,
"loss": 1.2658,
"step": 3506
},
{
"epoch": 0.9327127659574468,
"grad_norm": 3.6711440086364746,
"learning_rate": 9.498885344551077e-06,
"loss": 1.1922,
"step": 3507
},
{
"epoch": 0.9329787234042554,
"grad_norm": 3.7202506065368652,
"learning_rate": 9.498501502945943e-06,
"loss": 1.1922,
"step": 3508
},
{
"epoch": 0.9332446808510638,
"grad_norm": 3.440639019012451,
"learning_rate": 9.498117522151878e-06,
"loss": 1.1795,
"step": 3509
},
{
"epoch": 0.9335106382978723,
"grad_norm": 3.513429880142212,
"learning_rate": 9.497733402180761e-06,
"loss": 1.2098,
"step": 3510
},
{
"epoch": 0.9337765957446809,
"grad_norm": 3.599651575088501,
"learning_rate": 9.497349143044478e-06,
"loss": 1.2052,
"step": 3511
},
{
"epoch": 0.9340425531914893,
"grad_norm": 4.015235900878906,
"learning_rate": 9.496964744754915e-06,
"loss": 1.233,
"step": 3512
},
{
"epoch": 0.9343085106382979,
"grad_norm": 3.3815979957580566,
"learning_rate": 9.49658020732397e-06,
"loss": 1.1291,
"step": 3513
},
{
"epoch": 0.9345744680851064,
"grad_norm": 3.3032724857330322,
"learning_rate": 9.49619553076354e-06,
"loss": 1.2174,
"step": 3514
},
{
"epoch": 0.9348404255319149,
"grad_norm": 3.571817398071289,
"learning_rate": 9.495810715085526e-06,
"loss": 1.3212,
"step": 3515
},
{
"epoch": 0.9351063829787234,
"grad_norm": 3.5486996173858643,
"learning_rate": 9.495425760301836e-06,
"loss": 1.1428,
"step": 3516
},
{
"epoch": 0.935372340425532,
"grad_norm": 3.3801069259643555,
"learning_rate": 9.495040666424378e-06,
"loss": 1.1673,
"step": 3517
},
{
"epoch": 0.9356382978723404,
"grad_norm": 3.6057615280151367,
"learning_rate": 9.494655433465071e-06,
"loss": 1.1342,
"step": 3518
},
{
"epoch": 0.9359042553191489,
"grad_norm": 3.6146769523620605,
"learning_rate": 9.494270061435834e-06,
"loss": 1.4436,
"step": 3519
},
{
"epoch": 0.9361702127659575,
"grad_norm": 3.200052499771118,
"learning_rate": 9.493884550348589e-06,
"loss": 1.1598,
"step": 3520
},
{
"epoch": 0.9364361702127659,
"grad_norm": 3.6785783767700195,
"learning_rate": 9.493498900215265e-06,
"loss": 1.2838,
"step": 3521
},
{
"epoch": 0.9367021276595745,
"grad_norm": 3.905540943145752,
"learning_rate": 9.493113111047794e-06,
"loss": 1.2665,
"step": 3522
},
{
"epoch": 0.936968085106383,
"grad_norm": 3.300579786300659,
"learning_rate": 9.492727182858115e-06,
"loss": 1.2111,
"step": 3523
},
{
"epoch": 0.9372340425531915,
"grad_norm": 3.8752784729003906,
"learning_rate": 9.492341115658167e-06,
"loss": 1.2444,
"step": 3524
},
{
"epoch": 0.9375,
"grad_norm": 3.561800241470337,
"learning_rate": 9.491954909459895e-06,
"loss": 1.2224,
"step": 3525
},
{
"epoch": 0.9377659574468085,
"grad_norm": 3.434983730316162,
"learning_rate": 9.491568564275252e-06,
"loss": 1.2249,
"step": 3526
},
{
"epoch": 0.938031914893617,
"grad_norm": 3.5711958408355713,
"learning_rate": 9.491182080116185e-06,
"loss": 1.3134,
"step": 3527
},
{
"epoch": 0.9382978723404255,
"grad_norm": 3.2614593505859375,
"learning_rate": 9.490795456994658e-06,
"loss": 1.1418,
"step": 3528
},
{
"epoch": 0.9385638297872341,
"grad_norm": 3.7001163959503174,
"learning_rate": 9.490408694922635e-06,
"loss": 1.2611,
"step": 3529
},
{
"epoch": 0.9388297872340425,
"grad_norm": 3.287165880203247,
"learning_rate": 9.490021793912079e-06,
"loss": 1.1458,
"step": 3530
},
{
"epoch": 0.9390957446808511,
"grad_norm": 3.9669268131256104,
"learning_rate": 9.489634753974961e-06,
"loss": 1.1978,
"step": 3531
},
{
"epoch": 0.9393617021276596,
"grad_norm": 3.8696441650390625,
"learning_rate": 9.48924757512326e-06,
"loss": 1.3488,
"step": 3532
},
{
"epoch": 0.939627659574468,
"grad_norm": 3.8109893798828125,
"learning_rate": 9.48886025736895e-06,
"loss": 1.2341,
"step": 3533
},
{
"epoch": 0.9398936170212766,
"grad_norm": 3.3541629314422607,
"learning_rate": 9.488472800724022e-06,
"loss": 1.1629,
"step": 3534
},
{
"epoch": 0.9401595744680851,
"grad_norm": 3.4784152507781982,
"learning_rate": 9.48808520520046e-06,
"loss": 1.3021,
"step": 3535
},
{
"epoch": 0.9404255319148936,
"grad_norm": 3.4299418926239014,
"learning_rate": 9.487697470810257e-06,
"loss": 1.1674,
"step": 3536
},
{
"epoch": 0.9406914893617021,
"grad_norm": 3.467414617538452,
"learning_rate": 9.487309597565413e-06,
"loss": 1.1953,
"step": 3537
},
{
"epoch": 0.9409574468085107,
"grad_norm": 3.263312816619873,
"learning_rate": 9.486921585477924e-06,
"loss": 1.1662,
"step": 3538
},
{
"epoch": 0.9412234042553191,
"grad_norm": 3.3032853603363037,
"learning_rate": 9.486533434559801e-06,
"loss": 1.2386,
"step": 3539
},
{
"epoch": 0.9414893617021277,
"grad_norm": 3.641338348388672,
"learning_rate": 9.48614514482305e-06,
"loss": 1.25,
"step": 3540
},
{
"epoch": 0.9417553191489362,
"grad_norm": 3.5189712047576904,
"learning_rate": 9.485756716279686e-06,
"loss": 1.2763,
"step": 3541
},
{
"epoch": 0.9420212765957446,
"grad_norm": 3.464155912399292,
"learning_rate": 9.485368148941728e-06,
"loss": 1.278,
"step": 3542
},
{
"epoch": 0.9422872340425532,
"grad_norm": 3.5938682556152344,
"learning_rate": 9.484979442821199e-06,
"loss": 1.1817,
"step": 3543
},
{
"epoch": 0.9425531914893617,
"grad_norm": 3.399099588394165,
"learning_rate": 9.484590597930125e-06,
"loss": 1.3007,
"step": 3544
},
{
"epoch": 0.9428191489361702,
"grad_norm": 3.681652545928955,
"learning_rate": 9.484201614280539e-06,
"loss": 1.1233,
"step": 3545
},
{
"epoch": 0.9430851063829787,
"grad_norm": 3.4110119342803955,
"learning_rate": 9.483812491884475e-06,
"loss": 1.3159,
"step": 3546
},
{
"epoch": 0.9433510638297873,
"grad_norm": 3.347201347351074,
"learning_rate": 9.483423230753975e-06,
"loss": 1.2668,
"step": 3547
},
{
"epoch": 0.9436170212765957,
"grad_norm": 3.551835775375366,
"learning_rate": 9.48303383090108e-06,
"loss": 1.2695,
"step": 3548
},
{
"epoch": 0.9438829787234042,
"grad_norm": 7.742011547088623,
"learning_rate": 9.48264429233784e-06,
"loss": 1.3468,
"step": 3549
},
{
"epoch": 0.9441489361702128,
"grad_norm": 3.5810296535491943,
"learning_rate": 9.482254615076307e-06,
"loss": 1.2088,
"step": 3550
},
{
"epoch": 0.9444148936170212,
"grad_norm": 3.6081788539886475,
"learning_rate": 9.481864799128541e-06,
"loss": 1.199,
"step": 3551
},
{
"epoch": 0.9446808510638298,
"grad_norm": 3.4480881690979004,
"learning_rate": 9.481474844506602e-06,
"loss": 1.2016,
"step": 3552
},
{
"epoch": 0.9449468085106383,
"grad_norm": 3.4126522541046143,
"learning_rate": 9.481084751222553e-06,
"loss": 1.0633,
"step": 3553
},
{
"epoch": 0.9452127659574469,
"grad_norm": 3.731552839279175,
"learning_rate": 9.480694519288467e-06,
"loss": 1.3171,
"step": 3554
},
{
"epoch": 0.9454787234042553,
"grad_norm": 3.7800607681274414,
"learning_rate": 9.480304148716418e-06,
"loss": 1.4008,
"step": 3555
},
{
"epoch": 0.9457446808510638,
"grad_norm": 3.509230375289917,
"learning_rate": 9.47991363951848e-06,
"loss": 1.2949,
"step": 3556
},
{
"epoch": 0.9460106382978724,
"grad_norm": 3.7124991416931152,
"learning_rate": 9.479522991706744e-06,
"loss": 1.1951,
"step": 3557
},
{
"epoch": 0.9462765957446808,
"grad_norm": 3.6707465648651123,
"learning_rate": 9.479132205293291e-06,
"loss": 1.1625,
"step": 3558
},
{
"epoch": 0.9465425531914894,
"grad_norm": 3.456841468811035,
"learning_rate": 9.478741280290214e-06,
"loss": 1.1969,
"step": 3559
},
{
"epoch": 0.9468085106382979,
"grad_norm": 4.189627170562744,
"learning_rate": 9.478350216709609e-06,
"loss": 1.4571,
"step": 3560
},
{
"epoch": 0.9470744680851064,
"grad_norm": 3.5188887119293213,
"learning_rate": 9.477959014563575e-06,
"loss": 1.2589,
"step": 3561
},
{
"epoch": 0.9473404255319149,
"grad_norm": 3.594780206680298,
"learning_rate": 9.477567673864217e-06,
"loss": 1.2652,
"step": 3562
},
{
"epoch": 0.9476063829787233,
"grad_norm": 3.3485286235809326,
"learning_rate": 9.477176194623644e-06,
"loss": 1.2256,
"step": 3563
},
{
"epoch": 0.9478723404255319,
"grad_norm": 3.549306631088257,
"learning_rate": 9.476784576853967e-06,
"loss": 1.2868,
"step": 3564
},
{
"epoch": 0.9481382978723404,
"grad_norm": 3.50877046585083,
"learning_rate": 9.476392820567306e-06,
"loss": 1.0912,
"step": 3565
},
{
"epoch": 0.948404255319149,
"grad_norm": 3.3570492267608643,
"learning_rate": 9.476000925775782e-06,
"loss": 1.2827,
"step": 3566
},
{
"epoch": 0.9486702127659574,
"grad_norm": 3.3039703369140625,
"learning_rate": 9.475608892491516e-06,
"loss": 1.1552,
"step": 3567
},
{
"epoch": 0.948936170212766,
"grad_norm": 3.559574604034424,
"learning_rate": 9.475216720726644e-06,
"loss": 1.1988,
"step": 3568
},
{
"epoch": 0.9492021276595745,
"grad_norm": 3.8060848712921143,
"learning_rate": 9.474824410493298e-06,
"loss": 1.3264,
"step": 3569
},
{
"epoch": 0.949468085106383,
"grad_norm": 3.3232123851776123,
"learning_rate": 9.474431961803615e-06,
"loss": 1.1884,
"step": 3570
},
{
"epoch": 0.9497340425531915,
"grad_norm": 3.821077346801758,
"learning_rate": 9.47403937466974e-06,
"loss": 1.3414,
"step": 3571
},
{
"epoch": 0.95,
"grad_norm": 3.464698076248169,
"learning_rate": 9.473646649103819e-06,
"loss": 1.1284,
"step": 3572
},
{
"epoch": 0.9502659574468085,
"grad_norm": 3.464268922805786,
"learning_rate": 9.473253785118003e-06,
"loss": 1.3262,
"step": 3573
},
{
"epoch": 0.950531914893617,
"grad_norm": 3.7841787338256836,
"learning_rate": 9.472860782724448e-06,
"loss": 1.1169,
"step": 3574
},
{
"epoch": 0.9507978723404256,
"grad_norm": 3.278888463973999,
"learning_rate": 9.472467641935314e-06,
"loss": 1.1413,
"step": 3575
},
{
"epoch": 0.951063829787234,
"grad_norm": 3.321603536605835,
"learning_rate": 9.472074362762767e-06,
"loss": 1.0513,
"step": 3576
},
{
"epoch": 0.9513297872340426,
"grad_norm": 3.8839926719665527,
"learning_rate": 9.471680945218973e-06,
"loss": 1.2412,
"step": 3577
},
{
"epoch": 0.9515957446808511,
"grad_norm": 3.5885181427001953,
"learning_rate": 9.471287389316107e-06,
"loss": 1.1092,
"step": 3578
},
{
"epoch": 0.9518617021276595,
"grad_norm": 3.592010498046875,
"learning_rate": 9.470893695066345e-06,
"loss": 1.275,
"step": 3579
},
{
"epoch": 0.9521276595744681,
"grad_norm": 3.785581111907959,
"learning_rate": 9.470499862481867e-06,
"loss": 1.3256,
"step": 3580
},
{
"epoch": 0.9523936170212766,
"grad_norm": 3.41489315032959,
"learning_rate": 9.47010589157486e-06,
"loss": 1.2419,
"step": 3581
},
{
"epoch": 0.9526595744680851,
"grad_norm": 3.4412648677825928,
"learning_rate": 9.469711782357513e-06,
"loss": 1.3029,
"step": 3582
},
{
"epoch": 0.9529255319148936,
"grad_norm": 3.6879758834838867,
"learning_rate": 9.469317534842025e-06,
"loss": 1.217,
"step": 3583
},
{
"epoch": 0.9531914893617022,
"grad_norm": 3.8642208576202393,
"learning_rate": 9.468923149040587e-06,
"loss": 1.3035,
"step": 3584
},
{
"epoch": 0.9534574468085106,
"grad_norm": 3.9491965770721436,
"learning_rate": 9.468528624965406e-06,
"loss": 1.3494,
"step": 3585
},
{
"epoch": 0.9537234042553191,
"grad_norm": 3.6963748931884766,
"learning_rate": 9.468133962628688e-06,
"loss": 1.1793,
"step": 3586
},
{
"epoch": 0.9539893617021277,
"grad_norm": 3.4110567569732666,
"learning_rate": 9.467739162042643e-06,
"loss": 1.1798,
"step": 3587
},
{
"epoch": 0.9542553191489361,
"grad_norm": 3.718494176864624,
"learning_rate": 9.46734422321949e-06,
"loss": 1.3528,
"step": 3588
},
{
"epoch": 0.9545212765957447,
"grad_norm": 3.9455974102020264,
"learning_rate": 9.466949146171449e-06,
"loss": 1.341,
"step": 3589
},
{
"epoch": 0.9547872340425532,
"grad_norm": 3.668195962905884,
"learning_rate": 9.46655393091074e-06,
"loss": 1.1503,
"step": 3590
},
{
"epoch": 0.9550531914893617,
"grad_norm": 3.662208080291748,
"learning_rate": 9.466158577449593e-06,
"loss": 1.3243,
"step": 3591
},
{
"epoch": 0.9553191489361702,
"grad_norm": 3.463543176651001,
"learning_rate": 9.465763085800244e-06,
"loss": 1.187,
"step": 3592
},
{
"epoch": 0.9555851063829788,
"grad_norm": 3.6207196712493896,
"learning_rate": 9.465367455974926e-06,
"loss": 1.2523,
"step": 3593
},
{
"epoch": 0.9558510638297872,
"grad_norm": 3.3348443508148193,
"learning_rate": 9.46497168798588e-06,
"loss": 1.2145,
"step": 3594
},
{
"epoch": 0.9561170212765957,
"grad_norm": 4.174299240112305,
"learning_rate": 9.464575781845355e-06,
"loss": 1.4818,
"step": 3595
},
{
"epoch": 0.9563829787234043,
"grad_norm": 3.3657476902008057,
"learning_rate": 9.464179737565598e-06,
"loss": 1.2587,
"step": 3596
},
{
"epoch": 0.9566489361702127,
"grad_norm": 3.697920560836792,
"learning_rate": 9.463783555158866e-06,
"loss": 1.36,
"step": 3597
},
{
"epoch": 0.9569148936170213,
"grad_norm": 3.825244903564453,
"learning_rate": 9.463387234637413e-06,
"loss": 1.2879,
"step": 3598
},
{
"epoch": 0.9571808510638298,
"grad_norm": 3.5759551525115967,
"learning_rate": 9.462990776013504e-06,
"loss": 1.4189,
"step": 3599
},
{
"epoch": 0.9574468085106383,
"grad_norm": 3.6317455768585205,
"learning_rate": 9.462594179299408e-06,
"loss": 1.3723,
"step": 3600
},
{
"epoch": 0.9577127659574468,
"grad_norm": 3.254585027694702,
"learning_rate": 9.46219744450739e-06,
"loss": 1.1231,
"step": 3601
},
{
"epoch": 0.9579787234042553,
"grad_norm": 3.0535624027252197,
"learning_rate": 9.461800571649734e-06,
"loss": 1.0536,
"step": 3602
},
{
"epoch": 0.9582446808510638,
"grad_norm": 3.603959798812866,
"learning_rate": 9.461403560738713e-06,
"loss": 1.254,
"step": 3603
},
{
"epoch": 0.9585106382978723,
"grad_norm": 3.4408342838287354,
"learning_rate": 9.461006411786613e-06,
"loss": 1.2253,
"step": 3604
},
{
"epoch": 0.9587765957446809,
"grad_norm": 3.6801369190216064,
"learning_rate": 9.460609124805724e-06,
"loss": 1.2253,
"step": 3605
},
{
"epoch": 0.9590425531914893,
"grad_norm": 3.968122959136963,
"learning_rate": 9.460211699808334e-06,
"loss": 1.2456,
"step": 3606
},
{
"epoch": 0.9593085106382979,
"grad_norm": 3.602989912033081,
"learning_rate": 9.459814136806746e-06,
"loss": 1.2261,
"step": 3607
},
{
"epoch": 0.9595744680851064,
"grad_norm": 3.5720174312591553,
"learning_rate": 9.459416435813258e-06,
"loss": 1.1869,
"step": 3608
},
{
"epoch": 0.9598404255319148,
"grad_norm": 3.626312732696533,
"learning_rate": 9.459018596840173e-06,
"loss": 1.3385,
"step": 3609
},
{
"epoch": 0.9601063829787234,
"grad_norm": 3.5388100147247314,
"learning_rate": 9.458620619899803e-06,
"loss": 1.2523,
"step": 3610
},
{
"epoch": 0.9603723404255319,
"grad_norm": 3.8266894817352295,
"learning_rate": 9.458222505004462e-06,
"loss": 1.4002,
"step": 3611
},
{
"epoch": 0.9606382978723405,
"grad_norm": 3.576223373413086,
"learning_rate": 9.457824252166467e-06,
"loss": 1.2669,
"step": 3612
},
{
"epoch": 0.9609042553191489,
"grad_norm": 3.5163745880126953,
"learning_rate": 9.457425861398144e-06,
"loss": 1.1806,
"step": 3613
},
{
"epoch": 0.9611702127659575,
"grad_norm": 3.586691379547119,
"learning_rate": 9.457027332711814e-06,
"loss": 1.3403,
"step": 3614
},
{
"epoch": 0.961436170212766,
"grad_norm": 3.5483405590057373,
"learning_rate": 9.456628666119812e-06,
"loss": 1.2426,
"step": 3615
},
{
"epoch": 0.9617021276595744,
"grad_norm": 3.600684881210327,
"learning_rate": 9.456229861634471e-06,
"loss": 1.2333,
"step": 3616
},
{
"epoch": 0.961968085106383,
"grad_norm": 3.446035385131836,
"learning_rate": 9.455830919268134e-06,
"loss": 1.161,
"step": 3617
},
{
"epoch": 0.9622340425531914,
"grad_norm": 3.329267978668213,
"learning_rate": 9.45543183903314e-06,
"loss": 1.1162,
"step": 3618
},
{
"epoch": 0.9625,
"grad_norm": 3.4342401027679443,
"learning_rate": 9.45503262094184e-06,
"loss": 1.3068,
"step": 3619
},
{
"epoch": 0.9627659574468085,
"grad_norm": 3.230329751968384,
"learning_rate": 9.454633265006585e-06,
"loss": 1.1398,
"step": 3620
},
{
"epoch": 0.9630319148936171,
"grad_norm": 3.3767967224121094,
"learning_rate": 9.454233771239733e-06,
"loss": 1.3104,
"step": 3621
},
{
"epoch": 0.9632978723404255,
"grad_norm": 3.2001163959503174,
"learning_rate": 9.453834139653643e-06,
"loss": 1.1632,
"step": 3622
},
{
"epoch": 0.9635638297872341,
"grad_norm": 3.9331612586975098,
"learning_rate": 9.453434370260683e-06,
"loss": 1.3891,
"step": 3623
},
{
"epoch": 0.9638297872340426,
"grad_norm": 4.0084052085876465,
"learning_rate": 9.453034463073218e-06,
"loss": 1.4323,
"step": 3624
},
{
"epoch": 0.964095744680851,
"grad_norm": 3.2673776149749756,
"learning_rate": 9.452634418103626e-06,
"loss": 1.0984,
"step": 3625
},
{
"epoch": 0.9643617021276596,
"grad_norm": 3.2544898986816406,
"learning_rate": 9.45223423536428e-06,
"loss": 1.2681,
"step": 3626
},
{
"epoch": 0.964627659574468,
"grad_norm": 3.625535488128662,
"learning_rate": 9.451833914867567e-06,
"loss": 1.258,
"step": 3627
},
{
"epoch": 0.9648936170212766,
"grad_norm": 3.048551082611084,
"learning_rate": 9.451433456625871e-06,
"loss": 1.207,
"step": 3628
},
{
"epoch": 0.9651595744680851,
"grad_norm": 3.567139148712158,
"learning_rate": 9.451032860651583e-06,
"loss": 1.2771,
"step": 3629
},
{
"epoch": 0.9654255319148937,
"grad_norm": 3.618807077407837,
"learning_rate": 9.450632126957098e-06,
"loss": 1.2666,
"step": 3630
},
{
"epoch": 0.9656914893617021,
"grad_norm": 3.4883675575256348,
"learning_rate": 9.450231255554814e-06,
"loss": 1.1142,
"step": 3631
},
{
"epoch": 0.9659574468085106,
"grad_norm": 3.687424898147583,
"learning_rate": 9.449830246457136e-06,
"loss": 1.1745,
"step": 3632
},
{
"epoch": 0.9662234042553192,
"grad_norm": 3.457051992416382,
"learning_rate": 9.44942909967647e-06,
"loss": 1.1846,
"step": 3633
},
{
"epoch": 0.9664893617021276,
"grad_norm": 3.5090994834899902,
"learning_rate": 9.449027815225231e-06,
"loss": 1.3255,
"step": 3634
},
{
"epoch": 0.9667553191489362,
"grad_norm": 3.2658236026763916,
"learning_rate": 9.448626393115833e-06,
"loss": 1.0964,
"step": 3635
},
{
"epoch": 0.9670212765957447,
"grad_norm": 3.7192766666412354,
"learning_rate": 9.448224833360695e-06,
"loss": 1.3171,
"step": 3636
},
{
"epoch": 0.9672872340425532,
"grad_norm": 3.891343355178833,
"learning_rate": 9.447823135972247e-06,
"loss": 1.206,
"step": 3637
},
{
"epoch": 0.9675531914893617,
"grad_norm": 3.7228803634643555,
"learning_rate": 9.447421300962911e-06,
"loss": 1.2032,
"step": 3638
},
{
"epoch": 0.9678191489361702,
"grad_norm": 3.348090171813965,
"learning_rate": 9.447019328345125e-06,
"loss": 1.2437,
"step": 3639
},
{
"epoch": 0.9680851063829787,
"grad_norm": 3.3824315071105957,
"learning_rate": 9.446617218131326e-06,
"loss": 1.1005,
"step": 3640
},
{
"epoch": 0.9683510638297872,
"grad_norm": 4.107891082763672,
"learning_rate": 9.446214970333954e-06,
"loss": 1.3365,
"step": 3641
},
{
"epoch": 0.9686170212765958,
"grad_norm": 3.609551191329956,
"learning_rate": 9.445812584965458e-06,
"loss": 1.2756,
"step": 3642
},
{
"epoch": 0.9688829787234042,
"grad_norm": 3.625800371170044,
"learning_rate": 9.445410062038284e-06,
"loss": 1.2114,
"step": 3643
},
{
"epoch": 0.9691489361702128,
"grad_norm": 3.605753183364868,
"learning_rate": 9.445007401564889e-06,
"loss": 1.3025,
"step": 3644
},
{
"epoch": 0.9694148936170213,
"grad_norm": 3.2446835041046143,
"learning_rate": 9.444604603557733e-06,
"loss": 1.2037,
"step": 3645
},
{
"epoch": 0.9696808510638298,
"grad_norm": 3.478797674179077,
"learning_rate": 9.444201668029278e-06,
"loss": 1.2862,
"step": 3646
},
{
"epoch": 0.9699468085106383,
"grad_norm": 3.33634352684021,
"learning_rate": 9.443798594991989e-06,
"loss": 1.1298,
"step": 3647
},
{
"epoch": 0.9702127659574468,
"grad_norm": 3.82041597366333,
"learning_rate": 9.44339538445834e-06,
"loss": 1.2301,
"step": 3648
},
{
"epoch": 0.9704787234042553,
"grad_norm": 3.5176687240600586,
"learning_rate": 9.442992036440808e-06,
"loss": 1.1489,
"step": 3649
},
{
"epoch": 0.9707446808510638,
"grad_norm": 3.265772819519043,
"learning_rate": 9.44258855095187e-06,
"loss": 1.1147,
"step": 3650
},
{
"epoch": 0.9710106382978724,
"grad_norm": 3.5735883712768555,
"learning_rate": 9.442184928004012e-06,
"loss": 1.2768,
"step": 3651
},
{
"epoch": 0.9712765957446808,
"grad_norm": 3.6002047061920166,
"learning_rate": 9.441781167609722e-06,
"loss": 1.3395,
"step": 3652
},
{
"epoch": 0.9715425531914894,
"grad_norm": 3.8888189792633057,
"learning_rate": 9.441377269781496e-06,
"loss": 1.2223,
"step": 3653
},
{
"epoch": 0.9718085106382979,
"grad_norm": 3.6971378326416016,
"learning_rate": 9.440973234531825e-06,
"loss": 1.1813,
"step": 3654
},
{
"epoch": 0.9720744680851063,
"grad_norm": 3.6079912185668945,
"learning_rate": 9.440569061873213e-06,
"loss": 1.1156,
"step": 3655
},
{
"epoch": 0.9723404255319149,
"grad_norm": 3.839540481567383,
"learning_rate": 9.440164751818168e-06,
"loss": 1.4711,
"step": 3656
},
{
"epoch": 0.9726063829787234,
"grad_norm": 3.7191896438598633,
"learning_rate": 9.439760304379197e-06,
"loss": 1.2351,
"step": 3657
},
{
"epoch": 0.972872340425532,
"grad_norm": 3.902529001235962,
"learning_rate": 9.439355719568817e-06,
"loss": 1.3487,
"step": 3658
},
{
"epoch": 0.9731382978723404,
"grad_norm": 3.389925241470337,
"learning_rate": 9.438950997399543e-06,
"loss": 1.1905,
"step": 3659
},
{
"epoch": 0.973404255319149,
"grad_norm": 3.6134610176086426,
"learning_rate": 9.438546137883898e-06,
"loss": 1.2323,
"step": 3660
},
{
"epoch": 0.9736702127659574,
"grad_norm": 4.062784671783447,
"learning_rate": 9.438141141034409e-06,
"loss": 1.2437,
"step": 3661
},
{
"epoch": 0.9739361702127659,
"grad_norm": 3.6207644939422607,
"learning_rate": 9.437736006863611e-06,
"loss": 1.2922,
"step": 3662
},
{
"epoch": 0.9742021276595745,
"grad_norm": 3.2939248085021973,
"learning_rate": 9.437330735384034e-06,
"loss": 1.2348,
"step": 3663
},
{
"epoch": 0.9744680851063829,
"grad_norm": 3.6209723949432373,
"learning_rate": 9.43692532660822e-06,
"loss": 1.2698,
"step": 3664
},
{
"epoch": 0.9747340425531915,
"grad_norm": 3.766961097717285,
"learning_rate": 9.436519780548712e-06,
"loss": 1.3306,
"step": 3665
},
{
"epoch": 0.975,
"grad_norm": 3.1702146530151367,
"learning_rate": 9.43611409721806e-06,
"loss": 1.2877,
"step": 3666
},
{
"epoch": 0.9752659574468086,
"grad_norm": 3.411604642868042,
"learning_rate": 9.435708276628814e-06,
"loss": 1.1874,
"step": 3667
},
{
"epoch": 0.975531914893617,
"grad_norm": 3.3507773876190186,
"learning_rate": 9.435302318793533e-06,
"loss": 1.1614,
"step": 3668
},
{
"epoch": 0.9757978723404256,
"grad_norm": 3.42853045463562,
"learning_rate": 9.434896223724774e-06,
"loss": 1.128,
"step": 3669
},
{
"epoch": 0.976063829787234,
"grad_norm": 3.5911173820495605,
"learning_rate": 9.434489991435106e-06,
"loss": 1.2216,
"step": 3670
},
{
"epoch": 0.9763297872340425,
"grad_norm": 3.4679529666900635,
"learning_rate": 9.434083621937096e-06,
"loss": 1.1932,
"step": 3671
},
{
"epoch": 0.9765957446808511,
"grad_norm": 3.4107143878936768,
"learning_rate": 9.433677115243318e-06,
"loss": 1.1279,
"step": 3672
},
{
"epoch": 0.9768617021276595,
"grad_norm": 3.5593109130859375,
"learning_rate": 9.433270471366352e-06,
"loss": 1.1996,
"step": 3673
},
{
"epoch": 0.9771276595744681,
"grad_norm": 3.193164110183716,
"learning_rate": 9.432863690318777e-06,
"loss": 1.103,
"step": 3674
},
{
"epoch": 0.9773936170212766,
"grad_norm": 3.5351223945617676,
"learning_rate": 9.432456772113179e-06,
"loss": 1.2212,
"step": 3675
},
{
"epoch": 0.9776595744680852,
"grad_norm": 3.4629955291748047,
"learning_rate": 9.432049716762151e-06,
"loss": 1.2055,
"step": 3676
},
{
"epoch": 0.9779255319148936,
"grad_norm": 3.661907196044922,
"learning_rate": 9.431642524278286e-06,
"loss": 1.3389,
"step": 3677
},
{
"epoch": 0.9781914893617021,
"grad_norm": 3.140364408493042,
"learning_rate": 9.431235194674185e-06,
"loss": 1.2099,
"step": 3678
},
{
"epoch": 0.9784574468085107,
"grad_norm": 3.7145817279815674,
"learning_rate": 9.43082772796245e-06,
"loss": 1.49,
"step": 3679
},
{
"epoch": 0.9787234042553191,
"grad_norm": 3.3982760906219482,
"learning_rate": 9.430420124155687e-06,
"loss": 1.2001,
"step": 3680
},
{
"epoch": 0.9789893617021277,
"grad_norm": 3.7518324851989746,
"learning_rate": 9.43001238326651e-06,
"loss": 1.4143,
"step": 3681
},
{
"epoch": 0.9792553191489362,
"grad_norm": 3.708822250366211,
"learning_rate": 9.429604505307535e-06,
"loss": 1.2038,
"step": 3682
},
{
"epoch": 0.9795212765957447,
"grad_norm": 3.5261037349700928,
"learning_rate": 9.42919649029138e-06,
"loss": 1.2233,
"step": 3683
},
{
"epoch": 0.9797872340425532,
"grad_norm": 3.842564582824707,
"learning_rate": 9.428788338230672e-06,
"loss": 1.3385,
"step": 3684
},
{
"epoch": 0.9800531914893617,
"grad_norm": 3.688267230987549,
"learning_rate": 9.428380049138038e-06,
"loss": 1.2034,
"step": 3685
},
{
"epoch": 0.9803191489361702,
"grad_norm": 3.877396583557129,
"learning_rate": 9.42797162302611e-06,
"loss": 1.2775,
"step": 3686
},
{
"epoch": 0.9805851063829787,
"grad_norm": 3.4748518466949463,
"learning_rate": 9.427563059907528e-06,
"loss": 1.4141,
"step": 3687
},
{
"epoch": 0.9808510638297873,
"grad_norm": 3.0281589031219482,
"learning_rate": 9.427154359794931e-06,
"loss": 1.2591,
"step": 3688
},
{
"epoch": 0.9811170212765957,
"grad_norm": 3.5246212482452393,
"learning_rate": 9.42674552270097e-06,
"loss": 1.1775,
"step": 3689
},
{
"epoch": 0.9813829787234043,
"grad_norm": 3.599862813949585,
"learning_rate": 9.426336548638287e-06,
"loss": 1.187,
"step": 3690
},
{
"epoch": 0.9816489361702128,
"grad_norm": 3.8031740188598633,
"learning_rate": 9.42592743761954e-06,
"loss": 1.3704,
"step": 3691
},
{
"epoch": 0.9819148936170212,
"grad_norm": 3.708652973175049,
"learning_rate": 9.425518189657388e-06,
"loss": 1.2567,
"step": 3692
},
{
"epoch": 0.9821808510638298,
"grad_norm": 3.341240882873535,
"learning_rate": 9.425108804764493e-06,
"loss": 1.4062,
"step": 3693
},
{
"epoch": 0.9824468085106383,
"grad_norm": 3.5106687545776367,
"learning_rate": 9.42469928295352e-06,
"loss": 1.1759,
"step": 3694
},
{
"epoch": 0.9827127659574468,
"grad_norm": 3.153082847595215,
"learning_rate": 9.424289624237143e-06,
"loss": 1.1955,
"step": 3695
},
{
"epoch": 0.9829787234042553,
"grad_norm": 3.4173176288604736,
"learning_rate": 9.423879828628038e-06,
"loss": 1.3188,
"step": 3696
},
{
"epoch": 0.9832446808510639,
"grad_norm": 3.5854523181915283,
"learning_rate": 9.42346989613888e-06,
"loss": 1.2425,
"step": 3697
},
{
"epoch": 0.9835106382978723,
"grad_norm": 3.536123752593994,
"learning_rate": 9.423059826782355e-06,
"loss": 1.2088,
"step": 3698
},
{
"epoch": 0.9837765957446809,
"grad_norm": 3.5280613899230957,
"learning_rate": 9.422649620571155e-06,
"loss": 1.4956,
"step": 3699
},
{
"epoch": 0.9840425531914894,
"grad_norm": 3.896684169769287,
"learning_rate": 9.422239277517964e-06,
"loss": 1.3236,
"step": 3700
},
{
"epoch": 0.9843085106382978,
"grad_norm": 3.417961597442627,
"learning_rate": 9.421828797635487e-06,
"loss": 1.2044,
"step": 3701
},
{
"epoch": 0.9845744680851064,
"grad_norm": 3.4376044273376465,
"learning_rate": 9.421418180936419e-06,
"loss": 1.2657,
"step": 3702
},
{
"epoch": 0.9848404255319149,
"grad_norm": 3.8742475509643555,
"learning_rate": 9.421007427433467e-06,
"loss": 1.2526,
"step": 3703
},
{
"epoch": 0.9851063829787234,
"grad_norm": 4.002706527709961,
"learning_rate": 9.42059653713934e-06,
"loss": 1.446,
"step": 3704
},
{
"epoch": 0.9853723404255319,
"grad_norm": 3.462308883666992,
"learning_rate": 9.420185510066753e-06,
"loss": 1.2338,
"step": 3705
},
{
"epoch": 0.9856382978723405,
"grad_norm": 3.684730291366577,
"learning_rate": 9.41977434622842e-06,
"loss": 1.2417,
"step": 3706
},
{
"epoch": 0.9859042553191489,
"grad_norm": 3.5235018730163574,
"learning_rate": 9.419363045637067e-06,
"loss": 1.3775,
"step": 3707
},
{
"epoch": 0.9861702127659574,
"grad_norm": 3.2986860275268555,
"learning_rate": 9.418951608305417e-06,
"loss": 1.1967,
"step": 3708
},
{
"epoch": 0.986436170212766,
"grad_norm": 3.2341742515563965,
"learning_rate": 9.418540034246202e-06,
"loss": 1.1223,
"step": 3709
},
{
"epoch": 0.9867021276595744,
"grad_norm": 3.5601837635040283,
"learning_rate": 9.418128323472157e-06,
"loss": 1.2934,
"step": 3710
},
{
"epoch": 0.986968085106383,
"grad_norm": 4.002072811126709,
"learning_rate": 9.41771647599602e-06,
"loss": 1.2226,
"step": 3711
},
{
"epoch": 0.9872340425531915,
"grad_norm": 3.6095480918884277,
"learning_rate": 9.417304491830533e-06,
"loss": 1.2332,
"step": 3712
},
{
"epoch": 0.9875,
"grad_norm": 3.7682595252990723,
"learning_rate": 9.416892370988445e-06,
"loss": 1.1929,
"step": 3713
},
{
"epoch": 0.9877659574468085,
"grad_norm": 3.4983551502227783,
"learning_rate": 9.416480113482505e-06,
"loss": 1.2426,
"step": 3714
},
{
"epoch": 0.988031914893617,
"grad_norm": 3.490725040435791,
"learning_rate": 9.416067719325472e-06,
"loss": 1.2009,
"step": 3715
},
{
"epoch": 0.9882978723404255,
"grad_norm": 3.564605474472046,
"learning_rate": 9.415655188530104e-06,
"loss": 1.2105,
"step": 3716
},
{
"epoch": 0.988563829787234,
"grad_norm": 3.5361475944519043,
"learning_rate": 9.415242521109166e-06,
"loss": 1.3389,
"step": 3717
},
{
"epoch": 0.9888297872340426,
"grad_norm": 3.3671114444732666,
"learning_rate": 9.414829717075426e-06,
"loss": 1.3157,
"step": 3718
},
{
"epoch": 0.989095744680851,
"grad_norm": 3.7442715167999268,
"learning_rate": 9.414416776441656e-06,
"loss": 1.1551,
"step": 3719
},
{
"epoch": 0.9893617021276596,
"grad_norm": 3.4414875507354736,
"learning_rate": 9.414003699220636e-06,
"loss": 1.2135,
"step": 3720
},
{
"epoch": 0.9896276595744681,
"grad_norm": 4.052205562591553,
"learning_rate": 9.413590485425143e-06,
"loss": 1.3299,
"step": 3721
},
{
"epoch": 0.9898936170212767,
"grad_norm": 3.0953876972198486,
"learning_rate": 9.413177135067964e-06,
"loss": 1.1183,
"step": 3722
},
{
"epoch": 0.9901595744680851,
"grad_norm": 3.767108678817749,
"learning_rate": 9.41276364816189e-06,
"loss": 1.325,
"step": 3723
},
{
"epoch": 0.9904255319148936,
"grad_norm": 3.3017489910125732,
"learning_rate": 9.412350024719713e-06,
"loss": 1.2328,
"step": 3724
},
{
"epoch": 0.9906914893617021,
"grad_norm": 3.5287554264068604,
"learning_rate": 9.41193626475423e-06,
"loss": 1.2442,
"step": 3725
},
{
"epoch": 0.9909574468085106,
"grad_norm": 3.6898324489593506,
"learning_rate": 9.411522368278243e-06,
"loss": 1.2682,
"step": 3726
},
{
"epoch": 0.9912234042553192,
"grad_norm": 3.9228873252868652,
"learning_rate": 9.411108335304562e-06,
"loss": 1.3415,
"step": 3727
},
{
"epoch": 0.9914893617021276,
"grad_norm": 3.9011435508728027,
"learning_rate": 9.410694165845996e-06,
"loss": 1.2388,
"step": 3728
},
{
"epoch": 0.9917553191489362,
"grad_norm": 3.714230537414551,
"learning_rate": 9.41027985991536e-06,
"loss": 1.2085,
"step": 3729
},
{
"epoch": 0.9920212765957447,
"grad_norm": 3.627887010574341,
"learning_rate": 9.409865417525473e-06,
"loss": 1.2682,
"step": 3730
},
{
"epoch": 0.9922872340425531,
"grad_norm": 3.4126439094543457,
"learning_rate": 9.409450838689156e-06,
"loss": 1.2089,
"step": 3731
},
{
"epoch": 0.9925531914893617,
"grad_norm": 3.5555756092071533,
"learning_rate": 9.409036123419239e-06,
"loss": 1.2066,
"step": 3732
},
{
"epoch": 0.9928191489361702,
"grad_norm": 3.5292632579803467,
"learning_rate": 9.408621271728555e-06,
"loss": 1.1913,
"step": 3733
},
{
"epoch": 0.9930851063829788,
"grad_norm": 3.5443150997161865,
"learning_rate": 9.408206283629937e-06,
"loss": 1.2293,
"step": 3734
},
{
"epoch": 0.9933510638297872,
"grad_norm": 3.8415119647979736,
"learning_rate": 9.407791159136226e-06,
"loss": 1.496,
"step": 3735
},
{
"epoch": 0.9936170212765958,
"grad_norm": 3.647085189819336,
"learning_rate": 9.407375898260267e-06,
"loss": 1.1983,
"step": 3736
},
{
"epoch": 0.9938829787234043,
"grad_norm": 3.2950799465179443,
"learning_rate": 9.40696050101491e-06,
"loss": 1.1298,
"step": 3737
},
{
"epoch": 0.9941489361702127,
"grad_norm": 3.837249517440796,
"learning_rate": 9.406544967413008e-06,
"loss": 1.2763,
"step": 3738
},
{
"epoch": 0.9944148936170213,
"grad_norm": 3.437069892883301,
"learning_rate": 9.406129297467414e-06,
"loss": 1.1689,
"step": 3739
},
{
"epoch": 0.9946808510638298,
"grad_norm": 3.7600064277648926,
"learning_rate": 9.405713491190992e-06,
"loss": 1.4092,
"step": 3740
},
{
"epoch": 0.9949468085106383,
"grad_norm": 3.547830104827881,
"learning_rate": 9.405297548596607e-06,
"loss": 1.3794,
"step": 3741
},
{
"epoch": 0.9952127659574468,
"grad_norm": 3.673377752304077,
"learning_rate": 9.404881469697132e-06,
"loss": 1.1934,
"step": 3742
},
{
"epoch": 0.9954787234042554,
"grad_norm": 3.6018290519714355,
"learning_rate": 9.404465254505435e-06,
"loss": 1.2228,
"step": 3743
},
{
"epoch": 0.9957446808510638,
"grad_norm": 3.5014569759368896,
"learning_rate": 9.4040489030344e-06,
"loss": 1.1731,
"step": 3744
},
{
"epoch": 0.9960106382978723,
"grad_norm": 3.6044108867645264,
"learning_rate": 9.403632415296907e-06,
"loss": 1.2917,
"step": 3745
},
{
"epoch": 0.9962765957446809,
"grad_norm": 3.626147985458374,
"learning_rate": 9.40321579130584e-06,
"loss": 1.2297,
"step": 3746
},
{
"epoch": 0.9965425531914893,
"grad_norm": 3.5548157691955566,
"learning_rate": 9.402799031074095e-06,
"loss": 1.2096,
"step": 3747
},
{
"epoch": 0.9968085106382979,
"grad_norm": 4.016201019287109,
"learning_rate": 9.402382134614563e-06,
"loss": 1.2461,
"step": 3748
},
{
"epoch": 0.9970744680851064,
"grad_norm": 3.2637929916381836,
"learning_rate": 9.401965101940144e-06,
"loss": 1.1531,
"step": 3749
},
{
"epoch": 0.9973404255319149,
"grad_norm": 3.330240249633789,
"learning_rate": 9.40154793306374e-06,
"loss": 1.1598,
"step": 3750
},
{
"epoch": 0.9976063829787234,
"grad_norm": 3.522907257080078,
"learning_rate": 9.401130627998265e-06,
"loss": 1.1563,
"step": 3751
},
{
"epoch": 0.997872340425532,
"grad_norm": 3.462400197982788,
"learning_rate": 9.400713186756625e-06,
"loss": 1.0948,
"step": 3752
},
{
"epoch": 0.9981382978723404,
"grad_norm": 3.6393964290618896,
"learning_rate": 9.400295609351738e-06,
"loss": 1.2499,
"step": 3753
},
{
"epoch": 0.9984042553191489,
"grad_norm": 3.4382801055908203,
"learning_rate": 9.399877895796526e-06,
"loss": 1.2587,
"step": 3754
},
{
"epoch": 0.9986702127659575,
"grad_norm": 3.769301414489746,
"learning_rate": 9.399460046103908e-06,
"loss": 1.283,
"step": 3755
},
{
"epoch": 0.9989361702127659,
"grad_norm": 3.3904542922973633,
"learning_rate": 9.399042060286819e-06,
"loss": 1.3667,
"step": 3756
},
{
"epoch": 0.9992021276595745,
"grad_norm": 3.413027763366699,
"learning_rate": 9.398623938358188e-06,
"loss": 1.1575,
"step": 3757
},
{
"epoch": 0.999468085106383,
"grad_norm": 3.8313398361206055,
"learning_rate": 9.398205680330954e-06,
"loss": 1.1665,
"step": 3758
},
{
"epoch": 0.9997340425531915,
"grad_norm": 3.5040853023529053,
"learning_rate": 9.397787286218058e-06,
"loss": 1.3182,
"step": 3759
},
{
"epoch": 1.0,
"grad_norm": 3.6746809482574463,
"learning_rate": 9.397368756032445e-06,
"loss": 1.2287,
"step": 3760
},
{
"epoch": 1.0002659574468085,
"grad_norm": 3.308379650115967,
"learning_rate": 9.396950089787066e-06,
"loss": 0.8299,
"step": 3761
},
{
"epoch": 1.000531914893617,
"grad_norm": 3.8195013999938965,
"learning_rate": 9.396531287494877e-06,
"loss": 0.8431,
"step": 3762
},
{
"epoch": 1.0007978723404256,
"grad_norm": 3.317417621612549,
"learning_rate": 9.396112349168832e-06,
"loss": 0.9087,
"step": 3763
},
{
"epoch": 1.001063829787234,
"grad_norm": 3.6359126567840576,
"learning_rate": 9.395693274821893e-06,
"loss": 0.8605,
"step": 3764
},
{
"epoch": 1.0013297872340425,
"grad_norm": 3.3946707248687744,
"learning_rate": 9.39527406446703e-06,
"loss": 0.9424,
"step": 3765
},
{
"epoch": 1.001595744680851,
"grad_norm": 3.7910523414611816,
"learning_rate": 9.394854718117214e-06,
"loss": 0.7635,
"step": 3766
},
{
"epoch": 1.0018617021276597,
"grad_norm": 3.847181558609009,
"learning_rate": 9.394435235785417e-06,
"loss": 0.8419,
"step": 3767
},
{
"epoch": 1.0021276595744681,
"grad_norm": 3.5999948978424072,
"learning_rate": 9.394015617484621e-06,
"loss": 0.7906,
"step": 3768
},
{
"epoch": 1.0023936170212766,
"grad_norm": 3.53528094291687,
"learning_rate": 9.393595863227808e-06,
"loss": 0.7652,
"step": 3769
},
{
"epoch": 1.002659574468085,
"grad_norm": 4.102449417114258,
"learning_rate": 9.393175973027967e-06,
"loss": 0.837,
"step": 3770
},
{
"epoch": 1.0029255319148935,
"grad_norm": 4.625784397125244,
"learning_rate": 9.392755946898087e-06,
"loss": 0.8694,
"step": 3771
},
{
"epoch": 1.0031914893617022,
"grad_norm": 3.7955758571624756,
"learning_rate": 9.392335784851168e-06,
"loss": 0.7127,
"step": 3772
},
{
"epoch": 1.0034574468085107,
"grad_norm": 4.6287970542907715,
"learning_rate": 9.39191548690021e-06,
"loss": 0.6634,
"step": 3773
},
{
"epoch": 1.0037234042553191,
"grad_norm": 4.188403129577637,
"learning_rate": 9.391495053058213e-06,
"loss": 0.7676,
"step": 3774
},
{
"epoch": 1.0039893617021276,
"grad_norm": 4.061558723449707,
"learning_rate": 9.39107448333819e-06,
"loss": 0.6863,
"step": 3775
},
{
"epoch": 1.004255319148936,
"grad_norm": 3.9614672660827637,
"learning_rate": 9.390653777753151e-06,
"loss": 0.8902,
"step": 3776
},
{
"epoch": 1.0045212765957447,
"grad_norm": 3.7978405952453613,
"learning_rate": 9.390232936316116e-06,
"loss": 0.8576,
"step": 3777
},
{
"epoch": 1.0047872340425532,
"grad_norm": 4.081401348114014,
"learning_rate": 9.389811959040106e-06,
"loss": 0.9293,
"step": 3778
},
{
"epoch": 1.0050531914893617,
"grad_norm": 4.4708123207092285,
"learning_rate": 9.389390845938147e-06,
"loss": 0.7971,
"step": 3779
},
{
"epoch": 1.0053191489361701,
"grad_norm": 3.670398235321045,
"learning_rate": 9.388969597023265e-06,
"loss": 0.7746,
"step": 3780
},
{
"epoch": 1.0055851063829788,
"grad_norm": 3.678659200668335,
"learning_rate": 9.388548212308496e-06,
"loss": 0.7505,
"step": 3781
},
{
"epoch": 1.0058510638297873,
"grad_norm": 3.943781614303589,
"learning_rate": 9.388126691806879e-06,
"loss": 0.7205,
"step": 3782
},
{
"epoch": 1.0061170212765957,
"grad_norm": 3.976630926132202,
"learning_rate": 9.387705035531455e-06,
"loss": 0.8597,
"step": 3783
},
{
"epoch": 1.0063829787234042,
"grad_norm": 3.6376004219055176,
"learning_rate": 9.387283243495273e-06,
"loss": 0.7911,
"step": 3784
},
{
"epoch": 1.0066489361702127,
"grad_norm": 3.698863983154297,
"learning_rate": 9.386861315711382e-06,
"loss": 0.7718,
"step": 3785
},
{
"epoch": 1.0069148936170214,
"grad_norm": 3.553309679031372,
"learning_rate": 9.386439252192836e-06,
"loss": 0.8233,
"step": 3786
},
{
"epoch": 1.0071808510638298,
"grad_norm": 3.588423252105713,
"learning_rate": 9.386017052952694e-06,
"loss": 0.782,
"step": 3787
},
{
"epoch": 1.0074468085106383,
"grad_norm": 3.5977461338043213,
"learning_rate": 9.385594718004023e-06,
"loss": 0.8548,
"step": 3788
},
{
"epoch": 1.0077127659574467,
"grad_norm": 4.447713375091553,
"learning_rate": 9.385172247359887e-06,
"loss": 0.833,
"step": 3789
},
{
"epoch": 1.0079787234042554,
"grad_norm": 3.6044774055480957,
"learning_rate": 9.384749641033358e-06,
"loss": 0.8453,
"step": 3790
},
{
"epoch": 1.008244680851064,
"grad_norm": 3.4909749031066895,
"learning_rate": 9.384326899037515e-06,
"loss": 0.7723,
"step": 3791
},
{
"epoch": 1.0085106382978724,
"grad_norm": 3.8825156688690186,
"learning_rate": 9.383904021385433e-06,
"loss": 0.7219,
"step": 3792
},
{
"epoch": 1.0087765957446808,
"grad_norm": 4.605208396911621,
"learning_rate": 9.3834810080902e-06,
"loss": 0.8625,
"step": 3793
},
{
"epoch": 1.0090425531914893,
"grad_norm": 3.8827695846557617,
"learning_rate": 9.383057859164904e-06,
"loss": 0.7579,
"step": 3794
},
{
"epoch": 1.009308510638298,
"grad_norm": 3.8152899742126465,
"learning_rate": 9.382634574622637e-06,
"loss": 0.7785,
"step": 3795
},
{
"epoch": 1.0095744680851064,
"grad_norm": 3.9749300479888916,
"learning_rate": 9.382211154476497e-06,
"loss": 0.7768,
"step": 3796
},
{
"epoch": 1.0098404255319149,
"grad_norm": 3.9352428913116455,
"learning_rate": 9.381787598739586e-06,
"loss": 0.9265,
"step": 3797
},
{
"epoch": 1.0101063829787233,
"grad_norm": 3.8235480785369873,
"learning_rate": 9.381363907425006e-06,
"loss": 0.7915,
"step": 3798
},
{
"epoch": 1.0103723404255318,
"grad_norm": 4.1063103675842285,
"learning_rate": 9.380940080545869e-06,
"loss": 0.8271,
"step": 3799
},
{
"epoch": 1.0106382978723405,
"grad_norm": 3.7685892581939697,
"learning_rate": 9.380516118115287e-06,
"loss": 0.7611,
"step": 3800
},
{
"epoch": 1.010904255319149,
"grad_norm": 3.679269790649414,
"learning_rate": 9.380092020146379e-06,
"loss": 0.7943,
"step": 3801
},
{
"epoch": 1.0111702127659574,
"grad_norm": 3.7096617221832275,
"learning_rate": 9.379667786652267e-06,
"loss": 0.8254,
"step": 3802
},
{
"epoch": 1.0114361702127659,
"grad_norm": 3.4425570964813232,
"learning_rate": 9.379243417646077e-06,
"loss": 0.7538,
"step": 3803
},
{
"epoch": 1.0117021276595746,
"grad_norm": 3.324869155883789,
"learning_rate": 9.378818913140941e-06,
"loss": 0.6687,
"step": 3804
},
{
"epoch": 1.011968085106383,
"grad_norm": 3.6117424964904785,
"learning_rate": 9.378394273149992e-06,
"loss": 0.8059,
"step": 3805
},
{
"epoch": 1.0122340425531915,
"grad_norm": 3.843747615814209,
"learning_rate": 9.377969497686369e-06,
"loss": 0.7257,
"step": 3806
},
{
"epoch": 1.0125,
"grad_norm": 3.997349977493286,
"learning_rate": 9.377544586763216e-06,
"loss": 0.837,
"step": 3807
},
{
"epoch": 1.0127659574468084,
"grad_norm": 3.5746796131134033,
"learning_rate": 9.377119540393677e-06,
"loss": 0.7891,
"step": 3808
},
{
"epoch": 1.013031914893617,
"grad_norm": 3.7787206172943115,
"learning_rate": 9.37669435859091e-06,
"loss": 0.7984,
"step": 3809
},
{
"epoch": 1.0132978723404256,
"grad_norm": 4.2211174964904785,
"learning_rate": 9.376269041368063e-06,
"loss": 0.7274,
"step": 3810
},
{
"epoch": 1.013563829787234,
"grad_norm": 3.591057300567627,
"learning_rate": 9.375843588738302e-06,
"loss": 0.807,
"step": 3811
},
{
"epoch": 1.0138297872340425,
"grad_norm": 3.5017266273498535,
"learning_rate": 9.375418000714787e-06,
"loss": 0.7173,
"step": 3812
},
{
"epoch": 1.014095744680851,
"grad_norm": 4.4692487716674805,
"learning_rate": 9.374992277310688e-06,
"loss": 0.7584,
"step": 3813
},
{
"epoch": 1.0143617021276596,
"grad_norm": 4.453067302703857,
"learning_rate": 9.374566418539178e-06,
"loss": 0.8444,
"step": 3814
},
{
"epoch": 1.014627659574468,
"grad_norm": 4.007133483886719,
"learning_rate": 9.37414042441343e-06,
"loss": 0.7163,
"step": 3815
},
{
"epoch": 1.0148936170212766,
"grad_norm": 3.714021682739258,
"learning_rate": 9.37371429494663e-06,
"loss": 0.7979,
"step": 3816
},
{
"epoch": 1.015159574468085,
"grad_norm": 4.196898460388184,
"learning_rate": 9.37328803015196e-06,
"loss": 0.8057,
"step": 3817
},
{
"epoch": 1.0154255319148937,
"grad_norm": 3.6794686317443848,
"learning_rate": 9.37286163004261e-06,
"loss": 0.8608,
"step": 3818
},
{
"epoch": 1.0156914893617022,
"grad_norm": 4.034078121185303,
"learning_rate": 9.37243509463177e-06,
"loss": 0.8794,
"step": 3819
},
{
"epoch": 1.0159574468085106,
"grad_norm": 3.671816110610962,
"learning_rate": 9.37200842393264e-06,
"loss": 0.755,
"step": 3820
},
{
"epoch": 1.016223404255319,
"grad_norm": 3.6856508255004883,
"learning_rate": 9.371581617958424e-06,
"loss": 0.7839,
"step": 3821
},
{
"epoch": 1.0164893617021276,
"grad_norm": 4.332293510437012,
"learning_rate": 9.371154676722326e-06,
"loss": 0.8305,
"step": 3822
},
{
"epoch": 1.0167553191489362,
"grad_norm": 4.032402038574219,
"learning_rate": 9.370727600237557e-06,
"loss": 0.8552,
"step": 3823
},
{
"epoch": 1.0170212765957447,
"grad_norm": 4.2808756828308105,
"learning_rate": 9.370300388517329e-06,
"loss": 0.8609,
"step": 3824
},
{
"epoch": 1.0172872340425532,
"grad_norm": 3.675684690475464,
"learning_rate": 9.36987304157486e-06,
"loss": 0.7307,
"step": 3825
},
{
"epoch": 1.0175531914893616,
"grad_norm": 3.6821727752685547,
"learning_rate": 9.369445559423376e-06,
"loss": 0.8393,
"step": 3826
},
{
"epoch": 1.0178191489361703,
"grad_norm": 4.112141132354736,
"learning_rate": 9.369017942076101e-06,
"loss": 0.8027,
"step": 3827
},
{
"epoch": 1.0180851063829788,
"grad_norm": 3.8829188346862793,
"learning_rate": 9.368590189546268e-06,
"loss": 0.8558,
"step": 3828
},
{
"epoch": 1.0183510638297872,
"grad_norm": 4.182821750640869,
"learning_rate": 9.368162301847112e-06,
"loss": 0.9872,
"step": 3829
},
{
"epoch": 1.0186170212765957,
"grad_norm": 4.043810844421387,
"learning_rate": 9.36773427899187e-06,
"loss": 0.731,
"step": 3830
},
{
"epoch": 1.0188829787234042,
"grad_norm": 3.6814448833465576,
"learning_rate": 9.367306120993787e-06,
"loss": 0.7434,
"step": 3831
},
{
"epoch": 1.0191489361702128,
"grad_norm": 3.823333978652954,
"learning_rate": 9.366877827866112e-06,
"loss": 0.7962,
"step": 3832
},
{
"epoch": 1.0194148936170213,
"grad_norm": 4.10197639465332,
"learning_rate": 9.366449399622092e-06,
"loss": 0.8655,
"step": 3833
},
{
"epoch": 1.0196808510638298,
"grad_norm": 3.4033734798431396,
"learning_rate": 9.366020836274991e-06,
"loss": 0.6871,
"step": 3834
},
{
"epoch": 1.0199468085106382,
"grad_norm": 3.9210493564605713,
"learning_rate": 9.365592137838063e-06,
"loss": 0.8913,
"step": 3835
},
{
"epoch": 1.0202127659574467,
"grad_norm": 3.972930431365967,
"learning_rate": 9.365163304324576e-06,
"loss": 0.7394,
"step": 3836
},
{
"epoch": 1.0204787234042554,
"grad_norm": 3.603489875793457,
"learning_rate": 9.364734335747795e-06,
"loss": 0.6501,
"step": 3837
},
{
"epoch": 1.0207446808510638,
"grad_norm": 3.678868532180786,
"learning_rate": 9.364305232120997e-06,
"loss": 0.7685,
"step": 3838
},
{
"epoch": 1.0210106382978723,
"grad_norm": 4.074692726135254,
"learning_rate": 9.363875993457454e-06,
"loss": 0.8085,
"step": 3839
},
{
"epoch": 1.0212765957446808,
"grad_norm": 3.683279514312744,
"learning_rate": 9.363446619770452e-06,
"loss": 0.7703,
"step": 3840
},
{
"epoch": 1.0215425531914895,
"grad_norm": 3.837007999420166,
"learning_rate": 9.363017111073273e-06,
"loss": 0.8403,
"step": 3841
},
{
"epoch": 1.021808510638298,
"grad_norm": 4.0264973640441895,
"learning_rate": 9.362587467379208e-06,
"loss": 0.8001,
"step": 3842
},
{
"epoch": 1.0220744680851064,
"grad_norm": 3.9169387817382812,
"learning_rate": 9.362157688701551e-06,
"loss": 0.7603,
"step": 3843
},
{
"epoch": 1.0223404255319148,
"grad_norm": 3.4985976219177246,
"learning_rate": 9.3617277750536e-06,
"loss": 0.6856,
"step": 3844
},
{
"epoch": 1.0226063829787233,
"grad_norm": 3.9737682342529297,
"learning_rate": 9.361297726448656e-06,
"loss": 0.8021,
"step": 3845
},
{
"epoch": 1.022872340425532,
"grad_norm": 4.206306457519531,
"learning_rate": 9.360867542900023e-06,
"loss": 0.7726,
"step": 3846
},
{
"epoch": 1.0231382978723405,
"grad_norm": 3.5013468265533447,
"learning_rate": 9.360437224421017e-06,
"loss": 0.7046,
"step": 3847
},
{
"epoch": 1.023404255319149,
"grad_norm": 4.186954021453857,
"learning_rate": 9.360006771024947e-06,
"loss": 0.8574,
"step": 3848
},
{
"epoch": 1.0236702127659574,
"grad_norm": 3.8380942344665527,
"learning_rate": 9.359576182725136e-06,
"loss": 0.8463,
"step": 3849
},
{
"epoch": 1.023936170212766,
"grad_norm": 4.439043998718262,
"learning_rate": 9.359145459534906e-06,
"loss": 0.868,
"step": 3850
},
{
"epoch": 1.0242021276595745,
"grad_norm": 3.555283546447754,
"learning_rate": 9.358714601467581e-06,
"loss": 0.7842,
"step": 3851
},
{
"epoch": 1.024468085106383,
"grad_norm": 3.4938576221466064,
"learning_rate": 9.358283608536498e-06,
"loss": 0.8562,
"step": 3852
},
{
"epoch": 1.0247340425531914,
"grad_norm": 3.709388256072998,
"learning_rate": 9.357852480754985e-06,
"loss": 0.7753,
"step": 3853
},
{
"epoch": 1.025,
"grad_norm": 3.594524621963501,
"learning_rate": 9.357421218136387e-06,
"loss": 0.9016,
"step": 3854
},
{
"epoch": 1.0252659574468086,
"grad_norm": 3.8423714637756348,
"learning_rate": 9.356989820694046e-06,
"loss": 0.918,
"step": 3855
},
{
"epoch": 1.025531914893617,
"grad_norm": 4.120334625244141,
"learning_rate": 9.356558288441312e-06,
"loss": 0.8276,
"step": 3856
},
{
"epoch": 1.0257978723404255,
"grad_norm": 3.7441205978393555,
"learning_rate": 9.356126621391532e-06,
"loss": 0.6485,
"step": 3857
},
{
"epoch": 1.026063829787234,
"grad_norm": 3.652815341949463,
"learning_rate": 9.35569481955807e-06,
"loss": 0.8443,
"step": 3858
},
{
"epoch": 1.0263297872340424,
"grad_norm": 3.8127315044403076,
"learning_rate": 9.355262882954277e-06,
"loss": 0.8928,
"step": 3859
},
{
"epoch": 1.0265957446808511,
"grad_norm": 4.254662036895752,
"learning_rate": 9.354830811593527e-06,
"loss": 0.7228,
"step": 3860
},
{
"epoch": 1.0268617021276596,
"grad_norm": 3.737208366394043,
"learning_rate": 9.354398605489182e-06,
"loss": 0.7144,
"step": 3861
},
{
"epoch": 1.027127659574468,
"grad_norm": 4.630359172821045,
"learning_rate": 9.353966264654619e-06,
"loss": 1.0136,
"step": 3862
},
{
"epoch": 1.0273936170212765,
"grad_norm": 4.139670372009277,
"learning_rate": 9.353533789103213e-06,
"loss": 0.7467,
"step": 3863
},
{
"epoch": 1.0276595744680852,
"grad_norm": 3.5735762119293213,
"learning_rate": 9.353101178848345e-06,
"loss": 0.6863,
"step": 3864
},
{
"epoch": 1.0279255319148937,
"grad_norm": 4.091590881347656,
"learning_rate": 9.352668433903402e-06,
"loss": 0.9083,
"step": 3865
},
{
"epoch": 1.0281914893617021,
"grad_norm": 4.462408065795898,
"learning_rate": 9.352235554281775e-06,
"loss": 0.8134,
"step": 3866
},
{
"epoch": 1.0284574468085106,
"grad_norm": 4.514068603515625,
"learning_rate": 9.351802539996853e-06,
"loss": 0.8516,
"step": 3867
},
{
"epoch": 1.028723404255319,
"grad_norm": 4.771678447723389,
"learning_rate": 9.351369391062037e-06,
"loss": 0.8317,
"step": 3868
},
{
"epoch": 1.0289893617021277,
"grad_norm": 3.9608962535858154,
"learning_rate": 9.350936107490731e-06,
"loss": 0.7668,
"step": 3869
},
{
"epoch": 1.0292553191489362,
"grad_norm": 3.6606082916259766,
"learning_rate": 9.350502689296337e-06,
"loss": 0.8021,
"step": 3870
},
{
"epoch": 1.0295212765957447,
"grad_norm": 3.395991563796997,
"learning_rate": 9.35006913649227e-06,
"loss": 0.7561,
"step": 3871
},
{
"epoch": 1.0297872340425531,
"grad_norm": 3.9416377544403076,
"learning_rate": 9.34963544909194e-06,
"loss": 0.6551,
"step": 3872
},
{
"epoch": 1.0300531914893618,
"grad_norm": 3.8515100479125977,
"learning_rate": 9.34920162710877e-06,
"loss": 0.9596,
"step": 3873
},
{
"epoch": 1.0303191489361703,
"grad_norm": 3.532066583633423,
"learning_rate": 9.34876767055618e-06,
"loss": 0.7312,
"step": 3874
},
{
"epoch": 1.0305851063829787,
"grad_norm": 3.523547887802124,
"learning_rate": 9.3483335794476e-06,
"loss": 0.9029,
"step": 3875
},
{
"epoch": 1.0308510638297872,
"grad_norm": 3.8942482471466064,
"learning_rate": 9.347899353796456e-06,
"loss": 0.852,
"step": 3876
},
{
"epoch": 1.0311170212765957,
"grad_norm": 3.8025577068328857,
"learning_rate": 9.347464993616191e-06,
"loss": 0.7704,
"step": 3877
},
{
"epoch": 1.0313829787234043,
"grad_norm": 3.5986201763153076,
"learning_rate": 9.347030498920239e-06,
"loss": 0.8289,
"step": 3878
},
{
"epoch": 1.0316489361702128,
"grad_norm": 4.27517032623291,
"learning_rate": 9.346595869722044e-06,
"loss": 0.9252,
"step": 3879
},
{
"epoch": 1.0319148936170213,
"grad_norm": 3.845385789871216,
"learning_rate": 9.346161106035056e-06,
"loss": 0.7372,
"step": 3880
},
{
"epoch": 1.0321808510638297,
"grad_norm": 3.875645875930786,
"learning_rate": 9.345726207872728e-06,
"loss": 0.9036,
"step": 3881
},
{
"epoch": 1.0324468085106382,
"grad_norm": 4.004083156585693,
"learning_rate": 9.345291175248514e-06,
"loss": 0.8,
"step": 3882
},
{
"epoch": 1.0327127659574469,
"grad_norm": 4.025826930999756,
"learning_rate": 9.344856008175874e-06,
"loss": 0.8063,
"step": 3883
},
{
"epoch": 1.0329787234042553,
"grad_norm": 4.168485641479492,
"learning_rate": 9.344420706668274e-06,
"loss": 0.8712,
"step": 3884
},
{
"epoch": 1.0332446808510638,
"grad_norm": 3.7525241374969482,
"learning_rate": 9.343985270739184e-06,
"loss": 0.8075,
"step": 3885
},
{
"epoch": 1.0335106382978723,
"grad_norm": 4.079540729522705,
"learning_rate": 9.343549700402073e-06,
"loss": 0.7574,
"step": 3886
},
{
"epoch": 1.033776595744681,
"grad_norm": 3.5480105876922607,
"learning_rate": 9.34311399567042e-06,
"loss": 0.8544,
"step": 3887
},
{
"epoch": 1.0340425531914894,
"grad_norm": 3.6420836448669434,
"learning_rate": 9.342678156557709e-06,
"loss": 0.8279,
"step": 3888
},
{
"epoch": 1.0343085106382979,
"grad_norm": 3.8541533946990967,
"learning_rate": 9.342242183077422e-06,
"loss": 0.8794,
"step": 3889
},
{
"epoch": 1.0345744680851063,
"grad_norm": 3.5861008167266846,
"learning_rate": 9.341806075243049e-06,
"loss": 0.7949,
"step": 3890
},
{
"epoch": 1.0348404255319148,
"grad_norm": 4.284236431121826,
"learning_rate": 9.341369833068086e-06,
"loss": 0.7882,
"step": 3891
},
{
"epoch": 1.0351063829787235,
"grad_norm": 4.239330768585205,
"learning_rate": 9.340933456566028e-06,
"loss": 0.8299,
"step": 3892
},
{
"epoch": 1.035372340425532,
"grad_norm": 4.633347988128662,
"learning_rate": 9.340496945750377e-06,
"loss": 0.9297,
"step": 3893
},
{
"epoch": 1.0356382978723404,
"grad_norm": 4.2658538818359375,
"learning_rate": 9.340060300634642e-06,
"loss": 0.7928,
"step": 3894
},
{
"epoch": 1.0359042553191489,
"grad_norm": 3.876652717590332,
"learning_rate": 9.33962352123233e-06,
"loss": 0.7742,
"step": 3895
},
{
"epoch": 1.0361702127659576,
"grad_norm": 3.939422130584717,
"learning_rate": 9.339186607556959e-06,
"loss": 0.7676,
"step": 3896
},
{
"epoch": 1.036436170212766,
"grad_norm": 3.9666736125946045,
"learning_rate": 9.338749559622042e-06,
"loss": 0.8759,
"step": 3897
},
{
"epoch": 1.0367021276595745,
"grad_norm": 3.6032910346984863,
"learning_rate": 9.338312377441108e-06,
"loss": 0.6806,
"step": 3898
},
{
"epoch": 1.036968085106383,
"grad_norm": 3.6236395835876465,
"learning_rate": 9.337875061027681e-06,
"loss": 0.8275,
"step": 3899
},
{
"epoch": 1.0372340425531914,
"grad_norm": 4.132247447967529,
"learning_rate": 9.337437610395292e-06,
"loss": 0.8429,
"step": 3900
},
{
"epoch": 1.0375,
"grad_norm": 3.7111639976501465,
"learning_rate": 9.337000025557477e-06,
"loss": 0.9638,
"step": 3901
},
{
"epoch": 1.0377659574468086,
"grad_norm": 3.9870896339416504,
"learning_rate": 9.336562306527775e-06,
"loss": 0.7931,
"step": 3902
},
{
"epoch": 1.038031914893617,
"grad_norm": 3.9265518188476562,
"learning_rate": 9.336124453319729e-06,
"loss": 0.7928,
"step": 3903
},
{
"epoch": 1.0382978723404255,
"grad_norm": 3.5974245071411133,
"learning_rate": 9.335686465946888e-06,
"loss": 0.7127,
"step": 3904
},
{
"epoch": 1.038563829787234,
"grad_norm": 3.6213388442993164,
"learning_rate": 9.335248344422803e-06,
"loss": 0.7669,
"step": 3905
},
{
"epoch": 1.0388297872340426,
"grad_norm": 4.555843830108643,
"learning_rate": 9.33481008876103e-06,
"loss": 0.8885,
"step": 3906
},
{
"epoch": 1.039095744680851,
"grad_norm": 4.553684234619141,
"learning_rate": 9.33437169897513e-06,
"loss": 0.9339,
"step": 3907
},
{
"epoch": 1.0393617021276595,
"grad_norm": 4.390134811401367,
"learning_rate": 9.333933175078665e-06,
"loss": 0.887,
"step": 3908
},
{
"epoch": 1.039627659574468,
"grad_norm": 4.3838677406311035,
"learning_rate": 9.333494517085205e-06,
"loss": 0.8234,
"step": 3909
},
{
"epoch": 1.0398936170212767,
"grad_norm": 4.019488334655762,
"learning_rate": 9.333055725008323e-06,
"loss": 0.9096,
"step": 3910
},
{
"epoch": 1.0401595744680852,
"grad_norm": 3.4591004848480225,
"learning_rate": 9.332616798861596e-06,
"loss": 0.7404,
"step": 3911
},
{
"epoch": 1.0404255319148936,
"grad_norm": 4.587208271026611,
"learning_rate": 9.332177738658603e-06,
"loss": 0.8192,
"step": 3912
},
{
"epoch": 1.040691489361702,
"grad_norm": 3.734438180923462,
"learning_rate": 9.331738544412932e-06,
"loss": 0.8286,
"step": 3913
},
{
"epoch": 1.0409574468085105,
"grad_norm": 3.7644083499908447,
"learning_rate": 9.33129921613817e-06,
"loss": 0.8243,
"step": 3914
},
{
"epoch": 1.0412234042553192,
"grad_norm": 3.412766456604004,
"learning_rate": 9.33085975384791e-06,
"loss": 0.8141,
"step": 3915
},
{
"epoch": 1.0414893617021277,
"grad_norm": 3.1695566177368164,
"learning_rate": 9.33042015755575e-06,
"loss": 0.6531,
"step": 3916
},
{
"epoch": 1.0417553191489362,
"grad_norm": 4.0986151695251465,
"learning_rate": 9.329980427275293e-06,
"loss": 0.8253,
"step": 3917
},
{
"epoch": 1.0420212765957446,
"grad_norm": 3.9123079776763916,
"learning_rate": 9.329540563020143e-06,
"loss": 0.8211,
"step": 3918
},
{
"epoch": 1.0422872340425533,
"grad_norm": 3.860915184020996,
"learning_rate": 9.32910056480391e-06,
"loss": 0.7886,
"step": 3919
},
{
"epoch": 1.0425531914893618,
"grad_norm": 3.6465773582458496,
"learning_rate": 9.328660432640211e-06,
"loss": 0.7254,
"step": 3920
},
{
"epoch": 1.0428191489361702,
"grad_norm": 4.174450874328613,
"learning_rate": 9.328220166542659e-06,
"loss": 0.8686,
"step": 3921
},
{
"epoch": 1.0430851063829787,
"grad_norm": 3.563661575317383,
"learning_rate": 9.32777976652488e-06,
"loss": 0.8862,
"step": 3922
},
{
"epoch": 1.0433510638297872,
"grad_norm": 3.976609468460083,
"learning_rate": 9.3273392326005e-06,
"loss": 0.9412,
"step": 3923
},
{
"epoch": 1.0436170212765958,
"grad_norm": 3.979386568069458,
"learning_rate": 9.32689856478315e-06,
"loss": 0.767,
"step": 3924
},
{
"epoch": 1.0438829787234043,
"grad_norm": 3.6504030227661133,
"learning_rate": 9.326457763086463e-06,
"loss": 0.7288,
"step": 3925
},
{
"epoch": 1.0441489361702128,
"grad_norm": 3.5788464546203613,
"learning_rate": 9.32601682752408e-06,
"loss": 0.7756,
"step": 3926
},
{
"epoch": 1.0444148936170212,
"grad_norm": 4.129055976867676,
"learning_rate": 9.325575758109642e-06,
"loss": 0.8129,
"step": 3927
},
{
"epoch": 1.0446808510638297,
"grad_norm": 4.022395133972168,
"learning_rate": 9.325134554856799e-06,
"loss": 0.8346,
"step": 3928
},
{
"epoch": 1.0449468085106384,
"grad_norm": 3.9106342792510986,
"learning_rate": 9.3246932177792e-06,
"loss": 0.7345,
"step": 3929
},
{
"epoch": 1.0452127659574468,
"grad_norm": 5.765318870544434,
"learning_rate": 9.324251746890501e-06,
"loss": 1.0247,
"step": 3930
},
{
"epoch": 1.0454787234042553,
"grad_norm": 3.858736276626587,
"learning_rate": 9.323810142204361e-06,
"loss": 0.8736,
"step": 3931
},
{
"epoch": 1.0457446808510638,
"grad_norm": 3.313824415206909,
"learning_rate": 9.323368403734445e-06,
"loss": 0.8105,
"step": 3932
},
{
"epoch": 1.0460106382978724,
"grad_norm": 3.7220394611358643,
"learning_rate": 9.32292653149442e-06,
"loss": 0.7904,
"step": 3933
},
{
"epoch": 1.046276595744681,
"grad_norm": 3.852928638458252,
"learning_rate": 9.32248452549796e-06,
"loss": 0.7263,
"step": 3934
},
{
"epoch": 1.0465425531914894,
"grad_norm": 3.9275519847869873,
"learning_rate": 9.322042385758738e-06,
"loss": 0.8318,
"step": 3935
},
{
"epoch": 1.0468085106382978,
"grad_norm": 4.239774227142334,
"learning_rate": 9.321600112290439e-06,
"loss": 0.7238,
"step": 3936
},
{
"epoch": 1.0470744680851063,
"grad_norm": 3.672391891479492,
"learning_rate": 9.321157705106741e-06,
"loss": 0.87,
"step": 3937
},
{
"epoch": 1.047340425531915,
"grad_norm": 3.510413646697998,
"learning_rate": 9.320715164221338e-06,
"loss": 0.7332,
"step": 3938
},
{
"epoch": 1.0476063829787234,
"grad_norm": 3.9943974018096924,
"learning_rate": 9.32027248964792e-06,
"loss": 0.7492,
"step": 3939
},
{
"epoch": 1.047872340425532,
"grad_norm": 3.3832719326019287,
"learning_rate": 9.319829681400185e-06,
"loss": 0.7657,
"step": 3940
},
{
"epoch": 1.0481382978723404,
"grad_norm": 3.761160135269165,
"learning_rate": 9.319386739491834e-06,
"loss": 0.7968,
"step": 3941
},
{
"epoch": 1.048404255319149,
"grad_norm": 3.9942009449005127,
"learning_rate": 9.31894366393657e-06,
"loss": 0.8027,
"step": 3942
},
{
"epoch": 1.0486702127659575,
"grad_norm": 3.8257179260253906,
"learning_rate": 9.318500454748105e-06,
"loss": 0.8245,
"step": 3943
},
{
"epoch": 1.048936170212766,
"grad_norm": 4.181244850158691,
"learning_rate": 9.318057111940153e-06,
"loss": 0.7048,
"step": 3944
},
{
"epoch": 1.0492021276595744,
"grad_norm": 4.021924018859863,
"learning_rate": 9.317613635526431e-06,
"loss": 0.8669,
"step": 3945
},
{
"epoch": 1.049468085106383,
"grad_norm": 4.112471580505371,
"learning_rate": 9.317170025520656e-06,
"loss": 0.7719,
"step": 3946
},
{
"epoch": 1.0497340425531916,
"grad_norm": 4.079671859741211,
"learning_rate": 9.31672628193656e-06,
"loss": 0.9156,
"step": 3947
},
{
"epoch": 1.05,
"grad_norm": 3.6803247928619385,
"learning_rate": 9.31628240478787e-06,
"loss": 0.741,
"step": 3948
},
{
"epoch": 1.0502659574468085,
"grad_norm": 3.8785572052001953,
"learning_rate": 9.315838394088322e-06,
"loss": 0.7652,
"step": 3949
},
{
"epoch": 1.050531914893617,
"grad_norm": 3.9115874767303467,
"learning_rate": 9.31539424985165e-06,
"loss": 0.8373,
"step": 3950
},
{
"epoch": 1.0507978723404254,
"grad_norm": 4.03147029876709,
"learning_rate": 9.3149499720916e-06,
"loss": 0.7918,
"step": 3951
},
{
"epoch": 1.0510638297872341,
"grad_norm": 3.7957963943481445,
"learning_rate": 9.31450556082192e-06,
"loss": 0.8583,
"step": 3952
},
{
"epoch": 1.0513297872340426,
"grad_norm": 3.83341646194458,
"learning_rate": 9.314061016056354e-06,
"loss": 0.8166,
"step": 3953
},
{
"epoch": 1.051595744680851,
"grad_norm": 3.7149436473846436,
"learning_rate": 9.313616337808664e-06,
"loss": 0.7958,
"step": 3954
},
{
"epoch": 1.0518617021276595,
"grad_norm": 3.941300392150879,
"learning_rate": 9.313171526092606e-06,
"loss": 0.8765,
"step": 3955
},
{
"epoch": 1.0521276595744682,
"grad_norm": 3.688690423965454,
"learning_rate": 9.312726580921942e-06,
"loss": 0.7011,
"step": 3956
},
{
"epoch": 1.0523936170212767,
"grad_norm": 3.683009147644043,
"learning_rate": 9.31228150231044e-06,
"loss": 0.7307,
"step": 3957
},
{
"epoch": 1.0526595744680851,
"grad_norm": 3.816660165786743,
"learning_rate": 9.311836290271872e-06,
"loss": 0.8001,
"step": 3958
},
{
"epoch": 1.0529255319148936,
"grad_norm": 3.8870654106140137,
"learning_rate": 9.311390944820012e-06,
"loss": 0.7563,
"step": 3959
},
{
"epoch": 1.053191489361702,
"grad_norm": 4.011544704437256,
"learning_rate": 9.31094546596864e-06,
"loss": 0.946,
"step": 3960
},
{
"epoch": 1.0534574468085107,
"grad_norm": 4.572283744812012,
"learning_rate": 9.31049985373154e-06,
"loss": 0.8803,
"step": 3961
},
{
"epoch": 1.0537234042553192,
"grad_norm": 3.7621991634368896,
"learning_rate": 9.310054108122499e-06,
"loss": 0.8607,
"step": 3962
},
{
"epoch": 1.0539893617021276,
"grad_norm": 3.4957644939422607,
"learning_rate": 9.309608229155311e-06,
"loss": 0.7627,
"step": 3963
},
{
"epoch": 1.054255319148936,
"grad_norm": 4.007942199707031,
"learning_rate": 9.30916221684377e-06,
"loss": 0.7599,
"step": 3964
},
{
"epoch": 1.0545212765957448,
"grad_norm": 3.790900945663452,
"learning_rate": 9.308716071201676e-06,
"loss": 0.6845,
"step": 3965
},
{
"epoch": 1.0547872340425533,
"grad_norm": 4.06134557723999,
"learning_rate": 9.308269792242833e-06,
"loss": 0.8446,
"step": 3966
},
{
"epoch": 1.0550531914893617,
"grad_norm": 3.927212715148926,
"learning_rate": 9.30782337998105e-06,
"loss": 0.8009,
"step": 3967
},
{
"epoch": 1.0553191489361702,
"grad_norm": 3.9333722591400146,
"learning_rate": 9.307376834430142e-06,
"loss": 0.8184,
"step": 3968
},
{
"epoch": 1.0555851063829786,
"grad_norm": 4.4977288246154785,
"learning_rate": 9.306930155603923e-06,
"loss": 0.841,
"step": 3969
},
{
"epoch": 1.0558510638297873,
"grad_norm": 3.587890386581421,
"learning_rate": 9.306483343516212e-06,
"loss": 0.6937,
"step": 3970
},
{
"epoch": 1.0561170212765958,
"grad_norm": 4.001445293426514,
"learning_rate": 9.30603639818084e-06,
"loss": 0.8711,
"step": 3971
},
{
"epoch": 1.0563829787234043,
"grad_norm": 3.6268887519836426,
"learning_rate": 9.30558931961163e-06,
"loss": 0.7053,
"step": 3972
},
{
"epoch": 1.0566489361702127,
"grad_norm": 3.929903030395508,
"learning_rate": 9.305142107822415e-06,
"loss": 0.8549,
"step": 3973
},
{
"epoch": 1.0569148936170212,
"grad_norm": 3.7672524452209473,
"learning_rate": 9.304694762827038e-06,
"loss": 0.6872,
"step": 3974
},
{
"epoch": 1.0571808510638299,
"grad_norm": 4.7689738273620605,
"learning_rate": 9.304247284639335e-06,
"loss": 0.8544,
"step": 3975
},
{
"epoch": 1.0574468085106383,
"grad_norm": 3.8088295459747314,
"learning_rate": 9.303799673273153e-06,
"loss": 0.7047,
"step": 3976
},
{
"epoch": 1.0577127659574468,
"grad_norm": 4.246236324310303,
"learning_rate": 9.303351928742344e-06,
"loss": 0.7887,
"step": 3977
},
{
"epoch": 1.0579787234042553,
"grad_norm": 3.864558696746826,
"learning_rate": 9.302904051060758e-06,
"loss": 0.828,
"step": 3978
},
{
"epoch": 1.058244680851064,
"grad_norm": 4.24592399597168,
"learning_rate": 9.302456040242257e-06,
"loss": 0.7851,
"step": 3979
},
{
"epoch": 1.0585106382978724,
"grad_norm": 4.1537909507751465,
"learning_rate": 9.302007896300697e-06,
"loss": 0.8281,
"step": 3980
},
{
"epoch": 1.0587765957446809,
"grad_norm": 4.180373668670654,
"learning_rate": 9.30155961924995e-06,
"loss": 0.8334,
"step": 3981
},
{
"epoch": 1.0590425531914893,
"grad_norm": 3.3669097423553467,
"learning_rate": 9.301111209103883e-06,
"loss": 0.745,
"step": 3982
},
{
"epoch": 1.0593085106382978,
"grad_norm": 3.8249645233154297,
"learning_rate": 9.300662665876373e-06,
"loss": 0.8035,
"step": 3983
},
{
"epoch": 1.0595744680851065,
"grad_norm": 3.8265540599823,
"learning_rate": 9.300213989581294e-06,
"loss": 0.708,
"step": 3984
},
{
"epoch": 1.059840425531915,
"grad_norm": 4.226235866546631,
"learning_rate": 9.299765180232534e-06,
"loss": 0.8594,
"step": 3985
},
{
"epoch": 1.0601063829787234,
"grad_norm": 4.107953071594238,
"learning_rate": 9.299316237843976e-06,
"loss": 0.8162,
"step": 3986
},
{
"epoch": 1.0603723404255319,
"grad_norm": 3.8606715202331543,
"learning_rate": 9.298867162429511e-06,
"loss": 0.7562,
"step": 3987
},
{
"epoch": 1.0606382978723403,
"grad_norm": 3.6489405632019043,
"learning_rate": 9.298417954003036e-06,
"loss": 0.7331,
"step": 3988
},
{
"epoch": 1.060904255319149,
"grad_norm": 4.5174150466918945,
"learning_rate": 9.297968612578448e-06,
"loss": 0.8392,
"step": 3989
},
{
"epoch": 1.0611702127659575,
"grad_norm": 3.8880250453948975,
"learning_rate": 9.29751913816965e-06,
"loss": 0.8565,
"step": 3990
},
{
"epoch": 1.061436170212766,
"grad_norm": 3.8482306003570557,
"learning_rate": 9.297069530790552e-06,
"loss": 0.6222,
"step": 3991
},
{
"epoch": 1.0617021276595744,
"grad_norm": 3.9345664978027344,
"learning_rate": 9.296619790455062e-06,
"loss": 0.7166,
"step": 3992
},
{
"epoch": 1.061968085106383,
"grad_norm": 4.360013961791992,
"learning_rate": 9.296169917177099e-06,
"loss": 0.7584,
"step": 3993
},
{
"epoch": 1.0622340425531915,
"grad_norm": 3.7796449661254883,
"learning_rate": 9.295719910970577e-06,
"loss": 0.8688,
"step": 3994
},
{
"epoch": 1.0625,
"grad_norm": 3.968502998352051,
"learning_rate": 9.295269771849426e-06,
"loss": 0.7795,
"step": 3995
},
{
"epoch": 1.0627659574468085,
"grad_norm": 4.514654636383057,
"learning_rate": 9.294819499827572e-06,
"loss": 0.8955,
"step": 3996
},
{
"epoch": 1.063031914893617,
"grad_norm": 3.8706483840942383,
"learning_rate": 9.294369094918945e-06,
"loss": 0.7875,
"step": 3997
},
{
"epoch": 1.0632978723404256,
"grad_norm": 3.6928679943084717,
"learning_rate": 9.293918557137483e-06,
"loss": 0.7198,
"step": 3998
},
{
"epoch": 1.063563829787234,
"grad_norm": 3.9840540885925293,
"learning_rate": 9.293467886497123e-06,
"loss": 0.8831,
"step": 3999
},
{
"epoch": 1.0638297872340425,
"grad_norm": 4.153161525726318,
"learning_rate": 9.293017083011814e-06,
"loss": 0.8204,
"step": 4000
},
{
"epoch": 1.0638297872340425,
"eval_loss": 1.3173630237579346,
"eval_runtime": 13.912,
"eval_samples_per_second": 28.752,
"eval_steps_per_second": 3.594,
"step": 4000
},
{
"epoch": 1.064095744680851,
"grad_norm": 3.50370717048645,
"learning_rate": 9.2925661466955e-06,
"loss": 0.6799,
"step": 4001
},
{
"epoch": 1.0643617021276595,
"grad_norm": 3.481992244720459,
"learning_rate": 9.292115077562138e-06,
"loss": 0.6651,
"step": 4002
},
{
"epoch": 1.0646276595744681,
"grad_norm": 3.986703634262085,
"learning_rate": 9.291663875625681e-06,
"loss": 0.713,
"step": 4003
},
{
"epoch": 1.0648936170212766,
"grad_norm": 3.7703604698181152,
"learning_rate": 9.291212540900091e-06,
"loss": 0.8728,
"step": 4004
},
{
"epoch": 1.065159574468085,
"grad_norm": 3.9758448600769043,
"learning_rate": 9.290761073399333e-06,
"loss": 0.8273,
"step": 4005
},
{
"epoch": 1.0654255319148935,
"grad_norm": 3.999802350997925,
"learning_rate": 9.290309473137376e-06,
"loss": 0.8826,
"step": 4006
},
{
"epoch": 1.0656914893617022,
"grad_norm": 4.072256088256836,
"learning_rate": 9.289857740128192e-06,
"loss": 0.8037,
"step": 4007
},
{
"epoch": 1.0659574468085107,
"grad_norm": 3.619701623916626,
"learning_rate": 9.289405874385759e-06,
"loss": 0.6833,
"step": 4008
},
{
"epoch": 1.0662234042553191,
"grad_norm": 4.227363586425781,
"learning_rate": 9.288953875924057e-06,
"loss": 0.8688,
"step": 4009
},
{
"epoch": 1.0664893617021276,
"grad_norm": 3.589017629623413,
"learning_rate": 9.288501744757073e-06,
"loss": 0.6888,
"step": 4010
},
{
"epoch": 1.0667553191489363,
"grad_norm": 3.9024956226348877,
"learning_rate": 9.288049480898797e-06,
"loss": 0.8349,
"step": 4011
},
{
"epoch": 1.0670212765957447,
"grad_norm": 3.854668617248535,
"learning_rate": 9.287597084363222e-06,
"loss": 0.8158,
"step": 4012
},
{
"epoch": 1.0672872340425532,
"grad_norm": 3.511909008026123,
"learning_rate": 9.287144555164343e-06,
"loss": 0.8076,
"step": 4013
},
{
"epoch": 1.0675531914893617,
"grad_norm": 4.2021098136901855,
"learning_rate": 9.286691893316165e-06,
"loss": 0.8434,
"step": 4014
},
{
"epoch": 1.0678191489361701,
"grad_norm": 3.823734760284424,
"learning_rate": 9.286239098832693e-06,
"loss": 0.8124,
"step": 4015
},
{
"epoch": 1.0680851063829788,
"grad_norm": 3.6504952907562256,
"learning_rate": 9.285786171727938e-06,
"loss": 0.7402,
"step": 4016
},
{
"epoch": 1.0683510638297873,
"grad_norm": 3.7579758167266846,
"learning_rate": 9.28533311201591e-06,
"loss": 0.8335,
"step": 4017
},
{
"epoch": 1.0686170212765957,
"grad_norm": 3.902036428451538,
"learning_rate": 9.284879919710631e-06,
"loss": 0.8564,
"step": 4018
},
{
"epoch": 1.0688829787234042,
"grad_norm": 3.6956422328948975,
"learning_rate": 9.284426594826124e-06,
"loss": 0.7766,
"step": 4019
},
{
"epoch": 1.0691489361702127,
"grad_norm": 3.866909980773926,
"learning_rate": 9.283973137376414e-06,
"loss": 0.8988,
"step": 4020
},
{
"epoch": 1.0694148936170214,
"grad_norm": 4.163184642791748,
"learning_rate": 9.28351954737553e-06,
"loss": 0.9235,
"step": 4021
},
{
"epoch": 1.0696808510638298,
"grad_norm": 4.208329200744629,
"learning_rate": 9.28306582483751e-06,
"loss": 0.7734,
"step": 4022
},
{
"epoch": 1.0699468085106383,
"grad_norm": 4.030316352844238,
"learning_rate": 9.28261196977639e-06,
"loss": 0.8427,
"step": 4023
},
{
"epoch": 1.0702127659574467,
"grad_norm": 3.842853307723999,
"learning_rate": 9.282157982206212e-06,
"loss": 0.8647,
"step": 4024
},
{
"epoch": 1.0704787234042552,
"grad_norm": 4.306194305419922,
"learning_rate": 9.281703862141024e-06,
"loss": 0.7107,
"step": 4025
},
{
"epoch": 1.070744680851064,
"grad_norm": 4.034607887268066,
"learning_rate": 9.28124960959488e-06,
"loss": 0.76,
"step": 4026
},
{
"epoch": 1.0710106382978724,
"grad_norm": 4.018486022949219,
"learning_rate": 9.280795224581832e-06,
"loss": 0.8058,
"step": 4027
},
{
"epoch": 1.0712765957446808,
"grad_norm": 4.060681343078613,
"learning_rate": 9.280340707115938e-06,
"loss": 0.772,
"step": 4028
},
{
"epoch": 1.0715425531914893,
"grad_norm": 3.8870697021484375,
"learning_rate": 9.279886057211264e-06,
"loss": 0.8036,
"step": 4029
},
{
"epoch": 1.071808510638298,
"grad_norm": 3.455979585647583,
"learning_rate": 9.279431274881876e-06,
"loss": 0.6292,
"step": 4030
},
{
"epoch": 1.0720744680851064,
"grad_norm": 3.5263242721557617,
"learning_rate": 9.278976360141848e-06,
"loss": 0.7937,
"step": 4031
},
{
"epoch": 1.0723404255319149,
"grad_norm": 4.214826583862305,
"learning_rate": 9.27852131300525e-06,
"loss": 0.8888,
"step": 4032
},
{
"epoch": 1.0726063829787233,
"grad_norm": 3.6315364837646484,
"learning_rate": 9.278066133486167e-06,
"loss": 0.7101,
"step": 4033
},
{
"epoch": 1.0728723404255318,
"grad_norm": 4.311771869659424,
"learning_rate": 9.277610821598682e-06,
"loss": 0.8687,
"step": 4034
},
{
"epoch": 1.0731382978723405,
"grad_norm": 3.720752716064453,
"learning_rate": 9.277155377356881e-06,
"loss": 0.709,
"step": 4035
},
{
"epoch": 1.073404255319149,
"grad_norm": 3.8687169551849365,
"learning_rate": 9.276699800774858e-06,
"loss": 0.7483,
"step": 4036
},
{
"epoch": 1.0736702127659574,
"grad_norm": 4.010682582855225,
"learning_rate": 9.276244091866706e-06,
"loss": 0.7954,
"step": 4037
},
{
"epoch": 1.0739361702127659,
"grad_norm": 3.9716639518737793,
"learning_rate": 9.27578825064653e-06,
"loss": 0.8228,
"step": 4038
},
{
"epoch": 1.0742021276595746,
"grad_norm": 3.6064131259918213,
"learning_rate": 9.275332277128428e-06,
"loss": 0.8019,
"step": 4039
},
{
"epoch": 1.074468085106383,
"grad_norm": 3.986684560775757,
"learning_rate": 9.274876171326514e-06,
"loss": 0.7684,
"step": 4040
},
{
"epoch": 1.0747340425531915,
"grad_norm": 3.6139955520629883,
"learning_rate": 9.274419933254897e-06,
"loss": 0.7885,
"step": 4041
},
{
"epoch": 1.075,
"grad_norm": 4.203228950500488,
"learning_rate": 9.273963562927695e-06,
"loss": 0.8082,
"step": 4042
},
{
"epoch": 1.0752659574468084,
"grad_norm": 4.109843730926514,
"learning_rate": 9.27350706035903e-06,
"loss": 0.6948,
"step": 4043
},
{
"epoch": 1.075531914893617,
"grad_norm": 3.8464603424072266,
"learning_rate": 9.273050425563023e-06,
"loss": 0.8871,
"step": 4044
},
{
"epoch": 1.0757978723404256,
"grad_norm": 3.8080790042877197,
"learning_rate": 9.272593658553806e-06,
"loss": 0.7375,
"step": 4045
},
{
"epoch": 1.076063829787234,
"grad_norm": 3.829904556274414,
"learning_rate": 9.272136759345512e-06,
"loss": 0.7572,
"step": 4046
},
{
"epoch": 1.0763297872340425,
"grad_norm": 4.1604390144348145,
"learning_rate": 9.271679727952274e-06,
"loss": 0.7503,
"step": 4047
},
{
"epoch": 1.076595744680851,
"grad_norm": 3.538896322250366,
"learning_rate": 9.271222564388238e-06,
"loss": 0.7042,
"step": 4048
},
{
"epoch": 1.0768617021276596,
"grad_norm": 3.960331439971924,
"learning_rate": 9.270765268667547e-06,
"loss": 0.8119,
"step": 4049
},
{
"epoch": 1.077127659574468,
"grad_norm": 4.355499267578125,
"learning_rate": 9.270307840804349e-06,
"loss": 0.8219,
"step": 4050
},
{
"epoch": 1.0773936170212766,
"grad_norm": 4.223673343658447,
"learning_rate": 9.2698502808128e-06,
"loss": 0.782,
"step": 4051
},
{
"epoch": 1.077659574468085,
"grad_norm": 3.8911452293395996,
"learning_rate": 9.269392588707056e-06,
"loss": 0.8562,
"step": 4052
},
{
"epoch": 1.0779255319148937,
"grad_norm": 3.9379541873931885,
"learning_rate": 9.268934764501279e-06,
"loss": 0.8103,
"step": 4053
},
{
"epoch": 1.0781914893617022,
"grad_norm": 4.371243000030518,
"learning_rate": 9.268476808209635e-06,
"loss": 0.7773,
"step": 4054
},
{
"epoch": 1.0784574468085106,
"grad_norm": 3.5743019580841064,
"learning_rate": 9.26801871984629e-06,
"loss": 0.8976,
"step": 4055
},
{
"epoch": 1.078723404255319,
"grad_norm": 3.959336280822754,
"learning_rate": 9.267560499425425e-06,
"loss": 0.8294,
"step": 4056
},
{
"epoch": 1.0789893617021276,
"grad_norm": 3.2908687591552734,
"learning_rate": 9.267102146961211e-06,
"loss": 0.7021,
"step": 4057
},
{
"epoch": 1.0792553191489362,
"grad_norm": 3.952495574951172,
"learning_rate": 9.266643662467834e-06,
"loss": 0.8368,
"step": 4058
},
{
"epoch": 1.0795212765957447,
"grad_norm": 3.691890239715576,
"learning_rate": 9.266185045959478e-06,
"loss": 0.7606,
"step": 4059
},
{
"epoch": 1.0797872340425532,
"grad_norm": 4.092920780181885,
"learning_rate": 9.265726297450332e-06,
"loss": 0.7791,
"step": 4060
},
{
"epoch": 1.0800531914893616,
"grad_norm": 4.004536151885986,
"learning_rate": 9.265267416954595e-06,
"loss": 0.7055,
"step": 4061
},
{
"epoch": 1.0803191489361703,
"grad_norm": 3.7672064304351807,
"learning_rate": 9.26480840448646e-06,
"loss": 0.7552,
"step": 4062
},
{
"epoch": 1.0805851063829788,
"grad_norm": 3.8815436363220215,
"learning_rate": 9.264349260060134e-06,
"loss": 0.7602,
"step": 4063
},
{
"epoch": 1.0808510638297872,
"grad_norm": 4.021637916564941,
"learning_rate": 9.26388998368982e-06,
"loss": 0.7595,
"step": 4064
},
{
"epoch": 1.0811170212765957,
"grad_norm": 3.9159035682678223,
"learning_rate": 9.26343057538973e-06,
"loss": 0.7554,
"step": 4065
},
{
"epoch": 1.0813829787234042,
"grad_norm": 3.9444377422332764,
"learning_rate": 9.26297103517408e-06,
"loss": 0.6694,
"step": 4066
},
{
"epoch": 1.0816489361702128,
"grad_norm": 3.8889427185058594,
"learning_rate": 9.262511363057085e-06,
"loss": 0.7356,
"step": 4067
},
{
"epoch": 1.0819148936170213,
"grad_norm": 4.03524923324585,
"learning_rate": 9.262051559052972e-06,
"loss": 0.6715,
"step": 4068
},
{
"epoch": 1.0821808510638298,
"grad_norm": 4.430936336517334,
"learning_rate": 9.261591623175965e-06,
"loss": 0.9173,
"step": 4069
},
{
"epoch": 1.0824468085106382,
"grad_norm": 3.784855604171753,
"learning_rate": 9.261131555440295e-06,
"loss": 0.8472,
"step": 4070
},
{
"epoch": 1.0827127659574467,
"grad_norm": 3.9647388458251953,
"learning_rate": 9.260671355860196e-06,
"loss": 0.6908,
"step": 4071
},
{
"epoch": 1.0829787234042554,
"grad_norm": 4.330158710479736,
"learning_rate": 9.260211024449913e-06,
"loss": 0.7744,
"step": 4072
},
{
"epoch": 1.0832446808510638,
"grad_norm": 3.934960126876831,
"learning_rate": 9.259750561223682e-06,
"loss": 0.7585,
"step": 4073
},
{
"epoch": 1.0835106382978723,
"grad_norm": 4.234976291656494,
"learning_rate": 9.259289966195754e-06,
"loss": 0.7642,
"step": 4074
},
{
"epoch": 1.0837765957446808,
"grad_norm": 4.297840118408203,
"learning_rate": 9.25882923938038e-06,
"loss": 0.8493,
"step": 4075
},
{
"epoch": 1.0840425531914895,
"grad_norm": 3.9343340396881104,
"learning_rate": 9.258368380791818e-06,
"loss": 0.8649,
"step": 4076
},
{
"epoch": 1.084308510638298,
"grad_norm": 4.02085018157959,
"learning_rate": 9.257907390444322e-06,
"loss": 0.7595,
"step": 4077
},
{
"epoch": 1.0845744680851064,
"grad_norm": 4.010712146759033,
"learning_rate": 9.257446268352158e-06,
"loss": 0.9151,
"step": 4078
},
{
"epoch": 1.0848404255319148,
"grad_norm": 3.8062400817871094,
"learning_rate": 9.256985014529595e-06,
"loss": 0.8318,
"step": 4079
},
{
"epoch": 1.0851063829787233,
"grad_norm": 4.219789505004883,
"learning_rate": 9.256523628990903e-06,
"loss": 0.7924,
"step": 4080
},
{
"epoch": 1.085372340425532,
"grad_norm": 3.7686777114868164,
"learning_rate": 9.25606211175036e-06,
"loss": 0.8027,
"step": 4081
},
{
"epoch": 1.0856382978723405,
"grad_norm": 3.6773087978363037,
"learning_rate": 9.255600462822241e-06,
"loss": 0.7568,
"step": 4082
},
{
"epoch": 1.085904255319149,
"grad_norm": 3.480522394180298,
"learning_rate": 9.255138682220837e-06,
"loss": 0.7156,
"step": 4083
},
{
"epoch": 1.0861702127659574,
"grad_norm": 3.8398611545562744,
"learning_rate": 9.254676769960429e-06,
"loss": 0.7162,
"step": 4084
},
{
"epoch": 1.086436170212766,
"grad_norm": 3.8505029678344727,
"learning_rate": 9.254214726055314e-06,
"loss": 0.8488,
"step": 4085
},
{
"epoch": 1.0867021276595745,
"grad_norm": 4.238323211669922,
"learning_rate": 9.253752550519787e-06,
"loss": 0.8742,
"step": 4086
},
{
"epoch": 1.086968085106383,
"grad_norm": 3.7396814823150635,
"learning_rate": 9.253290243368149e-06,
"loss": 0.8127,
"step": 4087
},
{
"epoch": 1.0872340425531914,
"grad_norm": 4.44807767868042,
"learning_rate": 9.2528278046147e-06,
"loss": 0.8144,
"step": 4088
},
{
"epoch": 1.0875,
"grad_norm": 3.88287091255188,
"learning_rate": 9.252365234273754e-06,
"loss": 0.691,
"step": 4089
},
{
"epoch": 1.0877659574468086,
"grad_norm": 3.7738873958587646,
"learning_rate": 9.251902532359622e-06,
"loss": 0.7662,
"step": 4090
},
{
"epoch": 1.088031914893617,
"grad_norm": 3.789278745651245,
"learning_rate": 9.251439698886618e-06,
"loss": 0.7773,
"step": 4091
},
{
"epoch": 1.0882978723404255,
"grad_norm": 3.8501172065734863,
"learning_rate": 9.250976733869065e-06,
"loss": 0.795,
"step": 4092
},
{
"epoch": 1.088563829787234,
"grad_norm": 4.324002265930176,
"learning_rate": 9.250513637321287e-06,
"loss": 0.7957,
"step": 4093
},
{
"epoch": 1.0888297872340424,
"grad_norm": 3.598450183868408,
"learning_rate": 9.250050409257612e-06,
"loss": 0.8029,
"step": 4094
},
{
"epoch": 1.0890957446808511,
"grad_norm": 3.749985694885254,
"learning_rate": 9.249587049692375e-06,
"loss": 0.7377,
"step": 4095
},
{
"epoch": 1.0893617021276596,
"grad_norm": 3.7555527687072754,
"learning_rate": 9.24912355863991e-06,
"loss": 0.7276,
"step": 4096
},
{
"epoch": 1.089627659574468,
"grad_norm": 3.826099395751953,
"learning_rate": 9.248659936114558e-06,
"loss": 0.9592,
"step": 4097
},
{
"epoch": 1.0898936170212765,
"grad_norm": 4.4053263664245605,
"learning_rate": 9.248196182130669e-06,
"loss": 0.846,
"step": 4098
},
{
"epoch": 1.0901595744680852,
"grad_norm": 3.7693631649017334,
"learning_rate": 9.247732296702586e-06,
"loss": 0.8702,
"step": 4099
},
{
"epoch": 1.0904255319148937,
"grad_norm": 3.8193347454071045,
"learning_rate": 9.247268279844666e-06,
"loss": 0.8124,
"step": 4100
},
{
"epoch": 1.0906914893617021,
"grad_norm": 3.5872762203216553,
"learning_rate": 9.246804131571263e-06,
"loss": 0.8409,
"step": 4101
},
{
"epoch": 1.0909574468085106,
"grad_norm": 3.6679608821868896,
"learning_rate": 9.246339851896742e-06,
"loss": 0.8331,
"step": 4102
},
{
"epoch": 1.091223404255319,
"grad_norm": 3.838644027709961,
"learning_rate": 9.245875440835466e-06,
"loss": 0.8683,
"step": 4103
},
{
"epoch": 1.0914893617021277,
"grad_norm": 4.146610736846924,
"learning_rate": 9.245410898401806e-06,
"loss": 0.7721,
"step": 4104
},
{
"epoch": 1.0917553191489362,
"grad_norm": 3.685303211212158,
"learning_rate": 9.244946224610132e-06,
"loss": 0.6993,
"step": 4105
},
{
"epoch": 1.0920212765957447,
"grad_norm": 3.9541261196136475,
"learning_rate": 9.244481419474824e-06,
"loss": 0.7942,
"step": 4106
},
{
"epoch": 1.0922872340425531,
"grad_norm": 4.122397422790527,
"learning_rate": 9.244016483010266e-06,
"loss": 0.7709,
"step": 4107
},
{
"epoch": 1.0925531914893618,
"grad_norm": 4.400294303894043,
"learning_rate": 9.24355141523084e-06,
"loss": 0.8702,
"step": 4108
},
{
"epoch": 1.0928191489361703,
"grad_norm": 4.555760383605957,
"learning_rate": 9.243086216150938e-06,
"loss": 0.8594,
"step": 4109
},
{
"epoch": 1.0930851063829787,
"grad_norm": 4.033708095550537,
"learning_rate": 9.242620885784952e-06,
"loss": 0.9066,
"step": 4110
},
{
"epoch": 1.0933510638297872,
"grad_norm": 3.908421754837036,
"learning_rate": 9.24215542414728e-06,
"loss": 0.7454,
"step": 4111
},
{
"epoch": 1.0936170212765957,
"grad_norm": 3.8368232250213623,
"learning_rate": 9.241689831252327e-06,
"loss": 0.6895,
"step": 4112
},
{
"epoch": 1.0938829787234043,
"grad_norm": 3.6774628162384033,
"learning_rate": 9.241224107114495e-06,
"loss": 0.8634,
"step": 4113
},
{
"epoch": 1.0941489361702128,
"grad_norm": 4.185787677764893,
"learning_rate": 9.240758251748195e-06,
"loss": 0.8685,
"step": 4114
},
{
"epoch": 1.0944148936170213,
"grad_norm": 3.8751626014709473,
"learning_rate": 9.240292265167843e-06,
"loss": 0.86,
"step": 4115
},
{
"epoch": 1.0946808510638297,
"grad_norm": 4.215353965759277,
"learning_rate": 9.239826147387857e-06,
"loss": 0.8188,
"step": 4116
},
{
"epoch": 1.0949468085106382,
"grad_norm": 3.7287204265594482,
"learning_rate": 9.239359898422656e-06,
"loss": 0.71,
"step": 4117
},
{
"epoch": 1.0952127659574469,
"grad_norm": 3.8123693466186523,
"learning_rate": 9.238893518286668e-06,
"loss": 0.7727,
"step": 4118
},
{
"epoch": 1.0954787234042553,
"grad_norm": 3.990419626235962,
"learning_rate": 9.238427006994325e-06,
"loss": 0.7953,
"step": 4119
},
{
"epoch": 1.0957446808510638,
"grad_norm": 3.976417303085327,
"learning_rate": 9.237960364560063e-06,
"loss": 0.8596,
"step": 4120
},
{
"epoch": 1.0960106382978723,
"grad_norm": 4.219186305999756,
"learning_rate": 9.237493590998315e-06,
"loss": 0.809,
"step": 4121
},
{
"epoch": 1.096276595744681,
"grad_norm": 3.693594455718994,
"learning_rate": 9.237026686323527e-06,
"loss": 0.8066,
"step": 4122
},
{
"epoch": 1.0965425531914894,
"grad_norm": 3.7492263317108154,
"learning_rate": 9.236559650550143e-06,
"loss": 0.7525,
"step": 4123
},
{
"epoch": 1.0968085106382979,
"grad_norm": 4.333737850189209,
"learning_rate": 9.236092483692617e-06,
"loss": 0.8718,
"step": 4124
},
{
"epoch": 1.0970744680851063,
"grad_norm": 3.505357503890991,
"learning_rate": 9.235625185765403e-06,
"loss": 0.8482,
"step": 4125
},
{
"epoch": 1.0973404255319148,
"grad_norm": 4.302443027496338,
"learning_rate": 9.235157756782957e-06,
"loss": 1.0046,
"step": 4126
},
{
"epoch": 1.0976063829787235,
"grad_norm": 3.8847270011901855,
"learning_rate": 9.234690196759746e-06,
"loss": 0.8921,
"step": 4127
},
{
"epoch": 1.097872340425532,
"grad_norm": 3.976154327392578,
"learning_rate": 9.234222505710232e-06,
"loss": 0.7338,
"step": 4128
},
{
"epoch": 1.0981382978723404,
"grad_norm": 3.829082489013672,
"learning_rate": 9.233754683648891e-06,
"loss": 0.7554,
"step": 4129
},
{
"epoch": 1.0984042553191489,
"grad_norm": 3.693549633026123,
"learning_rate": 9.233286730590195e-06,
"loss": 0.7555,
"step": 4130
},
{
"epoch": 1.0986702127659576,
"grad_norm": 3.9820609092712402,
"learning_rate": 9.232818646548622e-06,
"loss": 0.8567,
"step": 4131
},
{
"epoch": 1.098936170212766,
"grad_norm": 3.9395439624786377,
"learning_rate": 9.232350431538656e-06,
"loss": 0.7728,
"step": 4132
},
{
"epoch": 1.0992021276595745,
"grad_norm": 4.385442733764648,
"learning_rate": 9.231882085574788e-06,
"loss": 0.7803,
"step": 4133
},
{
"epoch": 1.099468085106383,
"grad_norm": 4.260448932647705,
"learning_rate": 9.231413608671504e-06,
"loss": 0.8111,
"step": 4134
},
{
"epoch": 1.0997340425531914,
"grad_norm": 3.9470431804656982,
"learning_rate": 9.2309450008433e-06,
"loss": 0.718,
"step": 4135
},
{
"epoch": 1.1,
"grad_norm": 3.897451877593994,
"learning_rate": 9.230476262104678e-06,
"loss": 0.7257,
"step": 4136
},
{
"epoch": 1.1002659574468086,
"grad_norm": 4.178949356079102,
"learning_rate": 9.23000739247014e-06,
"loss": 0.8704,
"step": 4137
},
{
"epoch": 1.100531914893617,
"grad_norm": 3.9306554794311523,
"learning_rate": 9.22953839195419e-06,
"loss": 0.8856,
"step": 4138
},
{
"epoch": 1.1007978723404255,
"grad_norm": 3.2699522972106934,
"learning_rate": 9.229069260571346e-06,
"loss": 0.7263,
"step": 4139
},
{
"epoch": 1.101063829787234,
"grad_norm": 3.980687141418457,
"learning_rate": 9.228599998336119e-06,
"loss": 0.8805,
"step": 4140
},
{
"epoch": 1.1013297872340426,
"grad_norm": 4.091682434082031,
"learning_rate": 9.228130605263028e-06,
"loss": 0.8572,
"step": 4141
},
{
"epoch": 1.101595744680851,
"grad_norm": 3.8642654418945312,
"learning_rate": 9.2276610813666e-06,
"loss": 0.7285,
"step": 4142
},
{
"epoch": 1.1018617021276595,
"grad_norm": 3.6476948261260986,
"learning_rate": 9.227191426661359e-06,
"loss": 0.7736,
"step": 4143
},
{
"epoch": 1.102127659574468,
"grad_norm": 3.8674888610839844,
"learning_rate": 9.22672164116184e-06,
"loss": 0.6885,
"step": 4144
},
{
"epoch": 1.1023936170212767,
"grad_norm": 3.6890833377838135,
"learning_rate": 9.226251724882576e-06,
"loss": 0.9683,
"step": 4145
},
{
"epoch": 1.1026595744680852,
"grad_norm": 3.688188314437866,
"learning_rate": 9.225781677838108e-06,
"loss": 0.8236,
"step": 4146
},
{
"epoch": 1.1029255319148936,
"grad_norm": 4.241778373718262,
"learning_rate": 9.22531150004298e-06,
"loss": 0.7666,
"step": 4147
},
{
"epoch": 1.103191489361702,
"grad_norm": 3.8804636001586914,
"learning_rate": 9.22484119151174e-06,
"loss": 0.7547,
"step": 4148
},
{
"epoch": 1.1034574468085105,
"grad_norm": 3.8728346824645996,
"learning_rate": 9.224370752258938e-06,
"loss": 0.7856,
"step": 4149
},
{
"epoch": 1.1037234042553192,
"grad_norm": 3.4745118618011475,
"learning_rate": 9.223900182299132e-06,
"loss": 0.8213,
"step": 4150
},
{
"epoch": 1.1039893617021277,
"grad_norm": 3.9133832454681396,
"learning_rate": 9.223429481646881e-06,
"loss": 0.8894,
"step": 4151
},
{
"epoch": 1.1042553191489362,
"grad_norm": 3.5466485023498535,
"learning_rate": 9.22295865031675e-06,
"loss": 0.7024,
"step": 4152
},
{
"epoch": 1.1045212765957446,
"grad_norm": 4.195438385009766,
"learning_rate": 9.222487688323306e-06,
"loss": 0.9108,
"step": 4153
},
{
"epoch": 1.1047872340425533,
"grad_norm": 4.125967025756836,
"learning_rate": 9.222016595681122e-06,
"loss": 0.7909,
"step": 4154
},
{
"epoch": 1.1050531914893618,
"grad_norm": 3.8983302116394043,
"learning_rate": 9.221545372404774e-06,
"loss": 0.8179,
"step": 4155
},
{
"epoch": 1.1053191489361702,
"grad_norm": 4.264431953430176,
"learning_rate": 9.22107401850884e-06,
"loss": 0.8438,
"step": 4156
},
{
"epoch": 1.1055851063829787,
"grad_norm": 3.9519243240356445,
"learning_rate": 9.220602534007908e-06,
"loss": 0.7254,
"step": 4157
},
{
"epoch": 1.1058510638297872,
"grad_norm": 4.435789585113525,
"learning_rate": 9.220130918916563e-06,
"loss": 0.8453,
"step": 4158
},
{
"epoch": 1.1061170212765958,
"grad_norm": 4.175622463226318,
"learning_rate": 9.2196591732494e-06,
"loss": 0.8253,
"step": 4159
},
{
"epoch": 1.1063829787234043,
"grad_norm": 3.691840410232544,
"learning_rate": 9.219187297021015e-06,
"loss": 0.7372,
"step": 4160
},
{
"epoch": 1.1066489361702128,
"grad_norm": 3.997159957885742,
"learning_rate": 9.218715290246007e-06,
"loss": 0.9002,
"step": 4161
},
{
"epoch": 1.1069148936170212,
"grad_norm": 3.8894736766815186,
"learning_rate": 9.21824315293898e-06,
"loss": 0.8466,
"step": 4162
},
{
"epoch": 1.1071808510638297,
"grad_norm": 4.081361293792725,
"learning_rate": 9.217770885114544e-06,
"loss": 0.8159,
"step": 4163
},
{
"epoch": 1.1074468085106384,
"grad_norm": 3.6552507877349854,
"learning_rate": 9.21729848678731e-06,
"loss": 0.7608,
"step": 4164
},
{
"epoch": 1.1077127659574468,
"grad_norm": 3.844689130783081,
"learning_rate": 9.216825957971898e-06,
"loss": 0.8599,
"step": 4165
},
{
"epoch": 1.1079787234042553,
"grad_norm": 3.742281198501587,
"learning_rate": 9.216353298682925e-06,
"loss": 0.8188,
"step": 4166
},
{
"epoch": 1.1082446808510638,
"grad_norm": 4.145520210266113,
"learning_rate": 9.215880508935016e-06,
"loss": 0.8485,
"step": 4167
},
{
"epoch": 1.1085106382978724,
"grad_norm": 4.048991680145264,
"learning_rate": 9.2154075887428e-06,
"loss": 0.8058,
"step": 4168
},
{
"epoch": 1.108776595744681,
"grad_norm": 3.9312491416931152,
"learning_rate": 9.214934538120912e-06,
"loss": 0.8728,
"step": 4169
},
{
"epoch": 1.1090425531914894,
"grad_norm": 4.000396251678467,
"learning_rate": 9.214461357083986e-06,
"loss": 0.8695,
"step": 4170
},
{
"epoch": 1.1093085106382978,
"grad_norm": 4.0020904541015625,
"learning_rate": 9.213988045646664e-06,
"loss": 0.7386,
"step": 4171
},
{
"epoch": 1.1095744680851063,
"grad_norm": 3.527221441268921,
"learning_rate": 9.21351460382359e-06,
"loss": 0.8856,
"step": 4172
},
{
"epoch": 1.109840425531915,
"grad_norm": 3.984145164489746,
"learning_rate": 9.213041031629413e-06,
"loss": 0.7518,
"step": 4173
},
{
"epoch": 1.1101063829787234,
"grad_norm": 3.6558425426483154,
"learning_rate": 9.212567329078787e-06,
"loss": 0.7465,
"step": 4174
},
{
"epoch": 1.110372340425532,
"grad_norm": 4.261702060699463,
"learning_rate": 9.21209349618637e-06,
"loss": 0.8813,
"step": 4175
},
{
"epoch": 1.1106382978723404,
"grad_norm": 3.556643486022949,
"learning_rate": 9.211619532966817e-06,
"loss": 0.8007,
"step": 4176
},
{
"epoch": 1.110904255319149,
"grad_norm": 3.8246734142303467,
"learning_rate": 9.211145439434801e-06,
"loss": 0.7599,
"step": 4177
},
{
"epoch": 1.1111702127659575,
"grad_norm": 3.6221678256988525,
"learning_rate": 9.210671215604985e-06,
"loss": 0.8526,
"step": 4178
},
{
"epoch": 1.111436170212766,
"grad_norm": 3.6839540004730225,
"learning_rate": 9.210196861492045e-06,
"loss": 0.88,
"step": 4179
},
{
"epoch": 1.1117021276595744,
"grad_norm": 3.7845680713653564,
"learning_rate": 9.209722377110657e-06,
"loss": 0.7316,
"step": 4180
},
{
"epoch": 1.111968085106383,
"grad_norm": 3.9798831939697266,
"learning_rate": 9.209247762475502e-06,
"loss": 0.7928,
"step": 4181
},
{
"epoch": 1.1122340425531916,
"grad_norm": 3.394745349884033,
"learning_rate": 9.208773017601265e-06,
"loss": 0.7692,
"step": 4182
},
{
"epoch": 1.1125,
"grad_norm": 3.9630630016326904,
"learning_rate": 9.208298142502637e-06,
"loss": 0.8699,
"step": 4183
},
{
"epoch": 1.1127659574468085,
"grad_norm": 4.089821815490723,
"learning_rate": 9.207823137194307e-06,
"loss": 0.8295,
"step": 4184
},
{
"epoch": 1.113031914893617,
"grad_norm": 3.949355125427246,
"learning_rate": 9.20734800169098e-06,
"loss": 0.8049,
"step": 4185
},
{
"epoch": 1.1132978723404254,
"grad_norm": 3.588606119155884,
"learning_rate": 9.206872736007348e-06,
"loss": 0.7184,
"step": 4186
},
{
"epoch": 1.1135638297872341,
"grad_norm": 4.689065933227539,
"learning_rate": 9.206397340158122e-06,
"loss": 0.8687,
"step": 4187
},
{
"epoch": 1.1138297872340426,
"grad_norm": 3.685701847076416,
"learning_rate": 9.20592181415801e-06,
"loss": 0.7918,
"step": 4188
},
{
"epoch": 1.114095744680851,
"grad_norm": 4.084209442138672,
"learning_rate": 9.205446158021725e-06,
"loss": 0.888,
"step": 4189
},
{
"epoch": 1.1143617021276595,
"grad_norm": 3.9949495792388916,
"learning_rate": 9.204970371763984e-06,
"loss": 0.7975,
"step": 4190
},
{
"epoch": 1.1146276595744682,
"grad_norm": 4.016841888427734,
"learning_rate": 9.204494455399509e-06,
"loss": 0.8413,
"step": 4191
},
{
"epoch": 1.1148936170212767,
"grad_norm": 4.1810712814331055,
"learning_rate": 9.204018408943026e-06,
"loss": 0.7981,
"step": 4192
},
{
"epoch": 1.1151595744680851,
"grad_norm": 3.305906295776367,
"learning_rate": 9.203542232409263e-06,
"loss": 0.6931,
"step": 4193
},
{
"epoch": 1.1154255319148936,
"grad_norm": 4.138253688812256,
"learning_rate": 9.203065925812955e-06,
"loss": 0.7971,
"step": 4194
},
{
"epoch": 1.115691489361702,
"grad_norm": 4.11892557144165,
"learning_rate": 9.20258948916884e-06,
"loss": 0.7175,
"step": 4195
},
{
"epoch": 1.1159574468085107,
"grad_norm": 3.4274680614471436,
"learning_rate": 9.202112922491657e-06,
"loss": 0.7685,
"step": 4196
},
{
"epoch": 1.1162234042553192,
"grad_norm": 3.894113540649414,
"learning_rate": 9.201636225796151e-06,
"loss": 0.6782,
"step": 4197
},
{
"epoch": 1.1164893617021276,
"grad_norm": 4.417131423950195,
"learning_rate": 9.201159399097077e-06,
"loss": 0.7756,
"step": 4198
},
{
"epoch": 1.116755319148936,
"grad_norm": 4.476882457733154,
"learning_rate": 9.200682442409183e-06,
"loss": 0.8896,
"step": 4199
},
{
"epoch": 1.1170212765957448,
"grad_norm": 3.9255595207214355,
"learning_rate": 9.200205355747228e-06,
"loss": 0.669,
"step": 4200
},
{
"epoch": 1.1172872340425533,
"grad_norm": 3.3451404571533203,
"learning_rate": 9.199728139125976e-06,
"loss": 0.6271,
"step": 4201
},
{
"epoch": 1.1175531914893617,
"grad_norm": 4.113248825073242,
"learning_rate": 9.199250792560187e-06,
"loss": 0.8501,
"step": 4202
},
{
"epoch": 1.1178191489361702,
"grad_norm": 3.8352253437042236,
"learning_rate": 9.198773316064639e-06,
"loss": 0.6881,
"step": 4203
},
{
"epoch": 1.1180851063829786,
"grad_norm": 3.8396568298339844,
"learning_rate": 9.1982957096541e-06,
"loss": 0.695,
"step": 4204
},
{
"epoch": 1.1183510638297873,
"grad_norm": 4.240661144256592,
"learning_rate": 9.197817973343347e-06,
"loss": 0.8287,
"step": 4205
},
{
"epoch": 1.1186170212765958,
"grad_norm": 3.553846836090088,
"learning_rate": 9.197340107147166e-06,
"loss": 0.7441,
"step": 4206
},
{
"epoch": 1.1188829787234043,
"grad_norm": 4.087765693664551,
"learning_rate": 9.196862111080339e-06,
"loss": 0.6896,
"step": 4207
},
{
"epoch": 1.1191489361702127,
"grad_norm": 4.254801273345947,
"learning_rate": 9.196383985157657e-06,
"loss": 0.794,
"step": 4208
},
{
"epoch": 1.1194148936170212,
"grad_norm": 3.8654487133026123,
"learning_rate": 9.195905729393913e-06,
"loss": 0.7891,
"step": 4209
},
{
"epoch": 1.1196808510638299,
"grad_norm": 4.078755855560303,
"learning_rate": 9.195427343803906e-06,
"loss": 0.9686,
"step": 4210
},
{
"epoch": 1.1199468085106383,
"grad_norm": 3.3730618953704834,
"learning_rate": 9.19494882840244e-06,
"loss": 0.7186,
"step": 4211
},
{
"epoch": 1.1202127659574468,
"grad_norm": 3.944267511367798,
"learning_rate": 9.194470183204315e-06,
"loss": 0.7949,
"step": 4212
},
{
"epoch": 1.1204787234042553,
"grad_norm": 3.8274521827697754,
"learning_rate": 9.193991408224347e-06,
"loss": 0.8237,
"step": 4213
},
{
"epoch": 1.1207446808510637,
"grad_norm": 3.8445777893066406,
"learning_rate": 9.193512503477345e-06,
"loss": 0.7119,
"step": 4214
},
{
"epoch": 1.1210106382978724,
"grad_norm": 4.098488807678223,
"learning_rate": 9.19303346897813e-06,
"loss": 0.9102,
"step": 4215
},
{
"epoch": 1.1212765957446809,
"grad_norm": 4.096566200256348,
"learning_rate": 9.192554304741522e-06,
"loss": 0.8465,
"step": 4216
},
{
"epoch": 1.1215425531914893,
"grad_norm": 3.770343065261841,
"learning_rate": 9.192075010782348e-06,
"loss": 0.8278,
"step": 4217
},
{
"epoch": 1.1218085106382978,
"grad_norm": 3.843766689300537,
"learning_rate": 9.191595587115439e-06,
"loss": 0.8402,
"step": 4218
},
{
"epoch": 1.1220744680851065,
"grad_norm": 4.594594478607178,
"learning_rate": 9.191116033755625e-06,
"loss": 0.8473,
"step": 4219
},
{
"epoch": 1.122340425531915,
"grad_norm": 4.192259311676025,
"learning_rate": 9.190636350717747e-06,
"loss": 0.8356,
"step": 4220
},
{
"epoch": 1.1226063829787234,
"grad_norm": 3.919210195541382,
"learning_rate": 9.190156538016648e-06,
"loss": 0.8494,
"step": 4221
},
{
"epoch": 1.1228723404255319,
"grad_norm": 4.091637134552002,
"learning_rate": 9.189676595667172e-06,
"loss": 0.7264,
"step": 4222
},
{
"epoch": 1.1231382978723405,
"grad_norm": 4.496889114379883,
"learning_rate": 9.189196523684168e-06,
"loss": 0.876,
"step": 4223
},
{
"epoch": 1.123404255319149,
"grad_norm": 3.492234230041504,
"learning_rate": 9.188716322082494e-06,
"loss": 0.7568,
"step": 4224
},
{
"epoch": 1.1236702127659575,
"grad_norm": 3.6598973274230957,
"learning_rate": 9.188235990877004e-06,
"loss": 0.683,
"step": 4225
},
{
"epoch": 1.123936170212766,
"grad_norm": 4.073709964752197,
"learning_rate": 9.18775553008256e-06,
"loss": 0.7798,
"step": 4226
},
{
"epoch": 1.1242021276595744,
"grad_norm": 4.100635528564453,
"learning_rate": 9.18727493971403e-06,
"loss": 0.8356,
"step": 4227
},
{
"epoch": 1.124468085106383,
"grad_norm": 4.231848239898682,
"learning_rate": 9.186794219786285e-06,
"loss": 0.8528,
"step": 4228
},
{
"epoch": 1.1247340425531915,
"grad_norm": 3.7461369037628174,
"learning_rate": 9.186313370314196e-06,
"loss": 0.7103,
"step": 4229
},
{
"epoch": 1.125,
"grad_norm": 3.610039234161377,
"learning_rate": 9.185832391312644e-06,
"loss": 0.7271,
"step": 4230
},
{
"epoch": 1.1252659574468085,
"grad_norm": 3.5538463592529297,
"learning_rate": 9.18535128279651e-06,
"loss": 0.82,
"step": 4231
},
{
"epoch": 1.125531914893617,
"grad_norm": 3.878833293914795,
"learning_rate": 9.184870044780677e-06,
"loss": 0.8418,
"step": 4232
},
{
"epoch": 1.1257978723404256,
"grad_norm": 4.012277126312256,
"learning_rate": 9.184388677280038e-06,
"loss": 0.8024,
"step": 4233
},
{
"epoch": 1.126063829787234,
"grad_norm": 3.702630043029785,
"learning_rate": 9.183907180309489e-06,
"loss": 0.7978,
"step": 4234
},
{
"epoch": 1.1263297872340425,
"grad_norm": 4.186684608459473,
"learning_rate": 9.183425553883925e-06,
"loss": 0.8459,
"step": 4235
},
{
"epoch": 1.126595744680851,
"grad_norm": 4.011842727661133,
"learning_rate": 9.18294379801825e-06,
"loss": 0.7931,
"step": 4236
},
{
"epoch": 1.1268617021276595,
"grad_norm": 4.870151042938232,
"learning_rate": 9.182461912727368e-06,
"loss": 0.9028,
"step": 4237
},
{
"epoch": 1.1271276595744681,
"grad_norm": 3.5846457481384277,
"learning_rate": 9.18197989802619e-06,
"loss": 0.783,
"step": 4238
},
{
"epoch": 1.1273936170212766,
"grad_norm": 3.910689115524292,
"learning_rate": 9.181497753929629e-06,
"loss": 0.8441,
"step": 4239
},
{
"epoch": 1.127659574468085,
"grad_norm": 3.768601894378662,
"learning_rate": 9.181015480452607e-06,
"loss": 0.8207,
"step": 4240
},
{
"epoch": 1.1279255319148935,
"grad_norm": 4.229056358337402,
"learning_rate": 9.18053307761004e-06,
"loss": 0.8025,
"step": 4241
},
{
"epoch": 1.1281914893617022,
"grad_norm": 4.3545050621032715,
"learning_rate": 9.180050545416861e-06,
"loss": 0.8154,
"step": 4242
},
{
"epoch": 1.1284574468085107,
"grad_norm": 4.138397693634033,
"learning_rate": 9.179567883887997e-06,
"loss": 0.8033,
"step": 4243
},
{
"epoch": 1.1287234042553191,
"grad_norm": 3.9504189491271973,
"learning_rate": 9.17908509303838e-06,
"loss": 0.85,
"step": 4244
},
{
"epoch": 1.1289893617021276,
"grad_norm": 3.9662301540374756,
"learning_rate": 9.178602172882951e-06,
"loss": 0.8327,
"step": 4245
},
{
"epoch": 1.1292553191489363,
"grad_norm": 4.157631874084473,
"learning_rate": 9.178119123436651e-06,
"loss": 0.8558,
"step": 4246
},
{
"epoch": 1.1295212765957447,
"grad_norm": 3.9172611236572266,
"learning_rate": 9.177635944714424e-06,
"loss": 0.9087,
"step": 4247
},
{
"epoch": 1.1297872340425532,
"grad_norm": 3.9250762462615967,
"learning_rate": 9.177152636731225e-06,
"loss": 0.7709,
"step": 4248
},
{
"epoch": 1.1300531914893617,
"grad_norm": 3.6299500465393066,
"learning_rate": 9.176669199502004e-06,
"loss": 0.717,
"step": 4249
},
{
"epoch": 1.1303191489361701,
"grad_norm": 4.225446701049805,
"learning_rate": 9.17618563304172e-06,
"loss": 0.8766,
"step": 4250
},
{
"epoch": 1.1305851063829788,
"grad_norm": 3.9178264141082764,
"learning_rate": 9.175701937365337e-06,
"loss": 0.7634,
"step": 4251
},
{
"epoch": 1.1308510638297873,
"grad_norm": 3.905505657196045,
"learning_rate": 9.175218112487821e-06,
"loss": 0.7784,
"step": 4252
},
{
"epoch": 1.1311170212765957,
"grad_norm": 4.228585243225098,
"learning_rate": 9.174734158424138e-06,
"loss": 0.8445,
"step": 4253
},
{
"epoch": 1.1313829787234042,
"grad_norm": 3.9836041927337646,
"learning_rate": 9.174250075189268e-06,
"loss": 0.8252,
"step": 4254
},
{
"epoch": 1.1316489361702127,
"grad_norm": 4.349749565124512,
"learning_rate": 9.173765862798185e-06,
"loss": 0.8154,
"step": 4255
},
{
"epoch": 1.1319148936170214,
"grad_norm": 3.7815349102020264,
"learning_rate": 9.17328152126587e-06,
"loss": 0.7356,
"step": 4256
},
{
"epoch": 1.1321808510638298,
"grad_norm": 3.9180119037628174,
"learning_rate": 9.172797050607313e-06,
"loss": 0.8098,
"step": 4257
},
{
"epoch": 1.1324468085106383,
"grad_norm": 3.720789670944214,
"learning_rate": 9.172312450837504e-06,
"loss": 0.815,
"step": 4258
},
{
"epoch": 1.1327127659574467,
"grad_norm": 4.155251502990723,
"learning_rate": 9.171827721971434e-06,
"loss": 0.8976,
"step": 4259
},
{
"epoch": 1.1329787234042552,
"grad_norm": 4.600409030914307,
"learning_rate": 9.171342864024103e-06,
"loss": 0.8868,
"step": 4260
},
{
"epoch": 1.133244680851064,
"grad_norm": 3.8379268646240234,
"learning_rate": 9.170857877010512e-06,
"loss": 0.7867,
"step": 4261
},
{
"epoch": 1.1335106382978724,
"grad_norm": 4.109460830688477,
"learning_rate": 9.170372760945668e-06,
"loss": 0.7826,
"step": 4262
},
{
"epoch": 1.1337765957446808,
"grad_norm": 3.895494222640991,
"learning_rate": 9.16988751584458e-06,
"loss": 0.854,
"step": 4263
},
{
"epoch": 1.1340425531914893,
"grad_norm": 3.7237160205841064,
"learning_rate": 9.169402141722264e-06,
"loss": 0.7098,
"step": 4264
},
{
"epoch": 1.134308510638298,
"grad_norm": 4.19631814956665,
"learning_rate": 9.168916638593736e-06,
"loss": 0.9218,
"step": 4265
},
{
"epoch": 1.1345744680851064,
"grad_norm": 4.052074909210205,
"learning_rate": 9.168431006474018e-06,
"loss": 0.8367,
"step": 4266
},
{
"epoch": 1.1348404255319149,
"grad_norm": 4.097432613372803,
"learning_rate": 9.167945245378139e-06,
"loss": 0.8705,
"step": 4267
},
{
"epoch": 1.1351063829787233,
"grad_norm": 3.81488037109375,
"learning_rate": 9.167459355321127e-06,
"loss": 0.6803,
"step": 4268
},
{
"epoch": 1.135372340425532,
"grad_norm": 4.266942501068115,
"learning_rate": 9.166973336318015e-06,
"loss": 0.8108,
"step": 4269
},
{
"epoch": 1.1356382978723405,
"grad_norm": 3.9824750423431396,
"learning_rate": 9.166487188383841e-06,
"loss": 0.811,
"step": 4270
},
{
"epoch": 1.135904255319149,
"grad_norm": 3.8896446228027344,
"learning_rate": 9.16600091153365e-06,
"loss": 0.8925,
"step": 4271
},
{
"epoch": 1.1361702127659574,
"grad_norm": 4.690064907073975,
"learning_rate": 9.165514505782484e-06,
"loss": 1.1356,
"step": 4272
},
{
"epoch": 1.1364361702127659,
"grad_norm": 4.304286479949951,
"learning_rate": 9.165027971145397e-06,
"loss": 0.8041,
"step": 4273
},
{
"epoch": 1.1367021276595746,
"grad_norm": 4.315762519836426,
"learning_rate": 9.16454130763744e-06,
"loss": 0.7519,
"step": 4274
},
{
"epoch": 1.136968085106383,
"grad_norm": 4.10341739654541,
"learning_rate": 9.16405451527367e-06,
"loss": 0.919,
"step": 4275
},
{
"epoch": 1.1372340425531915,
"grad_norm": 3.7802481651306152,
"learning_rate": 9.163567594069154e-06,
"loss": 0.8271,
"step": 4276
},
{
"epoch": 1.1375,
"grad_norm": 4.523904323577881,
"learning_rate": 9.163080544038953e-06,
"loss": 0.7865,
"step": 4277
},
{
"epoch": 1.1377659574468084,
"grad_norm": 3.958662509918213,
"learning_rate": 9.162593365198138e-06,
"loss": 0.8165,
"step": 4278
},
{
"epoch": 1.138031914893617,
"grad_norm": 3.8943662643432617,
"learning_rate": 9.162106057561784e-06,
"loss": 0.7951,
"step": 4279
},
{
"epoch": 1.1382978723404256,
"grad_norm": 3.9076874256134033,
"learning_rate": 9.161618621144967e-06,
"loss": 0.8135,
"step": 4280
},
{
"epoch": 1.138563829787234,
"grad_norm": 3.5434067249298096,
"learning_rate": 9.161131055962773e-06,
"loss": 0.7228,
"step": 4281
},
{
"epoch": 1.1388297872340425,
"grad_norm": 4.137996673583984,
"learning_rate": 9.160643362030284e-06,
"loss": 0.7711,
"step": 4282
},
{
"epoch": 1.139095744680851,
"grad_norm": 3.783001661300659,
"learning_rate": 9.160155539362589e-06,
"loss": 0.8494,
"step": 4283
},
{
"epoch": 1.1393617021276596,
"grad_norm": 3.8411149978637695,
"learning_rate": 9.159667587974786e-06,
"loss": 0.7447,
"step": 4284
},
{
"epoch": 1.139627659574468,
"grad_norm": 3.6387648582458496,
"learning_rate": 9.15917950788197e-06,
"loss": 0.8385,
"step": 4285
},
{
"epoch": 1.1398936170212766,
"grad_norm": 4.564189910888672,
"learning_rate": 9.158691299099241e-06,
"loss": 0.7572,
"step": 4286
},
{
"epoch": 1.140159574468085,
"grad_norm": 4.022932529449463,
"learning_rate": 9.15820296164171e-06,
"loss": 0.7129,
"step": 4287
},
{
"epoch": 1.1404255319148937,
"grad_norm": 4.345612525939941,
"learning_rate": 9.157714495524481e-06,
"loss": 0.8371,
"step": 4288
},
{
"epoch": 1.1406914893617022,
"grad_norm": 4.161421298980713,
"learning_rate": 9.157225900762672e-06,
"loss": 0.7528,
"step": 4289
},
{
"epoch": 1.1409574468085106,
"grad_norm": 4.042864799499512,
"learning_rate": 9.156737177371399e-06,
"loss": 0.8491,
"step": 4290
},
{
"epoch": 1.141223404255319,
"grad_norm": 3.8026928901672363,
"learning_rate": 9.156248325365782e-06,
"loss": 0.8444,
"step": 4291
},
{
"epoch": 1.1414893617021278,
"grad_norm": 4.251069068908691,
"learning_rate": 9.15575934476095e-06,
"loss": 0.7857,
"step": 4292
},
{
"epoch": 1.1417553191489362,
"grad_norm": 3.8531103134155273,
"learning_rate": 9.155270235572031e-06,
"loss": 0.867,
"step": 4293
},
{
"epoch": 1.1420212765957447,
"grad_norm": 3.975175142288208,
"learning_rate": 9.15478099781416e-06,
"loss": 0.808,
"step": 4294
},
{
"epoch": 1.1422872340425532,
"grad_norm": 3.695078134536743,
"learning_rate": 9.154291631502471e-06,
"loss": 0.7942,
"step": 4295
},
{
"epoch": 1.1425531914893616,
"grad_norm": 3.8435237407684326,
"learning_rate": 9.15380213665211e-06,
"loss": 0.8701,
"step": 4296
},
{
"epoch": 1.1428191489361703,
"grad_norm": 3.642451047897339,
"learning_rate": 9.153312513278219e-06,
"loss": 0.7479,
"step": 4297
},
{
"epoch": 1.1430851063829788,
"grad_norm": 3.8612117767333984,
"learning_rate": 9.15282276139595e-06,
"loss": 0.8394,
"step": 4298
},
{
"epoch": 1.1433510638297872,
"grad_norm": 3.818319082260132,
"learning_rate": 9.152332881020454e-06,
"loss": 0.789,
"step": 4299
},
{
"epoch": 1.1436170212765957,
"grad_norm": 3.6774802207946777,
"learning_rate": 9.15184287216689e-06,
"loss": 0.7991,
"step": 4300
},
{
"epoch": 1.1438829787234042,
"grad_norm": 4.338614463806152,
"learning_rate": 9.15135273485042e-06,
"loss": 0.8602,
"step": 4301
},
{
"epoch": 1.1441489361702128,
"grad_norm": 3.9688498973846436,
"learning_rate": 9.15086246908621e-06,
"loss": 0.7759,
"step": 4302
},
{
"epoch": 1.1444148936170213,
"grad_norm": 3.848708152770996,
"learning_rate": 9.150372074889427e-06,
"loss": 0.7635,
"step": 4303
},
{
"epoch": 1.1446808510638298,
"grad_norm": 4.042501926422119,
"learning_rate": 9.149881552275244e-06,
"loss": 0.8029,
"step": 4304
},
{
"epoch": 1.1449468085106382,
"grad_norm": 4.199094772338867,
"learning_rate": 9.149390901258841e-06,
"loss": 0.8343,
"step": 4305
},
{
"epoch": 1.1452127659574467,
"grad_norm": 4.045470714569092,
"learning_rate": 9.1489001218554e-06,
"loss": 0.831,
"step": 4306
},
{
"epoch": 1.1454787234042554,
"grad_norm": 3.7915914058685303,
"learning_rate": 9.148409214080103e-06,
"loss": 0.8476,
"step": 4307
},
{
"epoch": 1.1457446808510638,
"grad_norm": 3.7452378273010254,
"learning_rate": 9.14791817794814e-06,
"loss": 0.776,
"step": 4308
},
{
"epoch": 1.1460106382978723,
"grad_norm": 3.521505355834961,
"learning_rate": 9.147427013474706e-06,
"loss": 0.6753,
"step": 4309
},
{
"epoch": 1.1462765957446808,
"grad_norm": 3.906930923461914,
"learning_rate": 9.146935720674996e-06,
"loss": 0.6909,
"step": 4310
},
{
"epoch": 1.1465425531914895,
"grad_norm": 4.262080192565918,
"learning_rate": 9.146444299564215e-06,
"loss": 0.8444,
"step": 4311
},
{
"epoch": 1.146808510638298,
"grad_norm": 4.085954666137695,
"learning_rate": 9.145952750157563e-06,
"loss": 0.7587,
"step": 4312
},
{
"epoch": 1.1470744680851064,
"grad_norm": 3.9519617557525635,
"learning_rate": 9.145461072470253e-06,
"loss": 0.8757,
"step": 4313
},
{
"epoch": 1.1473404255319148,
"grad_norm": 4.349664211273193,
"learning_rate": 9.144969266517495e-06,
"loss": 0.7766,
"step": 4314
},
{
"epoch": 1.1476063829787235,
"grad_norm": 5.140100955963135,
"learning_rate": 9.144477332314509e-06,
"loss": 0.9414,
"step": 4315
},
{
"epoch": 1.147872340425532,
"grad_norm": 3.641763210296631,
"learning_rate": 9.143985269876516e-06,
"loss": 0.7562,
"step": 4316
},
{
"epoch": 1.1481382978723405,
"grad_norm": 3.641606092453003,
"learning_rate": 9.143493079218738e-06,
"loss": 0.7992,
"step": 4317
},
{
"epoch": 1.148404255319149,
"grad_norm": 4.611671447753906,
"learning_rate": 9.143000760356407e-06,
"loss": 0.8306,
"step": 4318
},
{
"epoch": 1.1486702127659574,
"grad_norm": 3.4973011016845703,
"learning_rate": 9.142508313304754e-06,
"loss": 0.7915,
"step": 4319
},
{
"epoch": 1.148936170212766,
"grad_norm": 3.9405927658081055,
"learning_rate": 9.142015738079017e-06,
"loss": 0.8279,
"step": 4320
},
{
"epoch": 1.1492021276595745,
"grad_norm": 4.37050199508667,
"learning_rate": 9.141523034694436e-06,
"loss": 0.8506,
"step": 4321
},
{
"epoch": 1.149468085106383,
"grad_norm": 4.181821346282959,
"learning_rate": 9.141030203166256e-06,
"loss": 0.8439,
"step": 4322
},
{
"epoch": 1.1497340425531914,
"grad_norm": 3.8523123264312744,
"learning_rate": 9.140537243509729e-06,
"loss": 0.7565,
"step": 4323
},
{
"epoch": 1.15,
"grad_norm": 3.5637168884277344,
"learning_rate": 9.140044155740102e-06,
"loss": 0.7406,
"step": 4324
},
{
"epoch": 1.1502659574468086,
"grad_norm": 3.8401317596435547,
"learning_rate": 9.139550939872635e-06,
"loss": 0.8231,
"step": 4325
},
{
"epoch": 1.150531914893617,
"grad_norm": 4.033459186553955,
"learning_rate": 9.139057595922587e-06,
"loss": 0.7585,
"step": 4326
},
{
"epoch": 1.1507978723404255,
"grad_norm": 4.144162654876709,
"learning_rate": 9.138564123905225e-06,
"loss": 0.8237,
"step": 4327
},
{
"epoch": 1.151063829787234,
"grad_norm": 4.219383716583252,
"learning_rate": 9.138070523835816e-06,
"loss": 0.793,
"step": 4328
},
{
"epoch": 1.1513297872340424,
"grad_norm": 4.144248962402344,
"learning_rate": 9.137576795729635e-06,
"loss": 0.743,
"step": 4329
},
{
"epoch": 1.1515957446808511,
"grad_norm": 3.836845636367798,
"learning_rate": 9.137082939601953e-06,
"loss": 0.7829,
"step": 4330
},
{
"epoch": 1.1518617021276596,
"grad_norm": 3.8342814445495605,
"learning_rate": 9.136588955468057e-06,
"loss": 0.7298,
"step": 4331
},
{
"epoch": 1.152127659574468,
"grad_norm": 3.852695941925049,
"learning_rate": 9.136094843343228e-06,
"loss": 0.8051,
"step": 4332
},
{
"epoch": 1.1523936170212765,
"grad_norm": 3.9740166664123535,
"learning_rate": 9.135600603242753e-06,
"loss": 0.8096,
"step": 4333
},
{
"epoch": 1.1526595744680852,
"grad_norm": 4.557644367218018,
"learning_rate": 9.13510623518193e-06,
"loss": 0.8826,
"step": 4334
},
{
"epoch": 1.1529255319148937,
"grad_norm": 4.095839500427246,
"learning_rate": 9.13461173917605e-06,
"loss": 0.7624,
"step": 4335
},
{
"epoch": 1.1531914893617021,
"grad_norm": 3.6598823070526123,
"learning_rate": 9.134117115240412e-06,
"loss": 0.6786,
"step": 4336
},
{
"epoch": 1.1534574468085106,
"grad_norm": 4.052873611450195,
"learning_rate": 9.133622363390326e-06,
"loss": 0.7476,
"step": 4337
},
{
"epoch": 1.1537234042553193,
"grad_norm": 3.892709255218506,
"learning_rate": 9.133127483641096e-06,
"loss": 0.7902,
"step": 4338
},
{
"epoch": 1.1539893617021277,
"grad_norm": 4.127117156982422,
"learning_rate": 9.132632476008036e-06,
"loss": 0.8427,
"step": 4339
},
{
"epoch": 1.1542553191489362,
"grad_norm": 3.911402463912964,
"learning_rate": 9.132137340506464e-06,
"loss": 0.744,
"step": 4340
},
{
"epoch": 1.1545212765957447,
"grad_norm": 4.6202826499938965,
"learning_rate": 9.131642077151695e-06,
"loss": 0.816,
"step": 4341
},
{
"epoch": 1.1547872340425531,
"grad_norm": 3.967888593673706,
"learning_rate": 9.131146685959055e-06,
"loss": 0.8608,
"step": 4342
},
{
"epoch": 1.1550531914893618,
"grad_norm": 3.7461965084075928,
"learning_rate": 9.130651166943875e-06,
"loss": 0.8002,
"step": 4343
},
{
"epoch": 1.1553191489361703,
"grad_norm": 3.893925666809082,
"learning_rate": 9.130155520121484e-06,
"loss": 0.7651,
"step": 4344
},
{
"epoch": 1.1555851063829787,
"grad_norm": 4.108353614807129,
"learning_rate": 9.129659745507219e-06,
"loss": 0.847,
"step": 4345
},
{
"epoch": 1.1558510638297872,
"grad_norm": 3.766580104827881,
"learning_rate": 9.129163843116417e-06,
"loss": 0.7361,
"step": 4346
},
{
"epoch": 1.1561170212765957,
"grad_norm": 4.005224227905273,
"learning_rate": 9.128667812964428e-06,
"loss": 0.846,
"step": 4347
},
{
"epoch": 1.1563829787234043,
"grad_norm": 4.085299491882324,
"learning_rate": 9.128171655066592e-06,
"loss": 0.7435,
"step": 4348
},
{
"epoch": 1.1566489361702128,
"grad_norm": 3.649341583251953,
"learning_rate": 9.127675369438267e-06,
"loss": 0.7848,
"step": 4349
},
{
"epoch": 1.1569148936170213,
"grad_norm": 4.286210536956787,
"learning_rate": 9.127178956094805e-06,
"loss": 0.8657,
"step": 4350
},
{
"epoch": 1.1571808510638297,
"grad_norm": 3.8484995365142822,
"learning_rate": 9.12668241505157e-06,
"loss": 0.7356,
"step": 4351
},
{
"epoch": 1.1574468085106382,
"grad_norm": 3.80110239982605,
"learning_rate": 9.12618574632392e-06,
"loss": 0.8581,
"step": 4352
},
{
"epoch": 1.1577127659574469,
"grad_norm": 4.16612434387207,
"learning_rate": 9.125688949927223e-06,
"loss": 0.9135,
"step": 4353
},
{
"epoch": 1.1579787234042553,
"grad_norm": 4.107837677001953,
"learning_rate": 9.125192025876855e-06,
"loss": 0.8993,
"step": 4354
},
{
"epoch": 1.1582446808510638,
"grad_norm": 3.7631843090057373,
"learning_rate": 9.124694974188188e-06,
"loss": 0.7997,
"step": 4355
},
{
"epoch": 1.1585106382978723,
"grad_norm": 4.244007587432861,
"learning_rate": 9.124197794876604e-06,
"loss": 0.806,
"step": 4356
},
{
"epoch": 1.1587765957446807,
"grad_norm": 3.4537291526794434,
"learning_rate": 9.123700487957484e-06,
"loss": 0.7259,
"step": 4357
},
{
"epoch": 1.1590425531914894,
"grad_norm": 4.083813667297363,
"learning_rate": 9.123203053446215e-06,
"loss": 0.7935,
"step": 4358
},
{
"epoch": 1.1593085106382979,
"grad_norm": 3.842515707015991,
"learning_rate": 9.12270549135819e-06,
"loss": 0.8403,
"step": 4359
},
{
"epoch": 1.1595744680851063,
"grad_norm": 3.8198819160461426,
"learning_rate": 9.122207801708802e-06,
"loss": 0.8035,
"step": 4360
},
{
"epoch": 1.1598404255319148,
"grad_norm": 4.05394172668457,
"learning_rate": 9.121709984513453e-06,
"loss": 0.6678,
"step": 4361
},
{
"epoch": 1.1601063829787235,
"grad_norm": 3.8895061016082764,
"learning_rate": 9.121212039787543e-06,
"loss": 0.7822,
"step": 4362
},
{
"epoch": 1.160372340425532,
"grad_norm": 4.040393829345703,
"learning_rate": 9.12071396754648e-06,
"loss": 0.8669,
"step": 4363
},
{
"epoch": 1.1606382978723404,
"grad_norm": 3.8143858909606934,
"learning_rate": 9.120215767805677e-06,
"loss": 0.9251,
"step": 4364
},
{
"epoch": 1.1609042553191489,
"grad_norm": 3.8011443614959717,
"learning_rate": 9.119717440580547e-06,
"loss": 0.7142,
"step": 4365
},
{
"epoch": 1.1611702127659576,
"grad_norm": 4.147587776184082,
"learning_rate": 9.119218985886506e-06,
"loss": 0.8196,
"step": 4366
},
{
"epoch": 1.161436170212766,
"grad_norm": 4.035295009613037,
"learning_rate": 9.118720403738984e-06,
"loss": 0.9006,
"step": 4367
},
{
"epoch": 1.1617021276595745,
"grad_norm": 4.253767967224121,
"learning_rate": 9.118221694153401e-06,
"loss": 0.9149,
"step": 4368
},
{
"epoch": 1.161968085106383,
"grad_norm": 3.7400970458984375,
"learning_rate": 9.11772285714519e-06,
"loss": 0.847,
"step": 4369
},
{
"epoch": 1.1622340425531914,
"grad_norm": 4.12266731262207,
"learning_rate": 9.117223892729788e-06,
"loss": 0.8159,
"step": 4370
},
{
"epoch": 1.1625,
"grad_norm": 3.939617395401001,
"learning_rate": 9.11672480092263e-06,
"loss": 0.8515,
"step": 4371
},
{
"epoch": 1.1627659574468086,
"grad_norm": 3.597660541534424,
"learning_rate": 9.11622558173916e-06,
"loss": 0.7139,
"step": 4372
},
{
"epoch": 1.163031914893617,
"grad_norm": 3.8929126262664795,
"learning_rate": 9.115726235194825e-06,
"loss": 0.755,
"step": 4373
},
{
"epoch": 1.1632978723404255,
"grad_norm": 3.9748990535736084,
"learning_rate": 9.115226761305071e-06,
"loss": 0.9779,
"step": 4374
},
{
"epoch": 1.163563829787234,
"grad_norm": 3.6702117919921875,
"learning_rate": 9.11472716008536e-06,
"loss": 0.7913,
"step": 4375
},
{
"epoch": 1.1638297872340426,
"grad_norm": 3.5676674842834473,
"learning_rate": 9.114227431551144e-06,
"loss": 0.8714,
"step": 4376
},
{
"epoch": 1.164095744680851,
"grad_norm": 3.871457576751709,
"learning_rate": 9.113727575717887e-06,
"loss": 0.7551,
"step": 4377
},
{
"epoch": 1.1643617021276595,
"grad_norm": 3.709536552429199,
"learning_rate": 9.113227592601057e-06,
"loss": 0.7476,
"step": 4378
},
{
"epoch": 1.164627659574468,
"grad_norm": 4.048936367034912,
"learning_rate": 9.112727482216123e-06,
"loss": 0.822,
"step": 4379
},
{
"epoch": 1.1648936170212765,
"grad_norm": 4.941551685333252,
"learning_rate": 9.112227244578557e-06,
"loss": 0.942,
"step": 4380
},
{
"epoch": 1.1651595744680852,
"grad_norm": 3.971956491470337,
"learning_rate": 9.111726879703839e-06,
"loss": 0.898,
"step": 4381
},
{
"epoch": 1.1654255319148936,
"grad_norm": 4.139491558074951,
"learning_rate": 9.111226387607452e-06,
"loss": 0.9185,
"step": 4382
},
{
"epoch": 1.165691489361702,
"grad_norm": 3.8217787742614746,
"learning_rate": 9.110725768304878e-06,
"loss": 0.8598,
"step": 4383
},
{
"epoch": 1.1659574468085105,
"grad_norm": 3.656966209411621,
"learning_rate": 9.11022502181161e-06,
"loss": 0.7433,
"step": 4384
},
{
"epoch": 1.1662234042553192,
"grad_norm": 4.29415225982666,
"learning_rate": 9.10972414814314e-06,
"loss": 0.7777,
"step": 4385
},
{
"epoch": 1.1664893617021277,
"grad_norm": 3.9143810272216797,
"learning_rate": 9.109223147314968e-06,
"loss": 0.678,
"step": 4386
},
{
"epoch": 1.1667553191489362,
"grad_norm": 4.056838512420654,
"learning_rate": 9.108722019342592e-06,
"loss": 0.6778,
"step": 4387
},
{
"epoch": 1.1670212765957446,
"grad_norm": 3.9018867015838623,
"learning_rate": 9.10822076424152e-06,
"loss": 0.8195,
"step": 4388
},
{
"epoch": 1.1672872340425533,
"grad_norm": 4.0093994140625,
"learning_rate": 9.10771938202726e-06,
"loss": 0.9474,
"step": 4389
},
{
"epoch": 1.1675531914893618,
"grad_norm": 4.224606037139893,
"learning_rate": 9.107217872715326e-06,
"loss": 0.7376,
"step": 4390
},
{
"epoch": 1.1678191489361702,
"grad_norm": 3.831489086151123,
"learning_rate": 9.106716236321236e-06,
"loss": 0.731,
"step": 4391
},
{
"epoch": 1.1680851063829787,
"grad_norm": 3.8180394172668457,
"learning_rate": 9.106214472860511e-06,
"loss": 0.7458,
"step": 4392
},
{
"epoch": 1.1683510638297872,
"grad_norm": 3.393148899078369,
"learning_rate": 9.105712582348676e-06,
"loss": 0.7216,
"step": 4393
},
{
"epoch": 1.1686170212765958,
"grad_norm": 4.6142964363098145,
"learning_rate": 9.105210564801259e-06,
"loss": 0.7643,
"step": 4394
},
{
"epoch": 1.1688829787234043,
"grad_norm": 4.428558826446533,
"learning_rate": 9.104708420233794e-06,
"loss": 0.8364,
"step": 4395
},
{
"epoch": 1.1691489361702128,
"grad_norm": 4.209799766540527,
"learning_rate": 9.104206148661819e-06,
"loss": 0.7965,
"step": 4396
},
{
"epoch": 1.1694148936170212,
"grad_norm": 4.0707831382751465,
"learning_rate": 9.10370375010087e-06,
"loss": 0.7676,
"step": 4397
},
{
"epoch": 1.1696808510638297,
"grad_norm": 3.684016227722168,
"learning_rate": 9.103201224566499e-06,
"loss": 0.8018,
"step": 4398
},
{
"epoch": 1.1699468085106384,
"grad_norm": 4.157726287841797,
"learning_rate": 9.10269857207425e-06,
"loss": 0.8431,
"step": 4399
},
{
"epoch": 1.1702127659574468,
"grad_norm": 3.866776704788208,
"learning_rate": 9.102195792639677e-06,
"loss": 0.9013,
"step": 4400
},
{
"epoch": 1.1704787234042553,
"grad_norm": 3.8174455165863037,
"learning_rate": 9.101692886278336e-06,
"loss": 0.8174,
"step": 4401
},
{
"epoch": 1.1707446808510638,
"grad_norm": 4.051540851593018,
"learning_rate": 9.101189853005788e-06,
"loss": 0.8006,
"step": 4402
},
{
"epoch": 1.1710106382978722,
"grad_norm": 4.115768909454346,
"learning_rate": 9.100686692837598e-06,
"loss": 0.8905,
"step": 4403
},
{
"epoch": 1.171276595744681,
"grad_norm": 3.989694595336914,
"learning_rate": 9.100183405789334e-06,
"loss": 0.8763,
"step": 4404
},
{
"epoch": 1.1715425531914894,
"grad_norm": 3.5945072174072266,
"learning_rate": 9.099679991876567e-06,
"loss": 0.7173,
"step": 4405
},
{
"epoch": 1.1718085106382978,
"grad_norm": 3.627795934677124,
"learning_rate": 9.099176451114876e-06,
"loss": 0.7708,
"step": 4406
},
{
"epoch": 1.1720744680851063,
"grad_norm": 4.366139888763428,
"learning_rate": 9.098672783519837e-06,
"loss": 0.7882,
"step": 4407
},
{
"epoch": 1.172340425531915,
"grad_norm": 4.13855504989624,
"learning_rate": 9.098168989107038e-06,
"loss": 0.7776,
"step": 4408
},
{
"epoch": 1.1726063829787234,
"grad_norm": 3.8078205585479736,
"learning_rate": 9.097665067892066e-06,
"loss": 0.7194,
"step": 4409
},
{
"epoch": 1.172872340425532,
"grad_norm": 3.676452398300171,
"learning_rate": 9.09716101989051e-06,
"loss": 0.7386,
"step": 4410
},
{
"epoch": 1.1731382978723404,
"grad_norm": 4.525330066680908,
"learning_rate": 9.09665684511797e-06,
"loss": 0.8734,
"step": 4411
},
{
"epoch": 1.173404255319149,
"grad_norm": 4.38550329208374,
"learning_rate": 9.096152543590045e-06,
"loss": 0.8248,
"step": 4412
},
{
"epoch": 1.1736702127659575,
"grad_norm": 4.337765693664551,
"learning_rate": 9.095648115322336e-06,
"loss": 0.8992,
"step": 4413
},
{
"epoch": 1.173936170212766,
"grad_norm": 4.145912170410156,
"learning_rate": 9.095143560330453e-06,
"loss": 0.8119,
"step": 4414
},
{
"epoch": 1.1742021276595744,
"grad_norm": 3.5085721015930176,
"learning_rate": 9.094638878630007e-06,
"loss": 0.744,
"step": 4415
},
{
"epoch": 1.174468085106383,
"grad_norm": 4.225882053375244,
"learning_rate": 9.094134070236614e-06,
"loss": 0.8368,
"step": 4416
},
{
"epoch": 1.1747340425531916,
"grad_norm": 4.2498273849487305,
"learning_rate": 9.09362913516589e-06,
"loss": 0.7281,
"step": 4417
},
{
"epoch": 1.175,
"grad_norm": 3.8343684673309326,
"learning_rate": 9.093124073433464e-06,
"loss": 0.8521,
"step": 4418
},
{
"epoch": 1.1752659574468085,
"grad_norm": 4.265048503875732,
"learning_rate": 9.092618885054958e-06,
"loss": 0.8624,
"step": 4419
},
{
"epoch": 1.175531914893617,
"grad_norm": 4.251501560211182,
"learning_rate": 9.092113570046005e-06,
"loss": 0.7163,
"step": 4420
},
{
"epoch": 1.1757978723404254,
"grad_norm": 3.9519202709198,
"learning_rate": 9.091608128422243e-06,
"loss": 0.8139,
"step": 4421
},
{
"epoch": 1.1760638297872341,
"grad_norm": 3.785550832748413,
"learning_rate": 9.091102560199306e-06,
"loss": 0.7897,
"step": 4422
},
{
"epoch": 1.1763297872340426,
"grad_norm": 4.2011260986328125,
"learning_rate": 9.090596865392838e-06,
"loss": 0.8119,
"step": 4423
},
{
"epoch": 1.176595744680851,
"grad_norm": 3.7419655323028564,
"learning_rate": 9.090091044018488e-06,
"loss": 0.64,
"step": 4424
},
{
"epoch": 1.1768617021276595,
"grad_norm": 3.561340093612671,
"learning_rate": 9.089585096091906e-06,
"loss": 0.7546,
"step": 4425
},
{
"epoch": 1.177127659574468,
"grad_norm": 3.971997022628784,
"learning_rate": 9.089079021628746e-06,
"loss": 0.8783,
"step": 4426
},
{
"epoch": 1.1773936170212767,
"grad_norm": 4.214608669281006,
"learning_rate": 9.088572820644667e-06,
"loss": 0.9312,
"step": 4427
},
{
"epoch": 1.1776595744680851,
"grad_norm": 3.867511749267578,
"learning_rate": 9.088066493155332e-06,
"loss": 0.9171,
"step": 4428
},
{
"epoch": 1.1779255319148936,
"grad_norm": 3.8267605304718018,
"learning_rate": 9.087560039176407e-06,
"loss": 0.7369,
"step": 4429
},
{
"epoch": 1.178191489361702,
"grad_norm": 3.9210994243621826,
"learning_rate": 9.08705345872356e-06,
"loss": 0.7975,
"step": 4430
},
{
"epoch": 1.1784574468085107,
"grad_norm": 3.820697069168091,
"learning_rate": 9.086546751812467e-06,
"loss": 0.7579,
"step": 4431
},
{
"epoch": 1.1787234042553192,
"grad_norm": 4.319027423858643,
"learning_rate": 9.086039918458806e-06,
"loss": 0.7671,
"step": 4432
},
{
"epoch": 1.1789893617021276,
"grad_norm": 3.768254280090332,
"learning_rate": 9.085532958678262e-06,
"loss": 0.7075,
"step": 4433
},
{
"epoch": 1.179255319148936,
"grad_norm": 3.8115556240081787,
"learning_rate": 9.085025872486516e-06,
"loss": 0.6844,
"step": 4434
},
{
"epoch": 1.1795212765957448,
"grad_norm": 3.6113126277923584,
"learning_rate": 9.08451865989926e-06,
"loss": 0.7161,
"step": 4435
},
{
"epoch": 1.1797872340425533,
"grad_norm": 4.16688871383667,
"learning_rate": 9.08401132093219e-06,
"loss": 0.8756,
"step": 4436
},
{
"epoch": 1.1800531914893617,
"grad_norm": 4.136419773101807,
"learning_rate": 9.083503855600997e-06,
"loss": 0.8072,
"step": 4437
},
{
"epoch": 1.1803191489361702,
"grad_norm": 4.0323357582092285,
"learning_rate": 9.08299626392139e-06,
"loss": 0.7889,
"step": 4438
},
{
"epoch": 1.1805851063829786,
"grad_norm": 3.848400354385376,
"learning_rate": 9.082488545909072e-06,
"loss": 0.8467,
"step": 4439
},
{
"epoch": 1.1808510638297873,
"grad_norm": 3.8820831775665283,
"learning_rate": 9.08198070157975e-06,
"loss": 0.7926,
"step": 4440
},
{
"epoch": 1.1811170212765958,
"grad_norm": 3.9585654735565186,
"learning_rate": 9.08147273094914e-06,
"loss": 0.8671,
"step": 4441
},
{
"epoch": 1.1813829787234043,
"grad_norm": 4.736848831176758,
"learning_rate": 9.080964634032958e-06,
"loss": 0.8953,
"step": 4442
},
{
"epoch": 1.1816489361702127,
"grad_norm": 4.1310343742370605,
"learning_rate": 9.080456410846926e-06,
"loss": 0.7878,
"step": 4443
},
{
"epoch": 1.1819148936170212,
"grad_norm": 3.701655149459839,
"learning_rate": 9.079948061406769e-06,
"loss": 0.7205,
"step": 4444
},
{
"epoch": 1.1821808510638299,
"grad_norm": 4.258152008056641,
"learning_rate": 9.079439585728214e-06,
"loss": 0.8573,
"step": 4445
},
{
"epoch": 1.1824468085106383,
"grad_norm": 4.08727502822876,
"learning_rate": 9.078930983826997e-06,
"loss": 0.8661,
"step": 4446
},
{
"epoch": 1.1827127659574468,
"grad_norm": 4.263191223144531,
"learning_rate": 9.078422255718852e-06,
"loss": 0.9975,
"step": 4447
},
{
"epoch": 1.1829787234042553,
"grad_norm": 3.8881144523620605,
"learning_rate": 9.07791340141952e-06,
"loss": 0.8825,
"step": 4448
},
{
"epoch": 1.1832446808510637,
"grad_norm": 4.034143924713135,
"learning_rate": 9.077404420944746e-06,
"loss": 0.7645,
"step": 4449
},
{
"epoch": 1.1835106382978724,
"grad_norm": 3.6815900802612305,
"learning_rate": 9.076895314310282e-06,
"loss": 0.845,
"step": 4450
},
{
"epoch": 1.1837765957446809,
"grad_norm": 4.061761379241943,
"learning_rate": 9.076386081531873e-06,
"loss": 0.715,
"step": 4451
},
{
"epoch": 1.1840425531914893,
"grad_norm": 3.675588846206665,
"learning_rate": 9.075876722625281e-06,
"loss": 0.6865,
"step": 4452
},
{
"epoch": 1.1843085106382978,
"grad_norm": 3.922511577606201,
"learning_rate": 9.075367237606265e-06,
"loss": 0.8139,
"step": 4453
},
{
"epoch": 1.1845744680851065,
"grad_norm": 4.45919132232666,
"learning_rate": 9.074857626490587e-06,
"loss": 0.8832,
"step": 4454
},
{
"epoch": 1.184840425531915,
"grad_norm": 3.8306045532226562,
"learning_rate": 9.074347889294017e-06,
"loss": 0.775,
"step": 4455
},
{
"epoch": 1.1851063829787234,
"grad_norm": 4.380180358886719,
"learning_rate": 9.073838026032328e-06,
"loss": 0.8028,
"step": 4456
},
{
"epoch": 1.1853723404255319,
"grad_norm": 3.6403377056121826,
"learning_rate": 9.073328036721292e-06,
"loss": 0.7365,
"step": 4457
},
{
"epoch": 1.1856382978723405,
"grad_norm": 4.642416477203369,
"learning_rate": 9.072817921376692e-06,
"loss": 1.0456,
"step": 4458
},
{
"epoch": 1.185904255319149,
"grad_norm": 4.2514753341674805,
"learning_rate": 9.07230768001431e-06,
"loss": 0.8752,
"step": 4459
},
{
"epoch": 1.1861702127659575,
"grad_norm": 4.097993850708008,
"learning_rate": 9.071797312649934e-06,
"loss": 0.8805,
"step": 4460
},
{
"epoch": 1.186436170212766,
"grad_norm": 3.6704015731811523,
"learning_rate": 9.071286819299355e-06,
"loss": 0.7362,
"step": 4461
},
{
"epoch": 1.1867021276595744,
"grad_norm": 3.5198822021484375,
"learning_rate": 9.070776199978369e-06,
"loss": 0.6528,
"step": 4462
},
{
"epoch": 1.186968085106383,
"grad_norm": 4.044826507568359,
"learning_rate": 9.070265454702774e-06,
"loss": 0.785,
"step": 4463
},
{
"epoch": 1.1872340425531915,
"grad_norm": 3.775392770767212,
"learning_rate": 9.069754583488375e-06,
"loss": 0.7664,
"step": 4464
},
{
"epoch": 1.1875,
"grad_norm": 3.9251670837402344,
"learning_rate": 9.069243586350976e-06,
"loss": 0.7694,
"step": 4465
},
{
"epoch": 1.1877659574468085,
"grad_norm": 4.138858318328857,
"learning_rate": 9.06873246330639e-06,
"loss": 0.8734,
"step": 4466
},
{
"epoch": 1.188031914893617,
"grad_norm": 3.8749899864196777,
"learning_rate": 9.06822121437043e-06,
"loss": 0.7114,
"step": 4467
},
{
"epoch": 1.1882978723404256,
"grad_norm": 4.107519626617432,
"learning_rate": 9.067709839558917e-06,
"loss": 0.7998,
"step": 4468
},
{
"epoch": 1.188563829787234,
"grad_norm": 3.6962497234344482,
"learning_rate": 9.067198338887673e-06,
"loss": 0.8317,
"step": 4469
},
{
"epoch": 1.1888297872340425,
"grad_norm": 4.575094223022461,
"learning_rate": 9.066686712372524e-06,
"loss": 0.8399,
"step": 4470
},
{
"epoch": 1.189095744680851,
"grad_norm": 4.391597747802734,
"learning_rate": 9.0661749600293e-06,
"loss": 0.8801,
"step": 4471
},
{
"epoch": 1.1893617021276595,
"grad_norm": 3.650452136993408,
"learning_rate": 9.065663081873834e-06,
"loss": 0.7738,
"step": 4472
},
{
"epoch": 1.1896276595744681,
"grad_norm": 4.12108039855957,
"learning_rate": 9.065151077921968e-06,
"loss": 0.8333,
"step": 4473
},
{
"epoch": 1.1898936170212766,
"grad_norm": 4.204649925231934,
"learning_rate": 9.064638948189539e-06,
"loss": 0.8531,
"step": 4474
},
{
"epoch": 1.190159574468085,
"grad_norm": 4.241077423095703,
"learning_rate": 9.064126692692397e-06,
"loss": 0.8215,
"step": 4475
},
{
"epoch": 1.1904255319148935,
"grad_norm": 4.215181350708008,
"learning_rate": 9.06361431144639e-06,
"loss": 0.7595,
"step": 4476
},
{
"epoch": 1.1906914893617022,
"grad_norm": 3.597543239593506,
"learning_rate": 9.06310180446737e-06,
"loss": 0.7967,
"step": 4477
},
{
"epoch": 1.1909574468085107,
"grad_norm": 4.075351238250732,
"learning_rate": 9.0625891717712e-06,
"loss": 0.8158,
"step": 4478
},
{
"epoch": 1.1912234042553191,
"grad_norm": 3.5748724937438965,
"learning_rate": 9.062076413373735e-06,
"loss": 0.733,
"step": 4479
},
{
"epoch": 1.1914893617021276,
"grad_norm": 3.9107751846313477,
"learning_rate": 9.061563529290845e-06,
"loss": 0.8057,
"step": 4480
},
{
"epoch": 1.1917553191489363,
"grad_norm": 4.108970642089844,
"learning_rate": 9.061050519538397e-06,
"loss": 0.9214,
"step": 4481
},
{
"epoch": 1.1920212765957447,
"grad_norm": 3.9196219444274902,
"learning_rate": 9.060537384132264e-06,
"loss": 0.8046,
"step": 4482
},
{
"epoch": 1.1922872340425532,
"grad_norm": 3.312999963760376,
"learning_rate": 9.060024123088324e-06,
"loss": 0.6791,
"step": 4483
},
{
"epoch": 1.1925531914893617,
"grad_norm": 4.010212421417236,
"learning_rate": 9.05951073642246e-06,
"loss": 0.8244,
"step": 4484
},
{
"epoch": 1.1928191489361701,
"grad_norm": 3.9299821853637695,
"learning_rate": 9.05899722415055e-06,
"loss": 0.7054,
"step": 4485
},
{
"epoch": 1.1930851063829788,
"grad_norm": 4.205704212188721,
"learning_rate": 9.05848358628849e-06,
"loss": 0.9058,
"step": 4486
},
{
"epoch": 1.1933510638297873,
"grad_norm": 4.133444309234619,
"learning_rate": 9.057969822852168e-06,
"loss": 0.8414,
"step": 4487
},
{
"epoch": 1.1936170212765957,
"grad_norm": 3.7199227809906006,
"learning_rate": 9.057455933857483e-06,
"loss": 0.7884,
"step": 4488
},
{
"epoch": 1.1938829787234042,
"grad_norm": 4.377199172973633,
"learning_rate": 9.056941919320335e-06,
"loss": 0.7732,
"step": 4489
},
{
"epoch": 1.1941489361702127,
"grad_norm": 4.171092987060547,
"learning_rate": 9.056427779256624e-06,
"loss": 0.8652,
"step": 4490
},
{
"epoch": 1.1944148936170214,
"grad_norm": 3.7670929431915283,
"learning_rate": 9.055913513682267e-06,
"loss": 0.7825,
"step": 4491
},
{
"epoch": 1.1946808510638298,
"grad_norm": 3.9210784435272217,
"learning_rate": 9.055399122613166e-06,
"loss": 0.8515,
"step": 4492
},
{
"epoch": 1.1949468085106383,
"grad_norm": 3.543363094329834,
"learning_rate": 9.054884606065243e-06,
"loss": 0.6883,
"step": 4493
},
{
"epoch": 1.1952127659574467,
"grad_norm": 3.9357686042785645,
"learning_rate": 9.054369964054418e-06,
"loss": 0.7847,
"step": 4494
},
{
"epoch": 1.1954787234042552,
"grad_norm": 3.5497348308563232,
"learning_rate": 9.05385519659661e-06,
"loss": 0.8664,
"step": 4495
},
{
"epoch": 1.195744680851064,
"grad_norm": 4.09616756439209,
"learning_rate": 9.053340303707752e-06,
"loss": 0.7928,
"step": 4496
},
{
"epoch": 1.1960106382978724,
"grad_norm": 4.135888576507568,
"learning_rate": 9.052825285403771e-06,
"loss": 0.8372,
"step": 4497
},
{
"epoch": 1.1962765957446808,
"grad_norm": 4.014375686645508,
"learning_rate": 9.052310141700605e-06,
"loss": 0.7838,
"step": 4498
},
{
"epoch": 1.1965425531914893,
"grad_norm": 4.164703369140625,
"learning_rate": 9.051794872614193e-06,
"loss": 0.7346,
"step": 4499
},
{
"epoch": 1.196808510638298,
"grad_norm": 3.9445199966430664,
"learning_rate": 9.051279478160475e-06,
"loss": 0.7969,
"step": 4500
},
{
"epoch": 1.196808510638298,
"eval_loss": 1.3114004135131836,
"eval_runtime": 13.8708,
"eval_samples_per_second": 28.838,
"eval_steps_per_second": 3.605,
"step": 4500
},
{
"epoch": 1.1970744680851064,
"grad_norm": 4.145724773406982,
"learning_rate": 9.050763958355401e-06,
"loss": 0.864,
"step": 4501
},
{
"epoch": 1.1973404255319149,
"grad_norm": 3.9395062923431396,
"learning_rate": 9.050248313214921e-06,
"loss": 0.8854,
"step": 4502
},
{
"epoch": 1.1976063829787233,
"grad_norm": 3.7419703006744385,
"learning_rate": 9.04973254275499e-06,
"loss": 0.778,
"step": 4503
},
{
"epoch": 1.197872340425532,
"grad_norm": 3.620009422302246,
"learning_rate": 9.049216646991568e-06,
"loss": 0.6522,
"step": 4504
},
{
"epoch": 1.1981382978723405,
"grad_norm": 4.093226909637451,
"learning_rate": 9.048700625940613e-06,
"loss": 0.7909,
"step": 4505
},
{
"epoch": 1.198404255319149,
"grad_norm": 4.31190824508667,
"learning_rate": 9.048184479618094e-06,
"loss": 0.87,
"step": 4506
},
{
"epoch": 1.1986702127659574,
"grad_norm": 3.5274550914764404,
"learning_rate": 9.047668208039981e-06,
"loss": 0.7015,
"step": 4507
},
{
"epoch": 1.1989361702127659,
"grad_norm": 4.295877933502197,
"learning_rate": 9.04715181122225e-06,
"loss": 0.8673,
"step": 4508
},
{
"epoch": 1.1992021276595746,
"grad_norm": 4.239846706390381,
"learning_rate": 9.046635289180875e-06,
"loss": 0.7815,
"step": 4509
},
{
"epoch": 1.199468085106383,
"grad_norm": 4.294873237609863,
"learning_rate": 9.046118641931841e-06,
"loss": 0.8275,
"step": 4510
},
{
"epoch": 1.1997340425531915,
"grad_norm": 4.2128586769104,
"learning_rate": 9.045601869491131e-06,
"loss": 0.885,
"step": 4511
},
{
"epoch": 1.2,
"grad_norm": 4.04133415222168,
"learning_rate": 9.045084971874738e-06,
"loss": 0.6479,
"step": 4512
},
{
"epoch": 1.2002659574468084,
"grad_norm": 4.300421714782715,
"learning_rate": 9.044567949098653e-06,
"loss": 0.7596,
"step": 4513
},
{
"epoch": 1.200531914893617,
"grad_norm": 4.0186896324157715,
"learning_rate": 9.044050801178873e-06,
"loss": 0.9244,
"step": 4514
},
{
"epoch": 1.2007978723404256,
"grad_norm": 3.989703416824341,
"learning_rate": 9.043533528131401e-06,
"loss": 0.8296,
"step": 4515
},
{
"epoch": 1.201063829787234,
"grad_norm": 3.6627588272094727,
"learning_rate": 9.043016129972239e-06,
"loss": 0.6557,
"step": 4516
},
{
"epoch": 1.2013297872340425,
"grad_norm": 4.000990867614746,
"learning_rate": 9.042498606717401e-06,
"loss": 0.8114,
"step": 4517
},
{
"epoch": 1.201595744680851,
"grad_norm": 4.12056827545166,
"learning_rate": 9.041980958382895e-06,
"loss": 0.7866,
"step": 4518
},
{
"epoch": 1.2018617021276596,
"grad_norm": 4.345433712005615,
"learning_rate": 9.041463184984739e-06,
"loss": 0.9222,
"step": 4519
},
{
"epoch": 1.202127659574468,
"grad_norm": 3.629518747329712,
"learning_rate": 9.040945286538954e-06,
"loss": 0.6739,
"step": 4520
},
{
"epoch": 1.2023936170212766,
"grad_norm": 4.012117862701416,
"learning_rate": 9.040427263061563e-06,
"loss": 0.8168,
"step": 4521
},
{
"epoch": 1.202659574468085,
"grad_norm": 3.6947031021118164,
"learning_rate": 9.039909114568597e-06,
"loss": 0.7811,
"step": 4522
},
{
"epoch": 1.2029255319148937,
"grad_norm": 4.276979446411133,
"learning_rate": 9.039390841076086e-06,
"loss": 0.9514,
"step": 4523
},
{
"epoch": 1.2031914893617022,
"grad_norm": 3.970949411392212,
"learning_rate": 9.038872442600066e-06,
"loss": 0.832,
"step": 4524
},
{
"epoch": 1.2034574468085106,
"grad_norm": 4.2050323486328125,
"learning_rate": 9.038353919156579e-06,
"loss": 0.838,
"step": 4525
},
{
"epoch": 1.203723404255319,
"grad_norm": 3.872286319732666,
"learning_rate": 9.037835270761667e-06,
"loss": 0.8424,
"step": 4526
},
{
"epoch": 1.2039893617021278,
"grad_norm": 4.053325653076172,
"learning_rate": 9.037316497431377e-06,
"loss": 0.8673,
"step": 4527
},
{
"epoch": 1.2042553191489362,
"grad_norm": 3.982133388519287,
"learning_rate": 9.036797599181762e-06,
"loss": 0.7101,
"step": 4528
},
{
"epoch": 1.2045212765957447,
"grad_norm": 4.298680782318115,
"learning_rate": 9.036278576028876e-06,
"loss": 0.8027,
"step": 4529
},
{
"epoch": 1.2047872340425532,
"grad_norm": 3.7166576385498047,
"learning_rate": 9.035759427988779e-06,
"loss": 0.8048,
"step": 4530
},
{
"epoch": 1.2050531914893616,
"grad_norm": 4.02637243270874,
"learning_rate": 9.035240155077532e-06,
"loss": 0.8519,
"step": 4531
},
{
"epoch": 1.2053191489361703,
"grad_norm": 4.048903942108154,
"learning_rate": 9.034720757311206e-06,
"loss": 0.8076,
"step": 4532
},
{
"epoch": 1.2055851063829788,
"grad_norm": 3.8102221488952637,
"learning_rate": 9.034201234705869e-06,
"loss": 0.8361,
"step": 4533
},
{
"epoch": 1.2058510638297872,
"grad_norm": 4.269223213195801,
"learning_rate": 9.033681587277596e-06,
"loss": 0.9528,
"step": 4534
},
{
"epoch": 1.2061170212765957,
"grad_norm": 4.001543998718262,
"learning_rate": 9.033161815042465e-06,
"loss": 0.8678,
"step": 4535
},
{
"epoch": 1.2063829787234042,
"grad_norm": 4.034337997436523,
"learning_rate": 9.032641918016559e-06,
"loss": 0.7533,
"step": 4536
},
{
"epoch": 1.2066489361702128,
"grad_norm": 3.7186598777770996,
"learning_rate": 9.032121896215965e-06,
"loss": 0.8469,
"step": 4537
},
{
"epoch": 1.2069148936170213,
"grad_norm": 3.8396542072296143,
"learning_rate": 9.03160174965677e-06,
"loss": 0.7419,
"step": 4538
},
{
"epoch": 1.2071808510638298,
"grad_norm": 3.971125602722168,
"learning_rate": 9.031081478355074e-06,
"loss": 0.7997,
"step": 4539
},
{
"epoch": 1.2074468085106382,
"grad_norm": 3.9450175762176514,
"learning_rate": 9.03056108232697e-06,
"loss": 0.9049,
"step": 4540
},
{
"epoch": 1.2077127659574467,
"grad_norm": 3.878206729888916,
"learning_rate": 9.03004056158856e-06,
"loss": 0.7389,
"step": 4541
},
{
"epoch": 1.2079787234042554,
"grad_norm": 4.157868385314941,
"learning_rate": 9.02951991615595e-06,
"loss": 0.8474,
"step": 4542
},
{
"epoch": 1.2082446808510638,
"grad_norm": 4.203000068664551,
"learning_rate": 9.02899914604525e-06,
"loss": 0.7146,
"step": 4543
},
{
"epoch": 1.2085106382978723,
"grad_norm": 4.336871147155762,
"learning_rate": 9.028478251272573e-06,
"loss": 0.7901,
"step": 4544
},
{
"epoch": 1.2087765957446808,
"grad_norm": 4.467360973358154,
"learning_rate": 9.027957231854034e-06,
"loss": 0.6987,
"step": 4545
},
{
"epoch": 1.2090425531914895,
"grad_norm": 4.293298721313477,
"learning_rate": 9.027436087805759e-06,
"loss": 0.8706,
"step": 4546
},
{
"epoch": 1.209308510638298,
"grad_norm": 4.344003200531006,
"learning_rate": 9.026914819143867e-06,
"loss": 0.8803,
"step": 4547
},
{
"epoch": 1.2095744680851064,
"grad_norm": 3.9396615028381348,
"learning_rate": 9.026393425884491e-06,
"loss": 0.8195,
"step": 4548
},
{
"epoch": 1.2098404255319148,
"grad_norm": 4.163116931915283,
"learning_rate": 9.025871908043762e-06,
"loss": 0.8396,
"step": 4549
},
{
"epoch": 1.2101063829787235,
"grad_norm": 3.790417194366455,
"learning_rate": 9.025350265637816e-06,
"loss": 0.9279,
"step": 4550
},
{
"epoch": 1.210372340425532,
"grad_norm": 3.6482441425323486,
"learning_rate": 9.024828498682793e-06,
"loss": 0.8154,
"step": 4551
},
{
"epoch": 1.2106382978723405,
"grad_norm": 4.012534141540527,
"learning_rate": 9.024306607194839e-06,
"loss": 0.777,
"step": 4552
},
{
"epoch": 1.210904255319149,
"grad_norm": 3.850843906402588,
"learning_rate": 9.0237845911901e-06,
"loss": 0.6989,
"step": 4553
},
{
"epoch": 1.2111702127659574,
"grad_norm": 3.810297966003418,
"learning_rate": 9.023262450684727e-06,
"loss": 0.8284,
"step": 4554
},
{
"epoch": 1.211436170212766,
"grad_norm": 3.643862247467041,
"learning_rate": 9.022740185694877e-06,
"loss": 0.9392,
"step": 4555
},
{
"epoch": 1.2117021276595745,
"grad_norm": 3.707839012145996,
"learning_rate": 9.022217796236711e-06,
"loss": 0.794,
"step": 4556
},
{
"epoch": 1.211968085106383,
"grad_norm": 4.23673152923584,
"learning_rate": 9.02169528232639e-06,
"loss": 0.7546,
"step": 4557
},
{
"epoch": 1.2122340425531914,
"grad_norm": 4.236415386199951,
"learning_rate": 9.021172643980082e-06,
"loss": 0.9645,
"step": 4558
},
{
"epoch": 1.2125,
"grad_norm": 3.956615686416626,
"learning_rate": 9.02064988121396e-06,
"loss": 0.9095,
"step": 4559
},
{
"epoch": 1.2127659574468086,
"grad_norm": 4.126330852508545,
"learning_rate": 9.020126994044194e-06,
"loss": 0.7762,
"step": 4560
},
{
"epoch": 1.213031914893617,
"grad_norm": 4.501354694366455,
"learning_rate": 9.019603982486967e-06,
"loss": 0.873,
"step": 4561
},
{
"epoch": 1.2132978723404255,
"grad_norm": 4.185324192047119,
"learning_rate": 9.01908084655846e-06,
"loss": 0.8071,
"step": 4562
},
{
"epoch": 1.213563829787234,
"grad_norm": 4.112594127655029,
"learning_rate": 9.018557586274858e-06,
"loss": 0.7762,
"step": 4563
},
{
"epoch": 1.2138297872340424,
"grad_norm": 3.841365098953247,
"learning_rate": 9.018034201652357e-06,
"loss": 0.8042,
"step": 4564
},
{
"epoch": 1.2140957446808511,
"grad_norm": 3.9603569507598877,
"learning_rate": 9.017510692707144e-06,
"loss": 0.6254,
"step": 4565
},
{
"epoch": 1.2143617021276596,
"grad_norm": 3.6832830905914307,
"learning_rate": 9.016987059455422e-06,
"loss": 0.7013,
"step": 4566
},
{
"epoch": 1.214627659574468,
"grad_norm": 4.155395030975342,
"learning_rate": 9.01646330191339e-06,
"loss": 0.8052,
"step": 4567
},
{
"epoch": 1.2148936170212765,
"grad_norm": 3.9648375511169434,
"learning_rate": 9.015939420097255e-06,
"loss": 0.778,
"step": 4568
},
{
"epoch": 1.2151595744680852,
"grad_norm": 3.8621366024017334,
"learning_rate": 9.015415414023226e-06,
"loss": 0.7851,
"step": 4569
},
{
"epoch": 1.2154255319148937,
"grad_norm": 4.207528114318848,
"learning_rate": 9.014891283707517e-06,
"loss": 0.9192,
"step": 4570
},
{
"epoch": 1.2156914893617021,
"grad_norm": 4.204238414764404,
"learning_rate": 9.014367029166344e-06,
"loss": 0.8175,
"step": 4571
},
{
"epoch": 1.2159574468085106,
"grad_norm": 4.0870537757873535,
"learning_rate": 9.013842650415927e-06,
"loss": 0.8294,
"step": 4572
},
{
"epoch": 1.2162234042553193,
"grad_norm": 4.164912700653076,
"learning_rate": 9.013318147472497e-06,
"loss": 0.8457,
"step": 4573
},
{
"epoch": 1.2164893617021277,
"grad_norm": 4.122684478759766,
"learning_rate": 9.012793520352276e-06,
"loss": 0.7565,
"step": 4574
},
{
"epoch": 1.2167553191489362,
"grad_norm": 4.155274391174316,
"learning_rate": 9.012268769071499e-06,
"loss": 0.7522,
"step": 4575
},
{
"epoch": 1.2170212765957447,
"grad_norm": 4.182219505310059,
"learning_rate": 9.011743893646402e-06,
"loss": 0.842,
"step": 4576
},
{
"epoch": 1.2172872340425531,
"grad_norm": 3.9600305557250977,
"learning_rate": 9.011218894093226e-06,
"loss": 0.7938,
"step": 4577
},
{
"epoch": 1.2175531914893618,
"grad_norm": 3.977374792098999,
"learning_rate": 9.010693770428217e-06,
"loss": 0.7021,
"step": 4578
},
{
"epoch": 1.2178191489361703,
"grad_norm": 4.227469444274902,
"learning_rate": 9.010168522667617e-06,
"loss": 0.8016,
"step": 4579
},
{
"epoch": 1.2180851063829787,
"grad_norm": 3.7802317142486572,
"learning_rate": 9.009643150827683e-06,
"loss": 0.7565,
"step": 4580
},
{
"epoch": 1.2183510638297872,
"grad_norm": 3.9615867137908936,
"learning_rate": 9.00911765492467e-06,
"loss": 0.8134,
"step": 4581
},
{
"epoch": 1.2186170212765957,
"grad_norm": 3.852104902267456,
"learning_rate": 9.008592034974836e-06,
"loss": 0.7654,
"step": 4582
},
{
"epoch": 1.2188829787234043,
"grad_norm": 3.5889623165130615,
"learning_rate": 9.008066290994443e-06,
"loss": 0.816,
"step": 4583
},
{
"epoch": 1.2191489361702128,
"grad_norm": 3.7613863945007324,
"learning_rate": 9.007540422999762e-06,
"loss": 0.7356,
"step": 4584
},
{
"epoch": 1.2194148936170213,
"grad_norm": 4.141067981719971,
"learning_rate": 9.007014431007064e-06,
"loss": 0.8445,
"step": 4585
},
{
"epoch": 1.2196808510638297,
"grad_norm": 3.842954635620117,
"learning_rate": 9.00648831503262e-06,
"loss": 0.7844,
"step": 4586
},
{
"epoch": 1.2199468085106382,
"grad_norm": 3.799661159515381,
"learning_rate": 9.00596207509271e-06,
"loss": 0.8777,
"step": 4587
},
{
"epoch": 1.2202127659574469,
"grad_norm": 4.335452079772949,
"learning_rate": 9.005435711203619e-06,
"loss": 0.936,
"step": 4588
},
{
"epoch": 1.2204787234042553,
"grad_norm": 3.905426025390625,
"learning_rate": 9.004909223381628e-06,
"loss": 0.7583,
"step": 4589
},
{
"epoch": 1.2207446808510638,
"grad_norm": 3.950054168701172,
"learning_rate": 9.004382611643032e-06,
"loss": 0.8512,
"step": 4590
},
{
"epoch": 1.2210106382978723,
"grad_norm": 4.1044135093688965,
"learning_rate": 9.003855876004124e-06,
"loss": 0.7941,
"step": 4591
},
{
"epoch": 1.2212765957446807,
"grad_norm": 3.908524751663208,
"learning_rate": 9.003329016481201e-06,
"loss": 0.7502,
"step": 4592
},
{
"epoch": 1.2215425531914894,
"grad_norm": 3.6956968307495117,
"learning_rate": 9.002802033090564e-06,
"loss": 0.7847,
"step": 4593
},
{
"epoch": 1.2218085106382979,
"grad_norm": 4.292162895202637,
"learning_rate": 9.00227492584852e-06,
"loss": 0.7966,
"step": 4594
},
{
"epoch": 1.2220744680851063,
"grad_norm": 4.15654993057251,
"learning_rate": 9.001747694771378e-06,
"loss": 0.7523,
"step": 4595
},
{
"epoch": 1.2223404255319148,
"grad_norm": 3.5688204765319824,
"learning_rate": 9.00122033987545e-06,
"loss": 0.6891,
"step": 4596
},
{
"epoch": 1.2226063829787235,
"grad_norm": 3.962028980255127,
"learning_rate": 9.000692861177056e-06,
"loss": 0.7285,
"step": 4597
},
{
"epoch": 1.222872340425532,
"grad_norm": 4.2762651443481445,
"learning_rate": 9.000165258692512e-06,
"loss": 0.8359,
"step": 4598
},
{
"epoch": 1.2231382978723404,
"grad_norm": 4.260420799255371,
"learning_rate": 8.999637532438145e-06,
"loss": 0.9171,
"step": 4599
},
{
"epoch": 1.2234042553191489,
"grad_norm": 4.032958507537842,
"learning_rate": 8.999109682430288e-06,
"loss": 0.8082,
"step": 4600
},
{
"epoch": 1.2236702127659576,
"grad_norm": 3.772594690322876,
"learning_rate": 8.998581708685264e-06,
"loss": 0.8029,
"step": 4601
},
{
"epoch": 1.223936170212766,
"grad_norm": 4.074283123016357,
"learning_rate": 8.998053611219418e-06,
"loss": 0.729,
"step": 4602
},
{
"epoch": 1.2242021276595745,
"grad_norm": 3.5871801376342773,
"learning_rate": 8.997525390049084e-06,
"loss": 0.8645,
"step": 4603
},
{
"epoch": 1.224468085106383,
"grad_norm": 3.789030075073242,
"learning_rate": 8.996997045190608e-06,
"loss": 0.7226,
"step": 4604
},
{
"epoch": 1.2247340425531914,
"grad_norm": 3.840949296951294,
"learning_rate": 8.996468576660337e-06,
"loss": 0.8817,
"step": 4605
},
{
"epoch": 1.225,
"grad_norm": 4.251964569091797,
"learning_rate": 8.995939984474624e-06,
"loss": 0.7567,
"step": 4606
},
{
"epoch": 1.2252659574468086,
"grad_norm": 3.7050812244415283,
"learning_rate": 8.995411268649823e-06,
"loss": 0.8609,
"step": 4607
},
{
"epoch": 1.225531914893617,
"grad_norm": 4.209064483642578,
"learning_rate": 8.994882429202294e-06,
"loss": 0.8653,
"step": 4608
},
{
"epoch": 1.2257978723404255,
"grad_norm": 4.214296340942383,
"learning_rate": 8.994353466148399e-06,
"loss": 0.8262,
"step": 4609
},
{
"epoch": 1.226063829787234,
"grad_norm": 3.9574646949768066,
"learning_rate": 8.993824379504505e-06,
"loss": 0.7383,
"step": 4610
},
{
"epoch": 1.2263297872340426,
"grad_norm": 4.194293975830078,
"learning_rate": 8.993295169286982e-06,
"loss": 0.7483,
"step": 4611
},
{
"epoch": 1.226595744680851,
"grad_norm": 3.9258837699890137,
"learning_rate": 8.992765835512205e-06,
"loss": 0.7151,
"step": 4612
},
{
"epoch": 1.2268617021276595,
"grad_norm": 3.662429094314575,
"learning_rate": 8.992236378196552e-06,
"loss": 0.8595,
"step": 4613
},
{
"epoch": 1.227127659574468,
"grad_norm": 3.745591640472412,
"learning_rate": 8.991706797356407e-06,
"loss": 0.8065,
"step": 4614
},
{
"epoch": 1.2273936170212765,
"grad_norm": 3.8420639038085938,
"learning_rate": 8.991177093008153e-06,
"loss": 0.7613,
"step": 4615
},
{
"epoch": 1.2276595744680852,
"grad_norm": 3.994805097579956,
"learning_rate": 8.990647265168179e-06,
"loss": 0.7919,
"step": 4616
},
{
"epoch": 1.2279255319148936,
"grad_norm": 4.0484514236450195,
"learning_rate": 8.990117313852882e-06,
"loss": 0.9,
"step": 4617
},
{
"epoch": 1.228191489361702,
"grad_norm": 3.999068260192871,
"learning_rate": 8.989587239078658e-06,
"loss": 0.7472,
"step": 4618
},
{
"epoch": 1.2284574468085105,
"grad_norm": 3.9625680446624756,
"learning_rate": 8.989057040861905e-06,
"loss": 1.0265,
"step": 4619
},
{
"epoch": 1.2287234042553192,
"grad_norm": 4.0248284339904785,
"learning_rate": 8.988526719219035e-06,
"loss": 0.7525,
"step": 4620
},
{
"epoch": 1.2289893617021277,
"grad_norm": 3.985003709793091,
"learning_rate": 8.987996274166449e-06,
"loss": 0.8491,
"step": 4621
},
{
"epoch": 1.2292553191489362,
"grad_norm": 3.5832836627960205,
"learning_rate": 8.987465705720565e-06,
"loss": 0.6647,
"step": 4622
},
{
"epoch": 1.2295212765957446,
"grad_norm": 3.5431840419769287,
"learning_rate": 8.986935013897796e-06,
"loss": 0.7142,
"step": 4623
},
{
"epoch": 1.2297872340425533,
"grad_norm": 3.745082139968872,
"learning_rate": 8.986404198714561e-06,
"loss": 0.6538,
"step": 4624
},
{
"epoch": 1.2300531914893618,
"grad_norm": 3.653146982192993,
"learning_rate": 8.98587326018729e-06,
"loss": 0.7833,
"step": 4625
},
{
"epoch": 1.2303191489361702,
"grad_norm": 3.9238173961639404,
"learning_rate": 8.985342198332407e-06,
"loss": 0.8265,
"step": 4626
},
{
"epoch": 1.2305851063829787,
"grad_norm": 4.6217265129089355,
"learning_rate": 8.984811013166345e-06,
"loss": 0.9442,
"step": 4627
},
{
"epoch": 1.2308510638297872,
"grad_norm": 3.7040395736694336,
"learning_rate": 8.98427970470554e-06,
"loss": 0.8234,
"step": 4628
},
{
"epoch": 1.2311170212765958,
"grad_norm": 3.8721320629119873,
"learning_rate": 8.983748272966426e-06,
"loss": 0.8997,
"step": 4629
},
{
"epoch": 1.2313829787234043,
"grad_norm": 3.5621466636657715,
"learning_rate": 8.983216717965453e-06,
"loss": 0.8186,
"step": 4630
},
{
"epoch": 1.2316489361702128,
"grad_norm": 3.854879379272461,
"learning_rate": 8.982685039719064e-06,
"loss": 0.773,
"step": 4631
},
{
"epoch": 1.2319148936170212,
"grad_norm": 3.9702491760253906,
"learning_rate": 8.982153238243712e-06,
"loss": 0.8645,
"step": 4632
},
{
"epoch": 1.2321808510638297,
"grad_norm": 4.122603416442871,
"learning_rate": 8.981621313555849e-06,
"loss": 0.7651,
"step": 4633
},
{
"epoch": 1.2324468085106384,
"grad_norm": 4.362513065338135,
"learning_rate": 8.981089265671936e-06,
"loss": 0.8279,
"step": 4634
},
{
"epoch": 1.2327127659574468,
"grad_norm": 4.333089351654053,
"learning_rate": 8.980557094608433e-06,
"loss": 0.8613,
"step": 4635
},
{
"epoch": 1.2329787234042553,
"grad_norm": 3.9214844703674316,
"learning_rate": 8.980024800381807e-06,
"loss": 0.8316,
"step": 4636
},
{
"epoch": 1.2332446808510638,
"grad_norm": 3.9786224365234375,
"learning_rate": 8.979492383008528e-06,
"loss": 0.8405,
"step": 4637
},
{
"epoch": 1.2335106382978722,
"grad_norm": 4.105279445648193,
"learning_rate": 8.978959842505071e-06,
"loss": 0.8187,
"step": 4638
},
{
"epoch": 1.233776595744681,
"grad_norm": 4.662153244018555,
"learning_rate": 8.97842717888791e-06,
"loss": 0.8309,
"step": 4639
},
{
"epoch": 1.2340425531914894,
"grad_norm": 4.0390400886535645,
"learning_rate": 8.977894392173527e-06,
"loss": 0.823,
"step": 4640
},
{
"epoch": 1.2343085106382978,
"grad_norm": 3.574883222579956,
"learning_rate": 8.97736148237841e-06,
"loss": 0.899,
"step": 4641
},
{
"epoch": 1.2345744680851063,
"grad_norm": 3.9242796897888184,
"learning_rate": 8.976828449519047e-06,
"loss": 0.9994,
"step": 4642
},
{
"epoch": 1.234840425531915,
"grad_norm": 3.9096062183380127,
"learning_rate": 8.976295293611927e-06,
"loss": 0.907,
"step": 4643
},
{
"epoch": 1.2351063829787234,
"grad_norm": 4.211862087249756,
"learning_rate": 8.97576201467355e-06,
"loss": 0.807,
"step": 4644
},
{
"epoch": 1.235372340425532,
"grad_norm": 3.7779862880706787,
"learning_rate": 8.975228612720415e-06,
"loss": 0.7325,
"step": 4645
},
{
"epoch": 1.2356382978723404,
"grad_norm": 4.162439823150635,
"learning_rate": 8.974695087769027e-06,
"loss": 0.9018,
"step": 4646
},
{
"epoch": 1.235904255319149,
"grad_norm": 3.9376440048217773,
"learning_rate": 8.974161439835894e-06,
"loss": 0.7467,
"step": 4647
},
{
"epoch": 1.2361702127659575,
"grad_norm": 3.728128433227539,
"learning_rate": 8.973627668937528e-06,
"loss": 0.6471,
"step": 4648
},
{
"epoch": 1.236436170212766,
"grad_norm": 4.1924967765808105,
"learning_rate": 8.97309377509044e-06,
"loss": 0.8827,
"step": 4649
},
{
"epoch": 1.2367021276595744,
"grad_norm": 3.9644808769226074,
"learning_rate": 8.972559758311156e-06,
"loss": 0.737,
"step": 4650
},
{
"epoch": 1.236968085106383,
"grad_norm": 4.276489734649658,
"learning_rate": 8.972025618616195e-06,
"loss": 0.7805,
"step": 4651
},
{
"epoch": 1.2372340425531916,
"grad_norm": 4.115257263183594,
"learning_rate": 8.971491356022086e-06,
"loss": 0.8479,
"step": 4652
},
{
"epoch": 1.2375,
"grad_norm": 4.143589019775391,
"learning_rate": 8.970956970545356e-06,
"loss": 0.7716,
"step": 4653
},
{
"epoch": 1.2377659574468085,
"grad_norm": 3.872377634048462,
"learning_rate": 8.970422462202543e-06,
"loss": 0.7949,
"step": 4654
},
{
"epoch": 1.238031914893617,
"grad_norm": 3.9074594974517822,
"learning_rate": 8.969887831010185e-06,
"loss": 0.818,
"step": 4655
},
{
"epoch": 1.2382978723404254,
"grad_norm": 3.7083117961883545,
"learning_rate": 8.969353076984823e-06,
"loss": 0.823,
"step": 4656
},
{
"epoch": 1.2385638297872341,
"grad_norm": 3.952829122543335,
"learning_rate": 8.968818200143005e-06,
"loss": 0.7928,
"step": 4657
},
{
"epoch": 1.2388297872340426,
"grad_norm": 4.015969276428223,
"learning_rate": 8.96828320050128e-06,
"loss": 0.8713,
"step": 4658
},
{
"epoch": 1.239095744680851,
"grad_norm": 4.456661701202393,
"learning_rate": 8.967748078076197e-06,
"loss": 0.8482,
"step": 4659
},
{
"epoch": 1.2393617021276595,
"grad_norm": 3.8664846420288086,
"learning_rate": 8.96721283288432e-06,
"loss": 0.7526,
"step": 4660
},
{
"epoch": 1.239627659574468,
"grad_norm": 4.358894348144531,
"learning_rate": 8.966677464942206e-06,
"loss": 0.7756,
"step": 4661
},
{
"epoch": 1.2398936170212767,
"grad_norm": 3.8991811275482178,
"learning_rate": 8.96614197426642e-06,
"loss": 0.7629,
"step": 4662
},
{
"epoch": 1.2401595744680851,
"grad_norm": 3.752913236618042,
"learning_rate": 8.965606360873533e-06,
"loss": 0.7598,
"step": 4663
},
{
"epoch": 1.2404255319148936,
"grad_norm": 4.097616672515869,
"learning_rate": 8.965070624780117e-06,
"loss": 0.7635,
"step": 4664
},
{
"epoch": 1.240691489361702,
"grad_norm": 3.855180025100708,
"learning_rate": 8.964534766002747e-06,
"loss": 0.8571,
"step": 4665
},
{
"epoch": 1.2409574468085107,
"grad_norm": 4.117387771606445,
"learning_rate": 8.963998784558001e-06,
"loss": 0.8517,
"step": 4666
},
{
"epoch": 1.2412234042553192,
"grad_norm": 4.247325897216797,
"learning_rate": 8.963462680462469e-06,
"loss": 0.7862,
"step": 4667
},
{
"epoch": 1.2414893617021276,
"grad_norm": 4.604616165161133,
"learning_rate": 8.962926453732734e-06,
"loss": 0.8325,
"step": 4668
},
{
"epoch": 1.241755319148936,
"grad_norm": 4.283206462860107,
"learning_rate": 8.96239010438539e-06,
"loss": 0.7897,
"step": 4669
},
{
"epoch": 1.2420212765957448,
"grad_norm": 4.039552688598633,
"learning_rate": 8.96185363243703e-06,
"loss": 0.8889,
"step": 4670
},
{
"epoch": 1.2422872340425533,
"grad_norm": 3.6952388286590576,
"learning_rate": 8.961317037904253e-06,
"loss": 0.7318,
"step": 4671
},
{
"epoch": 1.2425531914893617,
"grad_norm": 4.330514907836914,
"learning_rate": 8.960780320803665e-06,
"loss": 0.8473,
"step": 4672
},
{
"epoch": 1.2428191489361702,
"grad_norm": 3.8652656078338623,
"learning_rate": 8.960243481151869e-06,
"loss": 0.7744,
"step": 4673
},
{
"epoch": 1.2430851063829786,
"grad_norm": 4.232844352722168,
"learning_rate": 8.959706518965479e-06,
"loss": 0.7232,
"step": 4674
},
{
"epoch": 1.2433510638297873,
"grad_norm": 3.9439735412597656,
"learning_rate": 8.959169434261106e-06,
"loss": 0.7025,
"step": 4675
},
{
"epoch": 1.2436170212765958,
"grad_norm": 3.876521587371826,
"learning_rate": 8.958632227055369e-06,
"loss": 0.6779,
"step": 4676
},
{
"epoch": 1.2438829787234043,
"grad_norm": 3.7715842723846436,
"learning_rate": 8.95809489736489e-06,
"loss": 0.7331,
"step": 4677
},
{
"epoch": 1.2441489361702127,
"grad_norm": 4.344306945800781,
"learning_rate": 8.957557445206297e-06,
"loss": 0.797,
"step": 4678
},
{
"epoch": 1.2444148936170212,
"grad_norm": 3.924248218536377,
"learning_rate": 8.957019870596216e-06,
"loss": 0.9321,
"step": 4679
},
{
"epoch": 1.2446808510638299,
"grad_norm": 3.8048911094665527,
"learning_rate": 8.956482173551281e-06,
"loss": 0.7405,
"step": 4680
},
{
"epoch": 1.2449468085106383,
"grad_norm": 4.218112468719482,
"learning_rate": 8.95594435408813e-06,
"loss": 0.8395,
"step": 4681
},
{
"epoch": 1.2452127659574468,
"grad_norm": 3.683992385864258,
"learning_rate": 8.955406412223402e-06,
"loss": 0.7261,
"step": 4682
},
{
"epoch": 1.2454787234042553,
"grad_norm": 4.05771541595459,
"learning_rate": 8.954868347973742e-06,
"loss": 0.85,
"step": 4683
},
{
"epoch": 1.2457446808510637,
"grad_norm": 4.423064708709717,
"learning_rate": 8.954330161355803e-06,
"loss": 0.8632,
"step": 4684
},
{
"epoch": 1.2460106382978724,
"grad_norm": 4.039585113525391,
"learning_rate": 8.953791852386229e-06,
"loss": 0.8078,
"step": 4685
},
{
"epoch": 1.2462765957446809,
"grad_norm": 4.336376190185547,
"learning_rate": 8.953253421081682e-06,
"loss": 0.807,
"step": 4686
},
{
"epoch": 1.2465425531914893,
"grad_norm": 4.025651454925537,
"learning_rate": 8.95271486745882e-06,
"loss": 0.8651,
"step": 4687
},
{
"epoch": 1.2468085106382978,
"grad_norm": 3.839545488357544,
"learning_rate": 8.952176191534305e-06,
"loss": 0.7696,
"step": 4688
},
{
"epoch": 1.2470744680851065,
"grad_norm": 3.4037442207336426,
"learning_rate": 8.951637393324806e-06,
"loss": 0.7827,
"step": 4689
},
{
"epoch": 1.247340425531915,
"grad_norm": 4.202190399169922,
"learning_rate": 8.951098472846994e-06,
"loss": 0.6717,
"step": 4690
},
{
"epoch": 1.2476063829787234,
"grad_norm": 4.145596027374268,
"learning_rate": 8.950559430117542e-06,
"loss": 0.8201,
"step": 4691
},
{
"epoch": 1.2478723404255319,
"grad_norm": 4.066543102264404,
"learning_rate": 8.950020265153133e-06,
"loss": 0.7651,
"step": 4692
},
{
"epoch": 1.2481382978723405,
"grad_norm": 3.9612643718719482,
"learning_rate": 8.949480977970444e-06,
"loss": 0.7625,
"step": 4693
},
{
"epoch": 1.248404255319149,
"grad_norm": 3.6797444820404053,
"learning_rate": 8.948941568586165e-06,
"loss": 0.7396,
"step": 4694
},
{
"epoch": 1.2486702127659575,
"grad_norm": 4.5470662117004395,
"learning_rate": 8.948402037016984e-06,
"loss": 0.831,
"step": 4695
},
{
"epoch": 1.248936170212766,
"grad_norm": 3.3565194606781006,
"learning_rate": 8.947862383279594e-06,
"loss": 0.6773,
"step": 4696
},
{
"epoch": 1.2492021276595744,
"grad_norm": 4.042359352111816,
"learning_rate": 8.947322607390694e-06,
"loss": 0.8052,
"step": 4697
},
{
"epoch": 1.249468085106383,
"grad_norm": 3.909513235092163,
"learning_rate": 8.946782709366988e-06,
"loss": 0.8849,
"step": 4698
},
{
"epoch": 1.2497340425531915,
"grad_norm": 4.553561687469482,
"learning_rate": 8.946242689225175e-06,
"loss": 0.9048,
"step": 4699
},
{
"epoch": 1.25,
"grad_norm": 4.289936542510986,
"learning_rate": 8.94570254698197e-06,
"loss": 0.8465,
"step": 4700
},
{
"epoch": 1.2502659574468085,
"grad_norm": 3.7364187240600586,
"learning_rate": 8.94516228265408e-06,
"loss": 0.9081,
"step": 4701
},
{
"epoch": 1.250531914893617,
"grad_norm": 3.8869049549102783,
"learning_rate": 8.944621896258226e-06,
"loss": 0.7625,
"step": 4702
},
{
"epoch": 1.2507978723404256,
"grad_norm": 4.203104019165039,
"learning_rate": 8.944081387811126e-06,
"loss": 0.7822,
"step": 4703
},
{
"epoch": 1.251063829787234,
"grad_norm": 3.810011148452759,
"learning_rate": 8.943540757329503e-06,
"loss": 0.9403,
"step": 4704
},
{
"epoch": 1.2513297872340425,
"grad_norm": 3.795477867126465,
"learning_rate": 8.943000004830087e-06,
"loss": 0.7856,
"step": 4705
},
{
"epoch": 1.251595744680851,
"grad_norm": 4.174344062805176,
"learning_rate": 8.942459130329608e-06,
"loss": 0.8522,
"step": 4706
},
{
"epoch": 1.2518617021276595,
"grad_norm": 3.6374874114990234,
"learning_rate": 8.941918133844803e-06,
"loss": 0.8471,
"step": 4707
},
{
"epoch": 1.2521276595744681,
"grad_norm": 3.645719528198242,
"learning_rate": 8.941377015392407e-06,
"loss": 0.7564,
"step": 4708
},
{
"epoch": 1.2523936170212766,
"grad_norm": 4.238284587860107,
"learning_rate": 8.94083577498917e-06,
"loss": 0.9556,
"step": 4709
},
{
"epoch": 1.252659574468085,
"grad_norm": 4.101098537445068,
"learning_rate": 8.940294412651831e-06,
"loss": 0.9095,
"step": 4710
},
{
"epoch": 1.2529255319148938,
"grad_norm": 3.56626296043396,
"learning_rate": 8.939752928397146e-06,
"loss": 0.7358,
"step": 4711
},
{
"epoch": 1.253191489361702,
"grad_norm": 3.680903434753418,
"learning_rate": 8.939211322241866e-06,
"loss": 0.7556,
"step": 4712
},
{
"epoch": 1.2534574468085107,
"grad_norm": 4.173125267028809,
"learning_rate": 8.938669594202748e-06,
"loss": 0.7488,
"step": 4713
},
{
"epoch": 1.2537234042553191,
"grad_norm": 4.197647571563721,
"learning_rate": 8.938127744296559e-06,
"loss": 0.8367,
"step": 4714
},
{
"epoch": 1.2539893617021276,
"grad_norm": 3.5184898376464844,
"learning_rate": 8.937585772540058e-06,
"loss": 0.7586,
"step": 4715
},
{
"epoch": 1.2542553191489363,
"grad_norm": 4.331880569458008,
"learning_rate": 8.93704367895002e-06,
"loss": 0.9277,
"step": 4716
},
{
"epoch": 1.2545212765957447,
"grad_norm": 4.3062238693237305,
"learning_rate": 8.936501463543213e-06,
"loss": 0.7798,
"step": 4717
},
{
"epoch": 1.2547872340425532,
"grad_norm": 4.3987956047058105,
"learning_rate": 8.935959126336418e-06,
"loss": 0.8121,
"step": 4718
},
{
"epoch": 1.2550531914893617,
"grad_norm": 3.8964762687683105,
"learning_rate": 8.935416667346412e-06,
"loss": 0.8318,
"step": 4719
},
{
"epoch": 1.2553191489361701,
"grad_norm": 4.110397815704346,
"learning_rate": 8.934874086589981e-06,
"loss": 0.7502,
"step": 4720
},
{
"epoch": 1.2555851063829788,
"grad_norm": 3.531947135925293,
"learning_rate": 8.934331384083914e-06,
"loss": 0.7613,
"step": 4721
},
{
"epoch": 1.2558510638297873,
"grad_norm": 3.8877408504486084,
"learning_rate": 8.933788559845001e-06,
"loss": 0.7568,
"step": 4722
},
{
"epoch": 1.2561170212765957,
"grad_norm": 3.653062582015991,
"learning_rate": 8.93324561389004e-06,
"loss": 0.7156,
"step": 4723
},
{
"epoch": 1.2563829787234042,
"grad_norm": 3.9823882579803467,
"learning_rate": 8.932702546235827e-06,
"loss": 0.8349,
"step": 4724
},
{
"epoch": 1.2566489361702127,
"grad_norm": 3.867664337158203,
"learning_rate": 8.932159356899169e-06,
"loss": 0.7605,
"step": 4725
},
{
"epoch": 1.2569148936170214,
"grad_norm": 3.945042371749878,
"learning_rate": 8.93161604589687e-06,
"loss": 0.698,
"step": 4726
},
{
"epoch": 1.2571808510638298,
"grad_norm": 4.207972049713135,
"learning_rate": 8.93107261324574e-06,
"loss": 0.9514,
"step": 4727
},
{
"epoch": 1.2574468085106383,
"grad_norm": 3.8403220176696777,
"learning_rate": 8.930529058962597e-06,
"loss": 0.7912,
"step": 4728
},
{
"epoch": 1.2577127659574467,
"grad_norm": 3.9817752838134766,
"learning_rate": 8.929985383064257e-06,
"loss": 0.752,
"step": 4729
},
{
"epoch": 1.2579787234042552,
"grad_norm": 3.786790132522583,
"learning_rate": 8.929441585567543e-06,
"loss": 0.7753,
"step": 4730
},
{
"epoch": 1.258244680851064,
"grad_norm": 3.5705316066741943,
"learning_rate": 8.928897666489278e-06,
"loss": 0.6983,
"step": 4731
},
{
"epoch": 1.2585106382978724,
"grad_norm": 3.8111605644226074,
"learning_rate": 8.928353625846294e-06,
"loss": 0.9261,
"step": 4732
},
{
"epoch": 1.2587765957446808,
"grad_norm": 3.8016891479492188,
"learning_rate": 8.927809463655424e-06,
"loss": 0.9297,
"step": 4733
},
{
"epoch": 1.2590425531914895,
"grad_norm": 3.998060941696167,
"learning_rate": 8.927265179933506e-06,
"loss": 0.8105,
"step": 4734
},
{
"epoch": 1.2593085106382977,
"grad_norm": 3.4611032009124756,
"learning_rate": 8.926720774697379e-06,
"loss": 0.7404,
"step": 4735
},
{
"epoch": 1.2595744680851064,
"grad_norm": 4.086428165435791,
"learning_rate": 8.926176247963886e-06,
"loss": 0.7905,
"step": 4736
},
{
"epoch": 1.2598404255319149,
"grad_norm": 4.124720573425293,
"learning_rate": 8.92563159974988e-06,
"loss": 0.9439,
"step": 4737
},
{
"epoch": 1.2601063829787233,
"grad_norm": 3.536327600479126,
"learning_rate": 8.92508683007221e-06,
"loss": 0.7992,
"step": 4738
},
{
"epoch": 1.260372340425532,
"grad_norm": 3.884551763534546,
"learning_rate": 8.924541938947731e-06,
"loss": 0.8708,
"step": 4739
},
{
"epoch": 1.2606382978723405,
"grad_norm": 4.106461048126221,
"learning_rate": 8.923996926393306e-06,
"loss": 0.8013,
"step": 4740
},
{
"epoch": 1.260904255319149,
"grad_norm": 3.6707823276519775,
"learning_rate": 8.923451792425795e-06,
"loss": 0.7818,
"step": 4741
},
{
"epoch": 1.2611702127659574,
"grad_norm": 4.26462984085083,
"learning_rate": 8.922906537062066e-06,
"loss": 0.9622,
"step": 4742
},
{
"epoch": 1.2614361702127659,
"grad_norm": 4.356677055358887,
"learning_rate": 8.92236116031899e-06,
"loss": 0.9918,
"step": 4743
},
{
"epoch": 1.2617021276595746,
"grad_norm": 3.735673427581787,
"learning_rate": 8.921815662213442e-06,
"loss": 0.6767,
"step": 4744
},
{
"epoch": 1.261968085106383,
"grad_norm": 3.9601590633392334,
"learning_rate": 8.9212700427623e-06,
"loss": 0.8667,
"step": 4745
},
{
"epoch": 1.2622340425531915,
"grad_norm": 3.9646952152252197,
"learning_rate": 8.920724301982446e-06,
"loss": 0.7383,
"step": 4746
},
{
"epoch": 1.2625,
"grad_norm": 3.402167320251465,
"learning_rate": 8.920178439890765e-06,
"loss": 0.7373,
"step": 4747
},
{
"epoch": 1.2627659574468084,
"grad_norm": 4.096093654632568,
"learning_rate": 8.91963245650415e-06,
"loss": 0.7765,
"step": 4748
},
{
"epoch": 1.263031914893617,
"grad_norm": 3.612751007080078,
"learning_rate": 8.91908635183949e-06,
"loss": 0.8401,
"step": 4749
},
{
"epoch": 1.2632978723404256,
"grad_norm": 4.043914318084717,
"learning_rate": 8.918540125913686e-06,
"loss": 0.7371,
"step": 4750
},
{
"epoch": 1.263563829787234,
"grad_norm": 3.865091562271118,
"learning_rate": 8.917993778743636e-06,
"loss": 0.6962,
"step": 4751
},
{
"epoch": 1.2638297872340425,
"grad_norm": 4.154531478881836,
"learning_rate": 8.917447310346245e-06,
"loss": 0.8158,
"step": 4752
},
{
"epoch": 1.264095744680851,
"grad_norm": 3.6052658557891846,
"learning_rate": 8.916900720738423e-06,
"loss": 0.7131,
"step": 4753
},
{
"epoch": 1.2643617021276596,
"grad_norm": 4.163410186767578,
"learning_rate": 8.916354009937081e-06,
"loss": 0.8955,
"step": 4754
},
{
"epoch": 1.264627659574468,
"grad_norm": 3.979421377182007,
"learning_rate": 8.915807177959133e-06,
"loss": 0.8712,
"step": 4755
},
{
"epoch": 1.2648936170212766,
"grad_norm": 3.4931585788726807,
"learning_rate": 8.915260224821504e-06,
"loss": 0.8079,
"step": 4756
},
{
"epoch": 1.265159574468085,
"grad_norm": 3.8094661235809326,
"learning_rate": 8.914713150541113e-06,
"loss": 0.8143,
"step": 4757
},
{
"epoch": 1.2654255319148935,
"grad_norm": 4.149999618530273,
"learning_rate": 8.914165955134886e-06,
"loss": 0.789,
"step": 4758
},
{
"epoch": 1.2656914893617022,
"grad_norm": 3.9979913234710693,
"learning_rate": 8.913618638619757e-06,
"loss": 0.8312,
"step": 4759
},
{
"epoch": 1.2659574468085106,
"grad_norm": 4.05308723449707,
"learning_rate": 8.91307120101266e-06,
"loss": 0.8029,
"step": 4760
},
{
"epoch": 1.266223404255319,
"grad_norm": 4.013595104217529,
"learning_rate": 8.912523642330533e-06,
"loss": 0.8625,
"step": 4761
},
{
"epoch": 1.2664893617021278,
"grad_norm": 3.932847023010254,
"learning_rate": 8.911975962590319e-06,
"loss": 0.8532,
"step": 4762
},
{
"epoch": 1.2667553191489362,
"grad_norm": 4.163691520690918,
"learning_rate": 8.911428161808962e-06,
"loss": 0.9048,
"step": 4763
},
{
"epoch": 1.2670212765957447,
"grad_norm": 4.368598461151123,
"learning_rate": 8.910880240003413e-06,
"loss": 0.7907,
"step": 4764
},
{
"epoch": 1.2672872340425532,
"grad_norm": 4.071594715118408,
"learning_rate": 8.910332197190623e-06,
"loss": 0.8764,
"step": 4765
},
{
"epoch": 1.2675531914893616,
"grad_norm": 3.6952078342437744,
"learning_rate": 8.909784033387552e-06,
"loss": 0.8343,
"step": 4766
},
{
"epoch": 1.2678191489361703,
"grad_norm": 3.967707872390747,
"learning_rate": 8.909235748611161e-06,
"loss": 0.7465,
"step": 4767
},
{
"epoch": 1.2680851063829788,
"grad_norm": 4.079662799835205,
"learning_rate": 8.908687342878413e-06,
"loss": 0.8126,
"step": 4768
},
{
"epoch": 1.2683510638297872,
"grad_norm": 3.95373272895813,
"learning_rate": 8.908138816206275e-06,
"loss": 0.7309,
"step": 4769
},
{
"epoch": 1.2686170212765957,
"grad_norm": 3.959603786468506,
"learning_rate": 8.907590168611724e-06,
"loss": 0.7635,
"step": 4770
},
{
"epoch": 1.2688829787234042,
"grad_norm": 3.9669322967529297,
"learning_rate": 8.90704140011173e-06,
"loss": 0.9031,
"step": 4771
},
{
"epoch": 1.2691489361702128,
"grad_norm": 4.063694477081299,
"learning_rate": 8.906492510723276e-06,
"loss": 0.8292,
"step": 4772
},
{
"epoch": 1.2694148936170213,
"grad_norm": 3.9221720695495605,
"learning_rate": 8.905943500463344e-06,
"loss": 0.7683,
"step": 4773
},
{
"epoch": 1.2696808510638298,
"grad_norm": 3.9919097423553467,
"learning_rate": 8.905394369348921e-06,
"loss": 0.7647,
"step": 4774
},
{
"epoch": 1.2699468085106382,
"grad_norm": 3.8253092765808105,
"learning_rate": 8.904845117397e-06,
"loss": 0.7056,
"step": 4775
},
{
"epoch": 1.2702127659574467,
"grad_norm": 3.5580105781555176,
"learning_rate": 8.904295744624572e-06,
"loss": 0.7939,
"step": 4776
},
{
"epoch": 1.2704787234042554,
"grad_norm": 3.987231492996216,
"learning_rate": 8.903746251048638e-06,
"loss": 0.8708,
"step": 4777
},
{
"epoch": 1.2707446808510638,
"grad_norm": 3.8669490814208984,
"learning_rate": 8.903196636686198e-06,
"loss": 0.776,
"step": 4778
},
{
"epoch": 1.2710106382978723,
"grad_norm": 3.940711259841919,
"learning_rate": 8.902646901554258e-06,
"loss": 0.7831,
"step": 4779
},
{
"epoch": 1.2712765957446808,
"grad_norm": 4.304079055786133,
"learning_rate": 8.90209704566983e-06,
"loss": 0.8243,
"step": 4780
},
{
"epoch": 1.2715425531914892,
"grad_norm": 4.165473937988281,
"learning_rate": 8.901547069049924e-06,
"loss": 0.8804,
"step": 4781
},
{
"epoch": 1.271808510638298,
"grad_norm": 3.84690260887146,
"learning_rate": 8.900996971711558e-06,
"loss": 0.8067,
"step": 4782
},
{
"epoch": 1.2720744680851064,
"grad_norm": 3.9118542671203613,
"learning_rate": 8.900446753671754e-06,
"loss": 0.8676,
"step": 4783
},
{
"epoch": 1.2723404255319148,
"grad_norm": 4.110815525054932,
"learning_rate": 8.899896414947534e-06,
"loss": 0.6605,
"step": 4784
},
{
"epoch": 1.2726063829787235,
"grad_norm": 3.7008938789367676,
"learning_rate": 8.899345955555928e-06,
"loss": 0.7201,
"step": 4785
},
{
"epoch": 1.272872340425532,
"grad_norm": 4.3613691329956055,
"learning_rate": 8.898795375513966e-06,
"loss": 0.806,
"step": 4786
},
{
"epoch": 1.2731382978723405,
"grad_norm": 4.315506458282471,
"learning_rate": 8.898244674838687e-06,
"loss": 0.8599,
"step": 4787
},
{
"epoch": 1.273404255319149,
"grad_norm": 3.8863260746002197,
"learning_rate": 8.897693853547127e-06,
"loss": 0.7735,
"step": 4788
},
{
"epoch": 1.2736702127659574,
"grad_norm": 4.221061706542969,
"learning_rate": 8.89714291165633e-06,
"loss": 0.9449,
"step": 4789
},
{
"epoch": 1.273936170212766,
"grad_norm": 3.727510929107666,
"learning_rate": 8.896591849183343e-06,
"loss": 0.8311,
"step": 4790
},
{
"epoch": 1.2742021276595745,
"grad_norm": 3.9543018341064453,
"learning_rate": 8.896040666145218e-06,
"loss": 0.6876,
"step": 4791
},
{
"epoch": 1.274468085106383,
"grad_norm": 3.7465333938598633,
"learning_rate": 8.895489362559007e-06,
"loss": 0.7677,
"step": 4792
},
{
"epoch": 1.2747340425531914,
"grad_norm": 4.069217205047607,
"learning_rate": 8.894937938441768e-06,
"loss": 0.8168,
"step": 4793
},
{
"epoch": 1.275,
"grad_norm": 4.367965221405029,
"learning_rate": 8.894386393810563e-06,
"loss": 0.7627,
"step": 4794
},
{
"epoch": 1.2752659574468086,
"grad_norm": 3.4115452766418457,
"learning_rate": 8.893834728682459e-06,
"loss": 0.6498,
"step": 4795
},
{
"epoch": 1.275531914893617,
"grad_norm": 3.94594669342041,
"learning_rate": 8.893282943074524e-06,
"loss": 0.7735,
"step": 4796
},
{
"epoch": 1.2757978723404255,
"grad_norm": 3.6856279373168945,
"learning_rate": 8.89273103700383e-06,
"loss": 0.8616,
"step": 4797
},
{
"epoch": 1.276063829787234,
"grad_norm": 3.8516628742218018,
"learning_rate": 8.892179010487456e-06,
"loss": 0.8549,
"step": 4798
},
{
"epoch": 1.2763297872340424,
"grad_norm": 4.085914611816406,
"learning_rate": 8.891626863542479e-06,
"loss": 0.7623,
"step": 4799
},
{
"epoch": 1.2765957446808511,
"grad_norm": 3.8456547260284424,
"learning_rate": 8.891074596185987e-06,
"loss": 0.8117,
"step": 4800
},
{
"epoch": 1.2768617021276596,
"grad_norm": 4.302917003631592,
"learning_rate": 8.890522208435067e-06,
"loss": 0.8329,
"step": 4801
},
{
"epoch": 1.277127659574468,
"grad_norm": 4.0489912033081055,
"learning_rate": 8.889969700306807e-06,
"loss": 0.8957,
"step": 4802
},
{
"epoch": 1.2773936170212765,
"grad_norm": 4.2099199295043945,
"learning_rate": 8.889417071818306e-06,
"loss": 0.7582,
"step": 4803
},
{
"epoch": 1.277659574468085,
"grad_norm": 3.925480842590332,
"learning_rate": 8.888864322986658e-06,
"loss": 0.814,
"step": 4804
},
{
"epoch": 1.2779255319148937,
"grad_norm": 3.9066643714904785,
"learning_rate": 8.888311453828973e-06,
"loss": 0.798,
"step": 4805
},
{
"epoch": 1.2781914893617021,
"grad_norm": 3.6610445976257324,
"learning_rate": 8.887758464362352e-06,
"loss": 0.708,
"step": 4806
},
{
"epoch": 1.2784574468085106,
"grad_norm": 3.639225482940674,
"learning_rate": 8.887205354603908e-06,
"loss": 0.9377,
"step": 4807
},
{
"epoch": 1.2787234042553193,
"grad_norm": 4.213227272033691,
"learning_rate": 8.886652124570753e-06,
"loss": 0.8664,
"step": 4808
},
{
"epoch": 1.2789893617021277,
"grad_norm": 3.916071653366089,
"learning_rate": 8.886098774280006e-06,
"loss": 0.8438,
"step": 4809
},
{
"epoch": 1.2792553191489362,
"grad_norm": 3.6656155586242676,
"learning_rate": 8.885545303748786e-06,
"loss": 0.8395,
"step": 4810
},
{
"epoch": 1.2795212765957447,
"grad_norm": 3.8457565307617188,
"learning_rate": 8.884991712994223e-06,
"loss": 0.7528,
"step": 4811
},
{
"epoch": 1.2797872340425531,
"grad_norm": 4.223479270935059,
"learning_rate": 8.88443800203344e-06,
"loss": 0.8702,
"step": 4812
},
{
"epoch": 1.2800531914893618,
"grad_norm": 3.9296419620513916,
"learning_rate": 8.88388417088357e-06,
"loss": 0.8804,
"step": 4813
},
{
"epoch": 1.2803191489361703,
"grad_norm": 4.048618316650391,
"learning_rate": 8.883330219561754e-06,
"loss": 0.8696,
"step": 4814
},
{
"epoch": 1.2805851063829787,
"grad_norm": 3.960580825805664,
"learning_rate": 8.882776148085129e-06,
"loss": 0.7783,
"step": 4815
},
{
"epoch": 1.2808510638297872,
"grad_norm": 4.032505035400391,
"learning_rate": 8.882221956470838e-06,
"loss": 0.8208,
"step": 4816
},
{
"epoch": 1.2811170212765957,
"grad_norm": 4.192906379699707,
"learning_rate": 8.881667644736028e-06,
"loss": 0.8411,
"step": 4817
},
{
"epoch": 1.2813829787234043,
"grad_norm": 3.9931344985961914,
"learning_rate": 8.881113212897851e-06,
"loss": 0.8844,
"step": 4818
},
{
"epoch": 1.2816489361702128,
"grad_norm": 4.1028923988342285,
"learning_rate": 8.880558660973462e-06,
"loss": 0.7664,
"step": 4819
},
{
"epoch": 1.2819148936170213,
"grad_norm": 4.039322376251221,
"learning_rate": 8.880003988980019e-06,
"loss": 0.8436,
"step": 4820
},
{
"epoch": 1.2821808510638297,
"grad_norm": 4.0381388664245605,
"learning_rate": 8.879449196934687e-06,
"loss": 0.749,
"step": 4821
},
{
"epoch": 1.2824468085106382,
"grad_norm": 4.3847222328186035,
"learning_rate": 8.878894284854626e-06,
"loss": 0.8086,
"step": 4822
},
{
"epoch": 1.2827127659574469,
"grad_norm": 4.213246822357178,
"learning_rate": 8.878339252757011e-06,
"loss": 0.9063,
"step": 4823
},
{
"epoch": 1.2829787234042553,
"grad_norm": 4.628039360046387,
"learning_rate": 8.877784100659013e-06,
"loss": 0.9035,
"step": 4824
},
{
"epoch": 1.2832446808510638,
"grad_norm": 3.940800905227661,
"learning_rate": 8.877228828577809e-06,
"loss": 0.8975,
"step": 4825
},
{
"epoch": 1.2835106382978723,
"grad_norm": 3.82865571975708,
"learning_rate": 8.87667343653058e-06,
"loss": 0.7283,
"step": 4826
},
{
"epoch": 1.2837765957446807,
"grad_norm": 4.173588752746582,
"learning_rate": 8.876117924534511e-06,
"loss": 0.8323,
"step": 4827
},
{
"epoch": 1.2840425531914894,
"grad_norm": 3.6624155044555664,
"learning_rate": 8.87556229260679e-06,
"loss": 0.8799,
"step": 4828
},
{
"epoch": 1.2843085106382979,
"grad_norm": 3.8801040649414062,
"learning_rate": 8.875006540764607e-06,
"loss": 0.7246,
"step": 4829
},
{
"epoch": 1.2845744680851063,
"grad_norm": 3.9223177433013916,
"learning_rate": 8.874450669025161e-06,
"loss": 0.8083,
"step": 4830
},
{
"epoch": 1.284840425531915,
"grad_norm": 3.640429735183716,
"learning_rate": 8.87389467740565e-06,
"loss": 0.8996,
"step": 4831
},
{
"epoch": 1.2851063829787235,
"grad_norm": 3.7746853828430176,
"learning_rate": 8.873338565923275e-06,
"loss": 0.6899,
"step": 4832
},
{
"epoch": 1.285372340425532,
"grad_norm": 4.439557075500488,
"learning_rate": 8.872782334595246e-06,
"loss": 0.9741,
"step": 4833
},
{
"epoch": 1.2856382978723404,
"grad_norm": 4.051036834716797,
"learning_rate": 8.872225983438774e-06,
"loss": 0.8935,
"step": 4834
},
{
"epoch": 1.2859042553191489,
"grad_norm": 4.3584370613098145,
"learning_rate": 8.871669512471068e-06,
"loss": 0.8499,
"step": 4835
},
{
"epoch": 1.2861702127659576,
"grad_norm": 3.96370792388916,
"learning_rate": 8.87111292170935e-06,
"loss": 0.8756,
"step": 4836
},
{
"epoch": 1.286436170212766,
"grad_norm": 3.8416450023651123,
"learning_rate": 8.87055621117084e-06,
"loss": 0.7347,
"step": 4837
},
{
"epoch": 1.2867021276595745,
"grad_norm": 3.84533429145813,
"learning_rate": 8.869999380872765e-06,
"loss": 0.7894,
"step": 4838
},
{
"epoch": 1.286968085106383,
"grad_norm": 4.616893768310547,
"learning_rate": 8.869442430832351e-06,
"loss": 0.8618,
"step": 4839
},
{
"epoch": 1.2872340425531914,
"grad_norm": 3.9372458457946777,
"learning_rate": 8.868885361066835e-06,
"loss": 0.785,
"step": 4840
},
{
"epoch": 1.2875,
"grad_norm": 3.895632743835449,
"learning_rate": 8.868328171593448e-06,
"loss": 0.7812,
"step": 4841
},
{
"epoch": 1.2877659574468086,
"grad_norm": 4.029928684234619,
"learning_rate": 8.867770862429434e-06,
"loss": 0.8724,
"step": 4842
},
{
"epoch": 1.288031914893617,
"grad_norm": 3.8094303607940674,
"learning_rate": 8.867213433592037e-06,
"loss": 0.791,
"step": 4843
},
{
"epoch": 1.2882978723404255,
"grad_norm": 3.862415313720703,
"learning_rate": 8.866655885098502e-06,
"loss": 0.8223,
"step": 4844
},
{
"epoch": 1.288563829787234,
"grad_norm": 4.023502826690674,
"learning_rate": 8.866098216966081e-06,
"loss": 0.8339,
"step": 4845
},
{
"epoch": 1.2888297872340426,
"grad_norm": 3.7530012130737305,
"learning_rate": 8.865540429212031e-06,
"loss": 0.7766,
"step": 4846
},
{
"epoch": 1.289095744680851,
"grad_norm": 3.7417378425598145,
"learning_rate": 8.864982521853609e-06,
"loss": 0.9348,
"step": 4847
},
{
"epoch": 1.2893617021276595,
"grad_norm": 4.337246417999268,
"learning_rate": 8.864424494908076e-06,
"loss": 0.8423,
"step": 4848
},
{
"epoch": 1.289627659574468,
"grad_norm": 4.149337291717529,
"learning_rate": 8.8638663483927e-06,
"loss": 0.9212,
"step": 4849
},
{
"epoch": 1.2898936170212765,
"grad_norm": 4.155276298522949,
"learning_rate": 8.86330808232475e-06,
"loss": 0.9331,
"step": 4850
},
{
"epoch": 1.2901595744680852,
"grad_norm": 3.66481876373291,
"learning_rate": 8.8627496967215e-06,
"loss": 0.7795,
"step": 4851
},
{
"epoch": 1.2904255319148936,
"grad_norm": 4.018246650695801,
"learning_rate": 8.862191191600227e-06,
"loss": 0.8021,
"step": 4852
},
{
"epoch": 1.290691489361702,
"grad_norm": 4.123905658721924,
"learning_rate": 8.86163256697821e-06,
"loss": 0.8106,
"step": 4853
},
{
"epoch": 1.2909574468085108,
"grad_norm": 4.097765922546387,
"learning_rate": 8.861073822872735e-06,
"loss": 0.8006,
"step": 4854
},
{
"epoch": 1.2912234042553192,
"grad_norm": 4.317656517028809,
"learning_rate": 8.86051495930109e-06,
"loss": 0.8026,
"step": 4855
},
{
"epoch": 1.2914893617021277,
"grad_norm": 3.8379859924316406,
"learning_rate": 8.859955976280568e-06,
"loss": 0.813,
"step": 4856
},
{
"epoch": 1.2917553191489362,
"grad_norm": 4.173714637756348,
"learning_rate": 8.859396873828461e-06,
"loss": 0.8064,
"step": 4857
},
{
"epoch": 1.2920212765957446,
"grad_norm": 4.439601898193359,
"learning_rate": 8.858837651962073e-06,
"loss": 0.8187,
"step": 4858
},
{
"epoch": 1.2922872340425533,
"grad_norm": 3.970308542251587,
"learning_rate": 8.858278310698705e-06,
"loss": 0.7977,
"step": 4859
},
{
"epoch": 1.2925531914893618,
"grad_norm": 3.7830026149749756,
"learning_rate": 8.857718850055663e-06,
"loss": 0.7371,
"step": 4860
},
{
"epoch": 1.2928191489361702,
"grad_norm": 3.9715933799743652,
"learning_rate": 8.857159270050258e-06,
"loss": 0.9022,
"step": 4861
},
{
"epoch": 1.2930851063829787,
"grad_norm": 3.824910879135132,
"learning_rate": 8.856599570699805e-06,
"loss": 0.7895,
"step": 4862
},
{
"epoch": 1.2933510638297872,
"grad_norm": 4.079301357269287,
"learning_rate": 8.856039752021619e-06,
"loss": 0.8215,
"step": 4863
},
{
"epoch": 1.2936170212765958,
"grad_norm": 3.722262382507324,
"learning_rate": 8.855479814033024e-06,
"loss": 0.7611,
"step": 4864
},
{
"epoch": 1.2938829787234043,
"grad_norm": 3.853123664855957,
"learning_rate": 8.854919756751343e-06,
"loss": 0.7494,
"step": 4865
},
{
"epoch": 1.2941489361702128,
"grad_norm": 3.9518027305603027,
"learning_rate": 8.854359580193907e-06,
"loss": 0.7751,
"step": 4866
},
{
"epoch": 1.2944148936170212,
"grad_norm": 4.295631408691406,
"learning_rate": 8.853799284378048e-06,
"loss": 0.8227,
"step": 4867
},
{
"epoch": 1.2946808510638297,
"grad_norm": 3.7936043739318848,
"learning_rate": 8.853238869321104e-06,
"loss": 0.7634,
"step": 4868
},
{
"epoch": 1.2949468085106384,
"grad_norm": 4.017428874969482,
"learning_rate": 8.85267833504041e-06,
"loss": 0.732,
"step": 4869
},
{
"epoch": 1.2952127659574468,
"grad_norm": 4.081499099731445,
"learning_rate": 8.852117681553312e-06,
"loss": 0.8568,
"step": 4870
},
{
"epoch": 1.2954787234042553,
"grad_norm": 4.4456281661987305,
"learning_rate": 8.851556908877159e-06,
"loss": 0.8038,
"step": 4871
},
{
"epoch": 1.2957446808510638,
"grad_norm": 4.371933460235596,
"learning_rate": 8.8509960170293e-06,
"loss": 0.7515,
"step": 4872
},
{
"epoch": 1.2960106382978722,
"grad_norm": 3.5804035663604736,
"learning_rate": 8.85043500602709e-06,
"loss": 0.7818,
"step": 4873
},
{
"epoch": 1.296276595744681,
"grad_norm": 4.176633834838867,
"learning_rate": 8.849873875887888e-06,
"loss": 0.8217,
"step": 4874
},
{
"epoch": 1.2965425531914894,
"grad_norm": 3.9609858989715576,
"learning_rate": 8.849312626629055e-06,
"loss": 0.8517,
"step": 4875
},
{
"epoch": 1.2968085106382978,
"grad_norm": 4.5829291343688965,
"learning_rate": 8.848751258267959e-06,
"loss": 1.0122,
"step": 4876
},
{
"epoch": 1.2970744680851065,
"grad_norm": 3.677952766418457,
"learning_rate": 8.848189770821965e-06,
"loss": 0.8094,
"step": 4877
},
{
"epoch": 1.297340425531915,
"grad_norm": 4.067968368530273,
"learning_rate": 8.84762816430845e-06,
"loss": 0.8764,
"step": 4878
},
{
"epoch": 1.2976063829787234,
"grad_norm": 3.8500382900238037,
"learning_rate": 8.847066438744792e-06,
"loss": 0.8741,
"step": 4879
},
{
"epoch": 1.297872340425532,
"grad_norm": 3.8818368911743164,
"learning_rate": 8.846504594148366e-06,
"loss": 0.8485,
"step": 4880
},
{
"epoch": 1.2981382978723404,
"grad_norm": 3.9118518829345703,
"learning_rate": 8.84594263053656e-06,
"loss": 0.9005,
"step": 4881
},
{
"epoch": 1.298404255319149,
"grad_norm": 3.889709711074829,
"learning_rate": 8.84538054792676e-06,
"loss": 0.9367,
"step": 4882
},
{
"epoch": 1.2986702127659575,
"grad_norm": 3.9546077251434326,
"learning_rate": 8.844818346336361e-06,
"loss": 0.8102,
"step": 4883
},
{
"epoch": 1.298936170212766,
"grad_norm": 4.036288738250732,
"learning_rate": 8.844256025782754e-06,
"loss": 0.9124,
"step": 4884
},
{
"epoch": 1.2992021276595744,
"grad_norm": 3.9991087913513184,
"learning_rate": 8.84369358628334e-06,
"loss": 0.7885,
"step": 4885
},
{
"epoch": 1.299468085106383,
"grad_norm": 3.767066478729248,
"learning_rate": 8.84313102785552e-06,
"loss": 0.8147,
"step": 4886
},
{
"epoch": 1.2997340425531916,
"grad_norm": 3.645434617996216,
"learning_rate": 8.842568350516702e-06,
"loss": 0.7238,
"step": 4887
},
{
"epoch": 1.3,
"grad_norm": 3.777766466140747,
"learning_rate": 8.842005554284296e-06,
"loss": 0.816,
"step": 4888
},
{
"epoch": 1.3002659574468085,
"grad_norm": 3.8868510723114014,
"learning_rate": 8.841442639175714e-06,
"loss": 0.8835,
"step": 4889
},
{
"epoch": 1.300531914893617,
"grad_norm": 4.271452903747559,
"learning_rate": 8.840879605208374e-06,
"loss": 0.8119,
"step": 4890
},
{
"epoch": 1.3007978723404254,
"grad_norm": 3.4486215114593506,
"learning_rate": 8.840316452399697e-06,
"loss": 0.7602,
"step": 4891
},
{
"epoch": 1.3010638297872341,
"grad_norm": 3.726085901260376,
"learning_rate": 8.839753180767108e-06,
"loss": 0.7252,
"step": 4892
},
{
"epoch": 1.3013297872340426,
"grad_norm": 4.51430082321167,
"learning_rate": 8.839189790328033e-06,
"loss": 0.8133,
"step": 4893
},
{
"epoch": 1.301595744680851,
"grad_norm": 4.0574469566345215,
"learning_rate": 8.838626281099908e-06,
"loss": 0.8436,
"step": 4894
},
{
"epoch": 1.3018617021276595,
"grad_norm": 4.096327304840088,
"learning_rate": 8.838062653100165e-06,
"loss": 0.8056,
"step": 4895
},
{
"epoch": 1.302127659574468,
"grad_norm": 4.048945903778076,
"learning_rate": 8.837498906346247e-06,
"loss": 0.8764,
"step": 4896
},
{
"epoch": 1.3023936170212767,
"grad_norm": 3.9284706115722656,
"learning_rate": 8.836935040855591e-06,
"loss": 0.7626,
"step": 4897
},
{
"epoch": 1.3026595744680851,
"grad_norm": 3.914583444595337,
"learning_rate": 8.83637105664565e-06,
"loss": 0.7855,
"step": 4898
},
{
"epoch": 1.3029255319148936,
"grad_norm": 4.442378520965576,
"learning_rate": 8.835806953733871e-06,
"loss": 0.8103,
"step": 4899
},
{
"epoch": 1.3031914893617023,
"grad_norm": 3.8343191146850586,
"learning_rate": 8.83524273213771e-06,
"loss": 0.8425,
"step": 4900
},
{
"epoch": 1.3034574468085105,
"grad_norm": 4.154768943786621,
"learning_rate": 8.834678391874623e-06,
"loss": 0.7792,
"step": 4901
},
{
"epoch": 1.3037234042553192,
"grad_norm": 4.136390209197998,
"learning_rate": 8.834113932962071e-06,
"loss": 0.8578,
"step": 4902
},
{
"epoch": 1.3039893617021276,
"grad_norm": 4.139702320098877,
"learning_rate": 8.833549355417518e-06,
"loss": 0.724,
"step": 4903
},
{
"epoch": 1.304255319148936,
"grad_norm": 4.213815689086914,
"learning_rate": 8.83298465925844e-06,
"loss": 0.7892,
"step": 4904
},
{
"epoch": 1.3045212765957448,
"grad_norm": 4.048974990844727,
"learning_rate": 8.832419844502298e-06,
"loss": 0.829,
"step": 4905
},
{
"epoch": 1.3047872340425533,
"grad_norm": 4.729825496673584,
"learning_rate": 8.831854911166577e-06,
"loss": 0.9176,
"step": 4906
},
{
"epoch": 1.3050531914893617,
"grad_norm": 3.5801501274108887,
"learning_rate": 8.831289859268753e-06,
"loss": 0.724,
"step": 4907
},
{
"epoch": 1.3053191489361702,
"grad_norm": 4.097287654876709,
"learning_rate": 8.83072468882631e-06,
"loss": 0.8299,
"step": 4908
},
{
"epoch": 1.3055851063829786,
"grad_norm": 4.027351379394531,
"learning_rate": 8.830159399856734e-06,
"loss": 0.9384,
"step": 4909
},
{
"epoch": 1.3058510638297873,
"grad_norm": 4.275338649749756,
"learning_rate": 8.829593992377518e-06,
"loss": 0.7921,
"step": 4910
},
{
"epoch": 1.3061170212765958,
"grad_norm": 4.1409220695495605,
"learning_rate": 8.829028466406156e-06,
"loss": 0.8888,
"step": 4911
},
{
"epoch": 1.3063829787234043,
"grad_norm": 3.6458733081817627,
"learning_rate": 8.828462821960143e-06,
"loss": 0.7371,
"step": 4912
},
{
"epoch": 1.3066489361702127,
"grad_norm": 3.8695321083068848,
"learning_rate": 8.827897059056983e-06,
"loss": 0.8467,
"step": 4913
},
{
"epoch": 1.3069148936170212,
"grad_norm": 3.693190336227417,
"learning_rate": 8.827331177714183e-06,
"loss": 0.8182,
"step": 4914
},
{
"epoch": 1.3071808510638299,
"grad_norm": 3.806725263595581,
"learning_rate": 8.826765177949248e-06,
"loss": 0.8669,
"step": 4915
},
{
"epoch": 1.3074468085106383,
"grad_norm": 3.970451593399048,
"learning_rate": 8.826199059779695e-06,
"loss": 0.9024,
"step": 4916
},
{
"epoch": 1.3077127659574468,
"grad_norm": 3.7471280097961426,
"learning_rate": 8.825632823223037e-06,
"loss": 0.7707,
"step": 4917
},
{
"epoch": 1.3079787234042553,
"grad_norm": 4.0794267654418945,
"learning_rate": 8.825066468296796e-06,
"loss": 0.8489,
"step": 4918
},
{
"epoch": 1.3082446808510637,
"grad_norm": 3.681044578552246,
"learning_rate": 8.824499995018494e-06,
"loss": 0.7854,
"step": 4919
},
{
"epoch": 1.3085106382978724,
"grad_norm": 3.9300031661987305,
"learning_rate": 8.82393340340566e-06,
"loss": 0.8076,
"step": 4920
},
{
"epoch": 1.3087765957446809,
"grad_norm": 3.5358026027679443,
"learning_rate": 8.823366693475826e-06,
"loss": 0.7239,
"step": 4921
},
{
"epoch": 1.3090425531914893,
"grad_norm": 3.7831380367279053,
"learning_rate": 8.822799865246522e-06,
"loss": 0.8004,
"step": 4922
},
{
"epoch": 1.309308510638298,
"grad_norm": 3.6898906230926514,
"learning_rate": 8.822232918735292e-06,
"loss": 0.765,
"step": 4923
},
{
"epoch": 1.3095744680851062,
"grad_norm": 3.685541868209839,
"learning_rate": 8.821665853959673e-06,
"loss": 0.9544,
"step": 4924
},
{
"epoch": 1.309840425531915,
"grad_norm": 4.169592380523682,
"learning_rate": 8.821098670937215e-06,
"loss": 0.9082,
"step": 4925
},
{
"epoch": 1.3101063829787234,
"grad_norm": 3.870544910430908,
"learning_rate": 8.820531369685464e-06,
"loss": 0.7508,
"step": 4926
},
{
"epoch": 1.3103723404255319,
"grad_norm": 3.920816659927368,
"learning_rate": 8.819963950221976e-06,
"loss": 0.849,
"step": 4927
},
{
"epoch": 1.3106382978723405,
"grad_norm": 3.8789918422698975,
"learning_rate": 8.819396412564305e-06,
"loss": 0.7916,
"step": 4928
},
{
"epoch": 1.310904255319149,
"grad_norm": 3.8481719493865967,
"learning_rate": 8.818828756730012e-06,
"loss": 0.7985,
"step": 4929
},
{
"epoch": 1.3111702127659575,
"grad_norm": 4.481472015380859,
"learning_rate": 8.818260982736662e-06,
"loss": 0.7636,
"step": 4930
},
{
"epoch": 1.311436170212766,
"grad_norm": 3.4751243591308594,
"learning_rate": 8.81769309060182e-06,
"loss": 0.7336,
"step": 4931
},
{
"epoch": 1.3117021276595744,
"grad_norm": 4.149890899658203,
"learning_rate": 8.81712508034306e-06,
"loss": 0.8473,
"step": 4932
},
{
"epoch": 1.311968085106383,
"grad_norm": 3.9108872413635254,
"learning_rate": 8.816556951977955e-06,
"loss": 0.7656,
"step": 4933
},
{
"epoch": 1.3122340425531915,
"grad_norm": 3.8704488277435303,
"learning_rate": 8.815988705524086e-06,
"loss": 0.8214,
"step": 4934
},
{
"epoch": 1.3125,
"grad_norm": 4.183962821960449,
"learning_rate": 8.815420340999034e-06,
"loss": 0.8411,
"step": 4935
},
{
"epoch": 1.3127659574468085,
"grad_norm": 3.7032434940338135,
"learning_rate": 8.814851858420384e-06,
"loss": 0.8455,
"step": 4936
},
{
"epoch": 1.313031914893617,
"grad_norm": 3.5762336254119873,
"learning_rate": 8.814283257805724e-06,
"loss": 0.7208,
"step": 4937
},
{
"epoch": 1.3132978723404256,
"grad_norm": 4.197664260864258,
"learning_rate": 8.813714539172653e-06,
"loss": 0.8642,
"step": 4938
},
{
"epoch": 1.313563829787234,
"grad_norm": 3.5386626720428467,
"learning_rate": 8.81314570253876e-06,
"loss": 0.6846,
"step": 4939
},
{
"epoch": 1.3138297872340425,
"grad_norm": 4.332328796386719,
"learning_rate": 8.812576747921653e-06,
"loss": 0.7862,
"step": 4940
},
{
"epoch": 1.314095744680851,
"grad_norm": 3.6495919227600098,
"learning_rate": 8.81200767533893e-06,
"loss": 0.676,
"step": 4941
},
{
"epoch": 1.3143617021276595,
"grad_norm": 3.717625617980957,
"learning_rate": 8.811438484808204e-06,
"loss": 0.8879,
"step": 4942
},
{
"epoch": 1.3146276595744681,
"grad_norm": 4.201274394989014,
"learning_rate": 8.810869176347082e-06,
"loss": 0.9174,
"step": 4943
},
{
"epoch": 1.3148936170212766,
"grad_norm": 3.3899879455566406,
"learning_rate": 8.810299749973182e-06,
"loss": 0.7209,
"step": 4944
},
{
"epoch": 1.315159574468085,
"grad_norm": 3.821558713912964,
"learning_rate": 8.80973020570412e-06,
"loss": 0.647,
"step": 4945
},
{
"epoch": 1.3154255319148938,
"grad_norm": 4.011831760406494,
"learning_rate": 8.809160543557523e-06,
"loss": 0.8387,
"step": 4946
},
{
"epoch": 1.315691489361702,
"grad_norm": 4.121433258056641,
"learning_rate": 8.80859076355101e-06,
"loss": 0.7835,
"step": 4947
},
{
"epoch": 1.3159574468085107,
"grad_norm": 4.066422462463379,
"learning_rate": 8.808020865702218e-06,
"loss": 0.7569,
"step": 4948
},
{
"epoch": 1.3162234042553191,
"grad_norm": 3.7616024017333984,
"learning_rate": 8.807450850028776e-06,
"loss": 0.7514,
"step": 4949
},
{
"epoch": 1.3164893617021276,
"grad_norm": 3.809521198272705,
"learning_rate": 8.806880716548322e-06,
"loss": 0.8212,
"step": 4950
},
{
"epoch": 1.3167553191489363,
"grad_norm": 3.664140224456787,
"learning_rate": 8.806310465278496e-06,
"loss": 0.8303,
"step": 4951
},
{
"epoch": 1.3170212765957447,
"grad_norm": 3.978876829147339,
"learning_rate": 8.805740096236943e-06,
"loss": 0.8149,
"step": 4952
},
{
"epoch": 1.3172872340425532,
"grad_norm": 4.436275959014893,
"learning_rate": 8.805169609441312e-06,
"loss": 0.9033,
"step": 4953
},
{
"epoch": 1.3175531914893617,
"grad_norm": 3.9355101585388184,
"learning_rate": 8.804599004909251e-06,
"loss": 0.8599,
"step": 4954
},
{
"epoch": 1.3178191489361701,
"grad_norm": 3.6748297214508057,
"learning_rate": 8.80402828265842e-06,
"loss": 0.6637,
"step": 4955
},
{
"epoch": 1.3180851063829788,
"grad_norm": 3.953321695327759,
"learning_rate": 8.803457442706473e-06,
"loss": 0.7684,
"step": 4956
},
{
"epoch": 1.3183510638297873,
"grad_norm": 3.9680938720703125,
"learning_rate": 8.802886485071078e-06,
"loss": 0.8377,
"step": 4957
},
{
"epoch": 1.3186170212765957,
"grad_norm": 3.608375072479248,
"learning_rate": 8.802315409769894e-06,
"loss": 0.7671,
"step": 4958
},
{
"epoch": 1.3188829787234042,
"grad_norm": 3.7180373668670654,
"learning_rate": 8.801744216820596e-06,
"loss": 0.794,
"step": 4959
},
{
"epoch": 1.3191489361702127,
"grad_norm": 3.490082263946533,
"learning_rate": 8.801172906240857e-06,
"loss": 0.7993,
"step": 4960
},
{
"epoch": 1.3194148936170214,
"grad_norm": 3.9783389568328857,
"learning_rate": 8.800601478048351e-06,
"loss": 0.7455,
"step": 4961
},
{
"epoch": 1.3196808510638298,
"grad_norm": 4.333663463592529,
"learning_rate": 8.800029932260764e-06,
"loss": 0.8772,
"step": 4962
},
{
"epoch": 1.3199468085106383,
"grad_norm": 3.9584553241729736,
"learning_rate": 8.799458268895774e-06,
"loss": 0.8622,
"step": 4963
},
{
"epoch": 1.3202127659574467,
"grad_norm": 4.271299362182617,
"learning_rate": 8.798886487971073e-06,
"loss": 0.7591,
"step": 4964
},
{
"epoch": 1.3204787234042552,
"grad_norm": 4.128324508666992,
"learning_rate": 8.798314589504348e-06,
"loss": 0.7294,
"step": 4965
},
{
"epoch": 1.320744680851064,
"grad_norm": 3.613626718521118,
"learning_rate": 8.797742573513302e-06,
"loss": 0.8173,
"step": 4966
},
{
"epoch": 1.3210106382978724,
"grad_norm": 3.665271043777466,
"learning_rate": 8.797170440015627e-06,
"loss": 0.7592,
"step": 4967
},
{
"epoch": 1.3212765957446808,
"grad_norm": 4.036754608154297,
"learning_rate": 8.79659818902903e-06,
"loss": 0.7705,
"step": 4968
},
{
"epoch": 1.3215425531914895,
"grad_norm": 4.09188175201416,
"learning_rate": 8.796025820571213e-06,
"loss": 0.9028,
"step": 4969
},
{
"epoch": 1.3218085106382977,
"grad_norm": 3.8270485401153564,
"learning_rate": 8.795453334659889e-06,
"loss": 0.7337,
"step": 4970
},
{
"epoch": 1.3220744680851064,
"grad_norm": 4.005841255187988,
"learning_rate": 8.794880731312771e-06,
"loss": 0.8789,
"step": 4971
},
{
"epoch": 1.3223404255319149,
"grad_norm": 3.894681930541992,
"learning_rate": 8.794308010547574e-06,
"loss": 0.7452,
"step": 4972
},
{
"epoch": 1.3226063829787233,
"grad_norm": 3.7697856426239014,
"learning_rate": 8.79373517238202e-06,
"loss": 0.7111,
"step": 4973
},
{
"epoch": 1.322872340425532,
"grad_norm": 4.162429332733154,
"learning_rate": 8.793162216833835e-06,
"loss": 0.8352,
"step": 4974
},
{
"epoch": 1.3231382978723405,
"grad_norm": 4.8362298011779785,
"learning_rate": 8.792589143920743e-06,
"loss": 0.8807,
"step": 4975
},
{
"epoch": 1.323404255319149,
"grad_norm": 4.283027172088623,
"learning_rate": 8.792015953660478e-06,
"loss": 0.9241,
"step": 4976
},
{
"epoch": 1.3236702127659574,
"grad_norm": 3.7246296405792236,
"learning_rate": 8.791442646070776e-06,
"loss": 0.8158,
"step": 4977
},
{
"epoch": 1.3239361702127659,
"grad_norm": 3.9116530418395996,
"learning_rate": 8.790869221169374e-06,
"loss": 0.7603,
"step": 4978
},
{
"epoch": 1.3242021276595746,
"grad_norm": 4.164322853088379,
"learning_rate": 8.790295678974015e-06,
"loss": 0.7518,
"step": 4979
},
{
"epoch": 1.324468085106383,
"grad_norm": 3.459543228149414,
"learning_rate": 8.789722019502444e-06,
"loss": 0.8216,
"step": 4980
},
{
"epoch": 1.3247340425531915,
"grad_norm": 3.4385783672332764,
"learning_rate": 8.789148242772414e-06,
"loss": 0.5722,
"step": 4981
},
{
"epoch": 1.325,
"grad_norm": 3.881467580795288,
"learning_rate": 8.788574348801676e-06,
"loss": 0.7652,
"step": 4982
},
{
"epoch": 1.3252659574468084,
"grad_norm": 3.8028674125671387,
"learning_rate": 8.788000337607984e-06,
"loss": 0.7125,
"step": 4983
},
{
"epoch": 1.325531914893617,
"grad_norm": 3.595238447189331,
"learning_rate": 8.787426209209104e-06,
"loss": 0.6849,
"step": 4984
},
{
"epoch": 1.3257978723404256,
"grad_norm": 4.597902774810791,
"learning_rate": 8.786851963622799e-06,
"loss": 0.8314,
"step": 4985
},
{
"epoch": 1.326063829787234,
"grad_norm": 4.151714324951172,
"learning_rate": 8.786277600866834e-06,
"loss": 0.8624,
"step": 4986
},
{
"epoch": 1.3263297872340425,
"grad_norm": 3.7185237407684326,
"learning_rate": 8.785703120958984e-06,
"loss": 0.7547,
"step": 4987
},
{
"epoch": 1.326595744680851,
"grad_norm": 3.964048385620117,
"learning_rate": 8.785128523917022e-06,
"loss": 0.8626,
"step": 4988
},
{
"epoch": 1.3268617021276596,
"grad_norm": 3.9490604400634766,
"learning_rate": 8.784553809758724e-06,
"loss": 0.7927,
"step": 4989
},
{
"epoch": 1.327127659574468,
"grad_norm": 3.736051321029663,
"learning_rate": 8.783978978501879e-06,
"loss": 0.7581,
"step": 4990
},
{
"epoch": 1.3273936170212766,
"grad_norm": 4.048060417175293,
"learning_rate": 8.783404030164269e-06,
"loss": 0.8141,
"step": 4991
},
{
"epoch": 1.327659574468085,
"grad_norm": 3.542971134185791,
"learning_rate": 8.782828964763683e-06,
"loss": 0.8244,
"step": 4992
},
{
"epoch": 1.3279255319148935,
"grad_norm": 4.4042439460754395,
"learning_rate": 8.782253782317914e-06,
"loss": 0.7623,
"step": 4993
},
{
"epoch": 1.3281914893617022,
"grad_norm": 4.011150360107422,
"learning_rate": 8.781678482844763e-06,
"loss": 0.7879,
"step": 4994
},
{
"epoch": 1.3284574468085106,
"grad_norm": 3.9396347999572754,
"learning_rate": 8.781103066362024e-06,
"loss": 0.8731,
"step": 4995
},
{
"epoch": 1.328723404255319,
"grad_norm": 4.063819408416748,
"learning_rate": 8.780527532887506e-06,
"loss": 0.7255,
"step": 4996
},
{
"epoch": 1.3289893617021278,
"grad_norm": 3.684864044189453,
"learning_rate": 8.779951882439016e-06,
"loss": 0.7447,
"step": 4997
},
{
"epoch": 1.3292553191489362,
"grad_norm": 4.3980207443237305,
"learning_rate": 8.77937611503436e-06,
"loss": 0.8104,
"step": 4998
},
{
"epoch": 1.3295212765957447,
"grad_norm": 4.019001483917236,
"learning_rate": 8.778800230691363e-06,
"loss": 0.7426,
"step": 4999
},
{
"epoch": 1.3297872340425532,
"grad_norm": 4.1492486000061035,
"learning_rate": 8.778224229427836e-06,
"loss": 0.7929,
"step": 5000
},
{
"epoch": 1.3297872340425532,
"eval_loss": 1.2957489490509033,
"eval_runtime": 14.7283,
"eval_samples_per_second": 27.159,
"eval_steps_per_second": 3.395,
"step": 5000
},
{
"epoch": 1.3300531914893616,
"grad_norm": 3.742830753326416,
"learning_rate": 8.777648111261601e-06,
"loss": 0.6807,
"step": 5001
},
{
"epoch": 1.3303191489361703,
"grad_norm": 4.3522114753723145,
"learning_rate": 8.77707187621049e-06,
"loss": 0.8048,
"step": 5002
},
{
"epoch": 1.3305851063829788,
"grad_norm": 3.7916550636291504,
"learning_rate": 8.776495524292325e-06,
"loss": 0.8209,
"step": 5003
},
{
"epoch": 1.3308510638297872,
"grad_norm": 3.642531156539917,
"learning_rate": 8.775919055524941e-06,
"loss": 0.7274,
"step": 5004
},
{
"epoch": 1.3311170212765957,
"grad_norm": 3.885079860687256,
"learning_rate": 8.775342469926178e-06,
"loss": 0.8305,
"step": 5005
},
{
"epoch": 1.3313829787234042,
"grad_norm": 3.816824436187744,
"learning_rate": 8.774765767513876e-06,
"loss": 0.7605,
"step": 5006
},
{
"epoch": 1.3316489361702128,
"grad_norm": 4.696832656860352,
"learning_rate": 8.774188948305874e-06,
"loss": 0.8907,
"step": 5007
},
{
"epoch": 1.3319148936170213,
"grad_norm": 4.030970096588135,
"learning_rate": 8.773612012320023e-06,
"loss": 0.9613,
"step": 5008
},
{
"epoch": 1.3321808510638298,
"grad_norm": 4.046240329742432,
"learning_rate": 8.773034959574173e-06,
"loss": 0.7066,
"step": 5009
},
{
"epoch": 1.3324468085106382,
"grad_norm": 3.916098117828369,
"learning_rate": 8.77245779008618e-06,
"loss": 0.7762,
"step": 5010
},
{
"epoch": 1.3327127659574467,
"grad_norm": 4.096320629119873,
"learning_rate": 8.771880503873902e-06,
"loss": 0.7222,
"step": 5011
},
{
"epoch": 1.3329787234042554,
"grad_norm": 4.3136467933654785,
"learning_rate": 8.771303100955199e-06,
"loss": 0.8265,
"step": 5012
},
{
"epoch": 1.3332446808510638,
"grad_norm": 3.972031593322754,
"learning_rate": 8.770725581347938e-06,
"loss": 0.7263,
"step": 5013
},
{
"epoch": 1.3335106382978723,
"grad_norm": 4.295060634613037,
"learning_rate": 8.770147945069988e-06,
"loss": 0.8489,
"step": 5014
},
{
"epoch": 1.3337765957446808,
"grad_norm": 3.8986477851867676,
"learning_rate": 8.769570192139224e-06,
"loss": 0.7101,
"step": 5015
},
{
"epoch": 1.3340425531914892,
"grad_norm": 3.8135452270507812,
"learning_rate": 8.768992322573518e-06,
"loss": 0.7885,
"step": 5016
},
{
"epoch": 1.334308510638298,
"grad_norm": 3.727550983428955,
"learning_rate": 8.768414336390752e-06,
"loss": 0.8622,
"step": 5017
},
{
"epoch": 1.3345744680851064,
"grad_norm": 4.012676239013672,
"learning_rate": 8.76783623360881e-06,
"loss": 0.8938,
"step": 5018
},
{
"epoch": 1.3348404255319148,
"grad_norm": 4.344918727874756,
"learning_rate": 8.767258014245578e-06,
"loss": 0.8228,
"step": 5019
},
{
"epoch": 1.3351063829787235,
"grad_norm": 3.9926249980926514,
"learning_rate": 8.76667967831895e-06,
"loss": 0.6513,
"step": 5020
},
{
"epoch": 1.335372340425532,
"grad_norm": 4.119525909423828,
"learning_rate": 8.766101225846816e-06,
"loss": 0.7887,
"step": 5021
},
{
"epoch": 1.3356382978723405,
"grad_norm": 4.538883686065674,
"learning_rate": 8.765522656847077e-06,
"loss": 0.796,
"step": 5022
},
{
"epoch": 1.335904255319149,
"grad_norm": 3.7550501823425293,
"learning_rate": 8.764943971337633e-06,
"loss": 0.7695,
"step": 5023
},
{
"epoch": 1.3361702127659574,
"grad_norm": 3.611605405807495,
"learning_rate": 8.76436516933639e-06,
"loss": 0.7483,
"step": 5024
},
{
"epoch": 1.336436170212766,
"grad_norm": 4.187867164611816,
"learning_rate": 8.763786250861258e-06,
"loss": 0.8277,
"step": 5025
},
{
"epoch": 1.3367021276595745,
"grad_norm": 3.9223055839538574,
"learning_rate": 8.763207215930147e-06,
"loss": 0.7724,
"step": 5026
},
{
"epoch": 1.336968085106383,
"grad_norm": 4.048906326293945,
"learning_rate": 8.762628064560975e-06,
"loss": 0.7923,
"step": 5027
},
{
"epoch": 1.3372340425531914,
"grad_norm": 4.241153240203857,
"learning_rate": 8.762048796771659e-06,
"loss": 0.8776,
"step": 5028
},
{
"epoch": 1.3375,
"grad_norm": 3.759209632873535,
"learning_rate": 8.761469412580126e-06,
"loss": 0.7554,
"step": 5029
},
{
"epoch": 1.3377659574468086,
"grad_norm": 3.8906912803649902,
"learning_rate": 8.760889912004297e-06,
"loss": 0.6977,
"step": 5030
},
{
"epoch": 1.338031914893617,
"grad_norm": 3.9501161575317383,
"learning_rate": 8.760310295062112e-06,
"loss": 0.9481,
"step": 5031
},
{
"epoch": 1.3382978723404255,
"grad_norm": 3.918553590774536,
"learning_rate": 8.759730561771494e-06,
"loss": 0.7882,
"step": 5032
},
{
"epoch": 1.338563829787234,
"grad_norm": 4.063170909881592,
"learning_rate": 8.759150712150388e-06,
"loss": 0.8415,
"step": 5033
},
{
"epoch": 1.3388297872340424,
"grad_norm": 3.863600015640259,
"learning_rate": 8.758570746216732e-06,
"loss": 0.807,
"step": 5034
},
{
"epoch": 1.3390957446808511,
"grad_norm": 3.9519717693328857,
"learning_rate": 8.757990663988474e-06,
"loss": 0.8594,
"step": 5035
},
{
"epoch": 1.3393617021276596,
"grad_norm": 4.245703220367432,
"learning_rate": 8.75741046548356e-06,
"loss": 0.7987,
"step": 5036
},
{
"epoch": 1.339627659574468,
"grad_norm": 4.1299729347229,
"learning_rate": 8.75683015071994e-06,
"loss": 0.9377,
"step": 5037
},
{
"epoch": 1.3398936170212765,
"grad_norm": 3.744929552078247,
"learning_rate": 8.756249719715576e-06,
"loss": 0.6875,
"step": 5038
},
{
"epoch": 1.340159574468085,
"grad_norm": 3.7629339694976807,
"learning_rate": 8.75566917248842e-06,
"loss": 0.7619,
"step": 5039
},
{
"epoch": 1.3404255319148937,
"grad_norm": 4.09276819229126,
"learning_rate": 8.75508850905644e-06,
"loss": 0.7618,
"step": 5040
},
{
"epoch": 1.3406914893617021,
"grad_norm": 4.220356464385986,
"learning_rate": 8.7545077294376e-06,
"loss": 0.9246,
"step": 5041
},
{
"epoch": 1.3409574468085106,
"grad_norm": 3.9419326782226562,
"learning_rate": 8.753926833649871e-06,
"loss": 0.7463,
"step": 5042
},
{
"epoch": 1.3412234042553193,
"grad_norm": 4.060051918029785,
"learning_rate": 8.753345821711224e-06,
"loss": 0.9061,
"step": 5043
},
{
"epoch": 1.3414893617021277,
"grad_norm": 3.7086057662963867,
"learning_rate": 8.75276469363964e-06,
"loss": 0.8177,
"step": 5044
},
{
"epoch": 1.3417553191489362,
"grad_norm": 4.173861503601074,
"learning_rate": 8.752183449453098e-06,
"loss": 0.8117,
"step": 5045
},
{
"epoch": 1.3420212765957447,
"grad_norm": 4.282475471496582,
"learning_rate": 8.75160208916958e-06,
"loss": 0.8352,
"step": 5046
},
{
"epoch": 1.3422872340425531,
"grad_norm": 3.9250497817993164,
"learning_rate": 8.75102061280708e-06,
"loss": 0.8292,
"step": 5047
},
{
"epoch": 1.3425531914893618,
"grad_norm": 4.28936767578125,
"learning_rate": 8.750439020383584e-06,
"loss": 0.8269,
"step": 5048
},
{
"epoch": 1.3428191489361703,
"grad_norm": 4.007338523864746,
"learning_rate": 8.749857311917089e-06,
"loss": 0.8376,
"step": 5049
},
{
"epoch": 1.3430851063829787,
"grad_norm": 3.741140842437744,
"learning_rate": 8.749275487425595e-06,
"loss": 0.7936,
"step": 5050
},
{
"epoch": 1.3433510638297872,
"grad_norm": 3.8448450565338135,
"learning_rate": 8.748693546927101e-06,
"loss": 0.8088,
"step": 5051
},
{
"epoch": 1.3436170212765957,
"grad_norm": 4.5769782066345215,
"learning_rate": 8.748111490439617e-06,
"loss": 0.8315,
"step": 5052
},
{
"epoch": 1.3438829787234043,
"grad_norm": 4.1284871101379395,
"learning_rate": 8.74752931798115e-06,
"loss": 0.8866,
"step": 5053
},
{
"epoch": 1.3441489361702128,
"grad_norm": 3.9224517345428467,
"learning_rate": 8.746947029569715e-06,
"loss": 0.6403,
"step": 5054
},
{
"epoch": 1.3444148936170213,
"grad_norm": 4.114837169647217,
"learning_rate": 8.746364625223326e-06,
"loss": 0.7303,
"step": 5055
},
{
"epoch": 1.3446808510638297,
"grad_norm": 3.9492406845092773,
"learning_rate": 8.745782104960006e-06,
"loss": 0.7462,
"step": 5056
},
{
"epoch": 1.3449468085106382,
"grad_norm": 3.5633533000946045,
"learning_rate": 8.745199468797775e-06,
"loss": 0.8241,
"step": 5057
},
{
"epoch": 1.3452127659574469,
"grad_norm": 3.9602227210998535,
"learning_rate": 8.744616716754665e-06,
"loss": 0.8142,
"step": 5058
},
{
"epoch": 1.3454787234042553,
"grad_norm": 3.6486499309539795,
"learning_rate": 8.744033848848705e-06,
"loss": 0.7932,
"step": 5059
},
{
"epoch": 1.3457446808510638,
"grad_norm": 3.9516966342926025,
"learning_rate": 8.743450865097929e-06,
"loss": 0.7334,
"step": 5060
},
{
"epoch": 1.3460106382978723,
"grad_norm": 4.261397361755371,
"learning_rate": 8.742867765520377e-06,
"loss": 0.7549,
"step": 5061
},
{
"epoch": 1.3462765957446807,
"grad_norm": 4.082563877105713,
"learning_rate": 8.742284550134088e-06,
"loss": 0.8306,
"step": 5062
},
{
"epoch": 1.3465425531914894,
"grad_norm": 3.9603230953216553,
"learning_rate": 8.74170121895711e-06,
"loss": 0.832,
"step": 5063
},
{
"epoch": 1.3468085106382979,
"grad_norm": 4.0057692527771,
"learning_rate": 8.741117772007492e-06,
"loss": 0.783,
"step": 5064
},
{
"epoch": 1.3470744680851063,
"grad_norm": 4.130981922149658,
"learning_rate": 8.740534209303285e-06,
"loss": 0.6476,
"step": 5065
},
{
"epoch": 1.347340425531915,
"grad_norm": 3.641900062561035,
"learning_rate": 8.739950530862544e-06,
"loss": 0.9809,
"step": 5066
},
{
"epoch": 1.3476063829787235,
"grad_norm": 3.607656955718994,
"learning_rate": 8.739366736703331e-06,
"loss": 0.7784,
"step": 5067
},
{
"epoch": 1.347872340425532,
"grad_norm": 4.068065166473389,
"learning_rate": 8.73878282684371e-06,
"loss": 0.9063,
"step": 5068
},
{
"epoch": 1.3481382978723404,
"grad_norm": 3.952601671218872,
"learning_rate": 8.738198801301745e-06,
"loss": 0.9279,
"step": 5069
},
{
"epoch": 1.3484042553191489,
"grad_norm": 4.016735553741455,
"learning_rate": 8.737614660095507e-06,
"loss": 0.7658,
"step": 5070
},
{
"epoch": 1.3486702127659576,
"grad_norm": 3.669020891189575,
"learning_rate": 8.737030403243074e-06,
"loss": 0.6806,
"step": 5071
},
{
"epoch": 1.348936170212766,
"grad_norm": 3.659308910369873,
"learning_rate": 8.736446030762518e-06,
"loss": 0.7539,
"step": 5072
},
{
"epoch": 1.3492021276595745,
"grad_norm": 3.9839887619018555,
"learning_rate": 8.735861542671924e-06,
"loss": 0.7342,
"step": 5073
},
{
"epoch": 1.349468085106383,
"grad_norm": 3.9134328365325928,
"learning_rate": 8.735276938989375e-06,
"loss": 0.8636,
"step": 5074
},
{
"epoch": 1.3497340425531914,
"grad_norm": 3.841643810272217,
"learning_rate": 8.73469221973296e-06,
"loss": 0.7273,
"step": 5075
},
{
"epoch": 1.35,
"grad_norm": 3.903296947479248,
"learning_rate": 8.734107384920771e-06,
"loss": 0.8596,
"step": 5076
},
{
"epoch": 1.3502659574468086,
"grad_norm": 4.10729455947876,
"learning_rate": 8.733522434570901e-06,
"loss": 0.8268,
"step": 5077
},
{
"epoch": 1.350531914893617,
"grad_norm": 3.913231611251831,
"learning_rate": 8.732937368701453e-06,
"loss": 0.8017,
"step": 5078
},
{
"epoch": 1.3507978723404255,
"grad_norm": 3.795318365097046,
"learning_rate": 8.732352187330528e-06,
"loss": 0.6833,
"step": 5079
},
{
"epoch": 1.351063829787234,
"grad_norm": 3.991790294647217,
"learning_rate": 8.731766890476232e-06,
"loss": 0.7068,
"step": 5080
},
{
"epoch": 1.3513297872340426,
"grad_norm": 4.177598476409912,
"learning_rate": 8.731181478156673e-06,
"loss": 0.806,
"step": 5081
},
{
"epoch": 1.351595744680851,
"grad_norm": 3.855368137359619,
"learning_rate": 8.730595950389968e-06,
"loss": 0.7752,
"step": 5082
},
{
"epoch": 1.3518617021276595,
"grad_norm": 4.333880424499512,
"learning_rate": 8.730010307194232e-06,
"loss": 0.771,
"step": 5083
},
{
"epoch": 1.352127659574468,
"grad_norm": 3.9861552715301514,
"learning_rate": 8.729424548587585e-06,
"loss": 0.873,
"step": 5084
},
{
"epoch": 1.3523936170212765,
"grad_norm": 4.271336078643799,
"learning_rate": 8.728838674588151e-06,
"loss": 0.8345,
"step": 5085
},
{
"epoch": 1.3526595744680852,
"grad_norm": 4.418639659881592,
"learning_rate": 8.72825268521406e-06,
"loss": 0.9593,
"step": 5086
},
{
"epoch": 1.3529255319148936,
"grad_norm": 4.122128963470459,
"learning_rate": 8.72766658048344e-06,
"loss": 0.6917,
"step": 5087
},
{
"epoch": 1.353191489361702,
"grad_norm": 3.9738972187042236,
"learning_rate": 8.727080360414428e-06,
"loss": 0.7446,
"step": 5088
},
{
"epoch": 1.3534574468085108,
"grad_norm": 4.067488670349121,
"learning_rate": 8.726494025025162e-06,
"loss": 0.6886,
"step": 5089
},
{
"epoch": 1.3537234042553192,
"grad_norm": 3.782886028289795,
"learning_rate": 8.725907574333783e-06,
"loss": 0.8159,
"step": 5090
},
{
"epoch": 1.3539893617021277,
"grad_norm": 3.9360549449920654,
"learning_rate": 8.725321008358436e-06,
"loss": 0.8189,
"step": 5091
},
{
"epoch": 1.3542553191489362,
"grad_norm": 4.132941246032715,
"learning_rate": 8.724734327117273e-06,
"loss": 0.9677,
"step": 5092
},
{
"epoch": 1.3545212765957446,
"grad_norm": 4.25277042388916,
"learning_rate": 8.724147530628442e-06,
"loss": 0.8653,
"step": 5093
},
{
"epoch": 1.3547872340425533,
"grad_norm": 3.962684392929077,
"learning_rate": 8.723560618910103e-06,
"loss": 0.6903,
"step": 5094
},
{
"epoch": 1.3550531914893618,
"grad_norm": 3.9663078784942627,
"learning_rate": 8.722973591980414e-06,
"loss": 0.7572,
"step": 5095
},
{
"epoch": 1.3553191489361702,
"grad_norm": 4.48624849319458,
"learning_rate": 8.722386449857541e-06,
"loss": 0.9056,
"step": 5096
},
{
"epoch": 1.3555851063829787,
"grad_norm": 3.8394525051116943,
"learning_rate": 8.721799192559646e-06,
"loss": 0.7721,
"step": 5097
},
{
"epoch": 1.3558510638297872,
"grad_norm": 4.599715232849121,
"learning_rate": 8.721211820104903e-06,
"loss": 1.0118,
"step": 5098
},
{
"epoch": 1.3561170212765958,
"grad_norm": 4.1499528884887695,
"learning_rate": 8.720624332511484e-06,
"loss": 0.8979,
"step": 5099
},
{
"epoch": 1.3563829787234043,
"grad_norm": 3.8984806537628174,
"learning_rate": 8.72003672979757e-06,
"loss": 0.8824,
"step": 5100
},
{
"epoch": 1.3566489361702128,
"grad_norm": 3.709800958633423,
"learning_rate": 8.71944901198134e-06,
"loss": 0.8053,
"step": 5101
},
{
"epoch": 1.3569148936170212,
"grad_norm": 3.4785032272338867,
"learning_rate": 8.718861179080975e-06,
"loss": 0.6898,
"step": 5102
},
{
"epoch": 1.3571808510638297,
"grad_norm": 3.8457705974578857,
"learning_rate": 8.71827323111467e-06,
"loss": 0.75,
"step": 5103
},
{
"epoch": 1.3574468085106384,
"grad_norm": 3.66109299659729,
"learning_rate": 8.71768516810061e-06,
"loss": 0.7255,
"step": 5104
},
{
"epoch": 1.3577127659574468,
"grad_norm": 3.6998486518859863,
"learning_rate": 8.717096990056999e-06,
"loss": 0.8202,
"step": 5105
},
{
"epoch": 1.3579787234042553,
"grad_norm": 4.291678428649902,
"learning_rate": 8.716508697002027e-06,
"loss": 0.9424,
"step": 5106
},
{
"epoch": 1.3582446808510638,
"grad_norm": 3.870074987411499,
"learning_rate": 8.715920288953901e-06,
"loss": 0.8821,
"step": 5107
},
{
"epoch": 1.3585106382978722,
"grad_norm": 3.469759702682495,
"learning_rate": 8.715331765930828e-06,
"loss": 0.745,
"step": 5108
},
{
"epoch": 1.358776595744681,
"grad_norm": 4.048684597015381,
"learning_rate": 8.714743127951014e-06,
"loss": 0.9526,
"step": 5109
},
{
"epoch": 1.3590425531914894,
"grad_norm": 4.060766696929932,
"learning_rate": 8.714154375032675e-06,
"loss": 0.7971,
"step": 5110
},
{
"epoch": 1.3593085106382978,
"grad_norm": 4.004628658294678,
"learning_rate": 8.713565507194027e-06,
"loss": 0.8302,
"step": 5111
},
{
"epoch": 1.3595744680851065,
"grad_norm": 4.034252166748047,
"learning_rate": 8.712976524453289e-06,
"loss": 0.8873,
"step": 5112
},
{
"epoch": 1.359840425531915,
"grad_norm": 3.9113869667053223,
"learning_rate": 8.712387426828685e-06,
"loss": 0.7514,
"step": 5113
},
{
"epoch": 1.3601063829787234,
"grad_norm": 3.977827787399292,
"learning_rate": 8.711798214338445e-06,
"loss": 0.8099,
"step": 5114
},
{
"epoch": 1.360372340425532,
"grad_norm": 4.005003929138184,
"learning_rate": 8.711208887000797e-06,
"loss": 0.8888,
"step": 5115
},
{
"epoch": 1.3606382978723404,
"grad_norm": 3.7809715270996094,
"learning_rate": 8.710619444833977e-06,
"loss": 0.8131,
"step": 5116
},
{
"epoch": 1.360904255319149,
"grad_norm": 3.8309693336486816,
"learning_rate": 8.710029887856224e-06,
"loss": 0.6836,
"step": 5117
},
{
"epoch": 1.3611702127659575,
"grad_norm": 3.7106757164001465,
"learning_rate": 8.709440216085777e-06,
"loss": 0.8079,
"step": 5118
},
{
"epoch": 1.361436170212766,
"grad_norm": 4.386137962341309,
"learning_rate": 8.708850429540882e-06,
"loss": 0.8484,
"step": 5119
},
{
"epoch": 1.3617021276595744,
"grad_norm": 4.305933952331543,
"learning_rate": 8.708260528239788e-06,
"loss": 0.9357,
"step": 5120
},
{
"epoch": 1.361968085106383,
"grad_norm": 4.107351303100586,
"learning_rate": 8.70767051220075e-06,
"loss": 0.8932,
"step": 5121
},
{
"epoch": 1.3622340425531916,
"grad_norm": 3.7665624618530273,
"learning_rate": 8.707080381442016e-06,
"loss": 0.7792,
"step": 5122
},
{
"epoch": 1.3625,
"grad_norm": 4.177657604217529,
"learning_rate": 8.706490135981856e-06,
"loss": 0.8046,
"step": 5123
},
{
"epoch": 1.3627659574468085,
"grad_norm": 4.132664203643799,
"learning_rate": 8.705899775838525e-06,
"loss": 0.8516,
"step": 5124
},
{
"epoch": 1.363031914893617,
"grad_norm": 4.0525288581848145,
"learning_rate": 8.70530930103029e-06,
"loss": 0.8747,
"step": 5125
},
{
"epoch": 1.3632978723404254,
"grad_norm": 4.088098526000977,
"learning_rate": 8.704718711575424e-06,
"loss": 0.6531,
"step": 5126
},
{
"epoch": 1.3635638297872341,
"grad_norm": 3.944594144821167,
"learning_rate": 8.704128007492201e-06,
"loss": 0.8084,
"step": 5127
},
{
"epoch": 1.3638297872340426,
"grad_norm": 4.340763092041016,
"learning_rate": 8.703537188798894e-06,
"loss": 0.8186,
"step": 5128
},
{
"epoch": 1.364095744680851,
"grad_norm": 3.9249961376190186,
"learning_rate": 8.702946255513787e-06,
"loss": 0.8166,
"step": 5129
},
{
"epoch": 1.3643617021276595,
"grad_norm": 3.667654275894165,
"learning_rate": 8.702355207655164e-06,
"loss": 0.8432,
"step": 5130
},
{
"epoch": 1.364627659574468,
"grad_norm": 3.6376404762268066,
"learning_rate": 8.70176404524131e-06,
"loss": 0.7878,
"step": 5131
},
{
"epoch": 1.3648936170212767,
"grad_norm": 3.9054555892944336,
"learning_rate": 8.70117276829052e-06,
"loss": 0.7763,
"step": 5132
},
{
"epoch": 1.3651595744680851,
"grad_norm": 4.0739288330078125,
"learning_rate": 8.700581376821086e-06,
"loss": 0.728,
"step": 5133
},
{
"epoch": 1.3654255319148936,
"grad_norm": 3.8359971046447754,
"learning_rate": 8.699989870851308e-06,
"loss": 0.8314,
"step": 5134
},
{
"epoch": 1.3656914893617023,
"grad_norm": 3.708594799041748,
"learning_rate": 8.699398250399486e-06,
"loss": 0.7632,
"step": 5135
},
{
"epoch": 1.3659574468085105,
"grad_norm": 3.9665486812591553,
"learning_rate": 8.698806515483928e-06,
"loss": 0.8794,
"step": 5136
},
{
"epoch": 1.3662234042553192,
"grad_norm": 4.699567794799805,
"learning_rate": 8.698214666122941e-06,
"loss": 1.0106,
"step": 5137
},
{
"epoch": 1.3664893617021276,
"grad_norm": 3.8563220500946045,
"learning_rate": 8.697622702334839e-06,
"loss": 0.7451,
"step": 5138
},
{
"epoch": 1.366755319148936,
"grad_norm": 4.188748359680176,
"learning_rate": 8.697030624137937e-06,
"loss": 0.7481,
"step": 5139
},
{
"epoch": 1.3670212765957448,
"grad_norm": 3.891820192337036,
"learning_rate": 8.696438431550553e-06,
"loss": 0.8304,
"step": 5140
},
{
"epoch": 1.3672872340425533,
"grad_norm": 4.065185546875,
"learning_rate": 8.695846124591015e-06,
"loss": 0.8912,
"step": 5141
},
{
"epoch": 1.3675531914893617,
"grad_norm": 3.466252326965332,
"learning_rate": 8.695253703277644e-06,
"loss": 0.7941,
"step": 5142
},
{
"epoch": 1.3678191489361702,
"grad_norm": 3.7102415561676025,
"learning_rate": 8.694661167628772e-06,
"loss": 0.6821,
"step": 5143
},
{
"epoch": 1.3680851063829786,
"grad_norm": 4.1319260597229,
"learning_rate": 8.694068517662735e-06,
"loss": 0.9666,
"step": 5144
},
{
"epoch": 1.3683510638297873,
"grad_norm": 3.870607852935791,
"learning_rate": 8.693475753397869e-06,
"loss": 0.8806,
"step": 5145
},
{
"epoch": 1.3686170212765958,
"grad_norm": 3.9953293800354004,
"learning_rate": 8.692882874852515e-06,
"loss": 0.8558,
"step": 5146
},
{
"epoch": 1.3688829787234043,
"grad_norm": 4.429169178009033,
"learning_rate": 8.692289882045015e-06,
"loss": 0.7949,
"step": 5147
},
{
"epoch": 1.3691489361702127,
"grad_norm": 3.895005464553833,
"learning_rate": 8.691696774993721e-06,
"loss": 0.7547,
"step": 5148
},
{
"epoch": 1.3694148936170212,
"grad_norm": 4.446406841278076,
"learning_rate": 8.691103553716981e-06,
"loss": 0.8757,
"step": 5149
},
{
"epoch": 1.3696808510638299,
"grad_norm": 4.012157440185547,
"learning_rate": 8.690510218233153e-06,
"loss": 0.9106,
"step": 5150
},
{
"epoch": 1.3699468085106383,
"grad_norm": 3.966068983078003,
"learning_rate": 8.689916768560593e-06,
"loss": 0.7194,
"step": 5151
},
{
"epoch": 1.3702127659574468,
"grad_norm": 3.9841232299804688,
"learning_rate": 8.689323204717663e-06,
"loss": 0.8174,
"step": 5152
},
{
"epoch": 1.3704787234042553,
"grad_norm": 4.248937129974365,
"learning_rate": 8.688729526722732e-06,
"loss": 0.8107,
"step": 5153
},
{
"epoch": 1.3707446808510637,
"grad_norm": 3.6485583782196045,
"learning_rate": 8.688135734594165e-06,
"loss": 0.8828,
"step": 5154
},
{
"epoch": 1.3710106382978724,
"grad_norm": 4.1670966148376465,
"learning_rate": 8.687541828350334e-06,
"loss": 0.8604,
"step": 5155
},
{
"epoch": 1.3712765957446809,
"grad_norm": 4.121282577514648,
"learning_rate": 8.686947808009621e-06,
"loss": 0.8228,
"step": 5156
},
{
"epoch": 1.3715425531914893,
"grad_norm": 3.781928539276123,
"learning_rate": 8.6863536735904e-06,
"loss": 0.7416,
"step": 5157
},
{
"epoch": 1.371808510638298,
"grad_norm": 3.688425064086914,
"learning_rate": 8.685759425111056e-06,
"loss": 0.7902,
"step": 5158
},
{
"epoch": 1.3720744680851062,
"grad_norm": 3.922410488128662,
"learning_rate": 8.685165062589975e-06,
"loss": 0.8117,
"step": 5159
},
{
"epoch": 1.372340425531915,
"grad_norm": 4.217987060546875,
"learning_rate": 8.68457058604555e-06,
"loss": 0.9173,
"step": 5160
},
{
"epoch": 1.3726063829787234,
"grad_norm": 4.135257244110107,
"learning_rate": 8.683975995496173e-06,
"loss": 0.7474,
"step": 5161
},
{
"epoch": 1.3728723404255319,
"grad_norm": 3.7882463932037354,
"learning_rate": 8.68338129096024e-06,
"loss": 0.8153,
"step": 5162
},
{
"epoch": 1.3731382978723405,
"grad_norm": 3.6793859004974365,
"learning_rate": 8.682786472456155e-06,
"loss": 0.6914,
"step": 5163
},
{
"epoch": 1.373404255319149,
"grad_norm": 4.030581951141357,
"learning_rate": 8.682191540002318e-06,
"loss": 0.778,
"step": 5164
},
{
"epoch": 1.3736702127659575,
"grad_norm": 3.8380470275878906,
"learning_rate": 8.681596493617141e-06,
"loss": 0.7522,
"step": 5165
},
{
"epoch": 1.373936170212766,
"grad_norm": 4.138343334197998,
"learning_rate": 8.681001333319035e-06,
"loss": 0.843,
"step": 5166
},
{
"epoch": 1.3742021276595744,
"grad_norm": 3.723407030105591,
"learning_rate": 8.680406059126412e-06,
"loss": 0.7799,
"step": 5167
},
{
"epoch": 1.374468085106383,
"grad_norm": 3.8985822200775146,
"learning_rate": 8.679810671057695e-06,
"loss": 0.7446,
"step": 5168
},
{
"epoch": 1.3747340425531915,
"grad_norm": 4.534223556518555,
"learning_rate": 8.679215169131301e-06,
"loss": 0.8734,
"step": 5169
},
{
"epoch": 1.375,
"grad_norm": 3.75278639793396,
"learning_rate": 8.67861955336566e-06,
"loss": 0.8435,
"step": 5170
},
{
"epoch": 1.3752659574468085,
"grad_norm": 4.094736099243164,
"learning_rate": 8.678023823779196e-06,
"loss": 0.7671,
"step": 5171
},
{
"epoch": 1.375531914893617,
"grad_norm": 3.920642137527466,
"learning_rate": 8.677427980390348e-06,
"loss": 0.7937,
"step": 5172
},
{
"epoch": 1.3757978723404256,
"grad_norm": 3.5799460411071777,
"learning_rate": 8.676832023217545e-06,
"loss": 0.8206,
"step": 5173
},
{
"epoch": 1.376063829787234,
"grad_norm": 3.8929152488708496,
"learning_rate": 8.676235952279233e-06,
"loss": 0.837,
"step": 5174
},
{
"epoch": 1.3763297872340425,
"grad_norm": 3.7762844562530518,
"learning_rate": 8.675639767593851e-06,
"loss": 0.8191,
"step": 5175
},
{
"epoch": 1.376595744680851,
"grad_norm": 4.34854793548584,
"learning_rate": 8.675043469179849e-06,
"loss": 0.9724,
"step": 5176
},
{
"epoch": 1.3768617021276595,
"grad_norm": 4.143275260925293,
"learning_rate": 8.674447057055673e-06,
"loss": 0.7607,
"step": 5177
},
{
"epoch": 1.3771276595744681,
"grad_norm": 3.8602356910705566,
"learning_rate": 8.673850531239781e-06,
"loss": 0.8241,
"step": 5178
},
{
"epoch": 1.3773936170212766,
"grad_norm": 4.238362789154053,
"learning_rate": 8.673253891750626e-06,
"loss": 0.75,
"step": 5179
},
{
"epoch": 1.377659574468085,
"grad_norm": 4.423724174499512,
"learning_rate": 8.672657138606672e-06,
"loss": 0.8929,
"step": 5180
},
{
"epoch": 1.3779255319148938,
"grad_norm": 3.5237340927124023,
"learning_rate": 8.672060271826381e-06,
"loss": 0.6877,
"step": 5181
},
{
"epoch": 1.378191489361702,
"grad_norm": 3.615936756134033,
"learning_rate": 8.671463291428223e-06,
"loss": 0.7091,
"step": 5182
},
{
"epoch": 1.3784574468085107,
"grad_norm": 3.587336778640747,
"learning_rate": 8.67086619743067e-06,
"loss": 0.8266,
"step": 5183
},
{
"epoch": 1.3787234042553191,
"grad_norm": 4.141132831573486,
"learning_rate": 8.670268989852192e-06,
"loss": 0.7199,
"step": 5184
},
{
"epoch": 1.3789893617021276,
"grad_norm": 4.076261520385742,
"learning_rate": 8.669671668711272e-06,
"loss": 0.7788,
"step": 5185
},
{
"epoch": 1.3792553191489363,
"grad_norm": 4.020741939544678,
"learning_rate": 8.66907423402639e-06,
"loss": 0.8652,
"step": 5186
},
{
"epoch": 1.3795212765957447,
"grad_norm": 3.8059983253479004,
"learning_rate": 8.668476685816029e-06,
"loss": 0.8151,
"step": 5187
},
{
"epoch": 1.3797872340425532,
"grad_norm": 4.055500030517578,
"learning_rate": 8.667879024098682e-06,
"loss": 0.7985,
"step": 5188
},
{
"epoch": 1.3800531914893617,
"grad_norm": 3.8605387210845947,
"learning_rate": 8.66728124889284e-06,
"loss": 0.8602,
"step": 5189
},
{
"epoch": 1.3803191489361701,
"grad_norm": 3.781041383743286,
"learning_rate": 8.666683360216998e-06,
"loss": 0.815,
"step": 5190
},
{
"epoch": 1.3805851063829788,
"grad_norm": 4.160099029541016,
"learning_rate": 8.666085358089655e-06,
"loss": 0.8366,
"step": 5191
},
{
"epoch": 1.3808510638297873,
"grad_norm": 4.079177379608154,
"learning_rate": 8.665487242529316e-06,
"loss": 0.9131,
"step": 5192
},
{
"epoch": 1.3811170212765957,
"grad_norm": 4.033502578735352,
"learning_rate": 8.664889013554484e-06,
"loss": 0.7588,
"step": 5193
},
{
"epoch": 1.3813829787234042,
"grad_norm": 3.969634771347046,
"learning_rate": 8.664290671183675e-06,
"loss": 0.9422,
"step": 5194
},
{
"epoch": 1.3816489361702127,
"grad_norm": 3.9259159564971924,
"learning_rate": 8.663692215435396e-06,
"loss": 0.7046,
"step": 5195
},
{
"epoch": 1.3819148936170214,
"grad_norm": 4.086988925933838,
"learning_rate": 8.663093646328166e-06,
"loss": 0.8629,
"step": 5196
},
{
"epoch": 1.3821808510638298,
"grad_norm": 4.083224773406982,
"learning_rate": 8.662494963880508e-06,
"loss": 0.8992,
"step": 5197
},
{
"epoch": 1.3824468085106383,
"grad_norm": 4.1260881423950195,
"learning_rate": 8.66189616811094e-06,
"loss": 0.8958,
"step": 5198
},
{
"epoch": 1.3827127659574467,
"grad_norm": 3.9255919456481934,
"learning_rate": 8.661297259037998e-06,
"loss": 0.8155,
"step": 5199
},
{
"epoch": 1.3829787234042552,
"grad_norm": 4.030576705932617,
"learning_rate": 8.660698236680205e-06,
"loss": 0.901,
"step": 5200
},
{
"epoch": 1.383244680851064,
"grad_norm": 4.204456329345703,
"learning_rate": 8.660099101056098e-06,
"loss": 0.8021,
"step": 5201
},
{
"epoch": 1.3835106382978724,
"grad_norm": 3.743723154067993,
"learning_rate": 8.659499852184218e-06,
"loss": 0.8411,
"step": 5202
},
{
"epoch": 1.3837765957446808,
"grad_norm": 3.8044793605804443,
"learning_rate": 8.658900490083102e-06,
"loss": 0.6985,
"step": 5203
},
{
"epoch": 1.3840425531914895,
"grad_norm": 3.762624740600586,
"learning_rate": 8.658301014771298e-06,
"loss": 0.7873,
"step": 5204
},
{
"epoch": 1.3843085106382977,
"grad_norm": 3.8245599269866943,
"learning_rate": 8.657701426267355e-06,
"loss": 0.7773,
"step": 5205
},
{
"epoch": 1.3845744680851064,
"grad_norm": 3.875678062438965,
"learning_rate": 8.65710172458982e-06,
"loss": 0.9493,
"step": 5206
},
{
"epoch": 1.3848404255319149,
"grad_norm": 4.034217834472656,
"learning_rate": 8.656501909757255e-06,
"loss": 0.8742,
"step": 5207
},
{
"epoch": 1.3851063829787233,
"grad_norm": 3.7253971099853516,
"learning_rate": 8.655901981788216e-06,
"loss": 0.7408,
"step": 5208
},
{
"epoch": 1.385372340425532,
"grad_norm": 4.211146354675293,
"learning_rate": 8.655301940701262e-06,
"loss": 0.8107,
"step": 5209
},
{
"epoch": 1.3856382978723405,
"grad_norm": 4.0121378898620605,
"learning_rate": 8.654701786514965e-06,
"loss": 0.8808,
"step": 5210
},
{
"epoch": 1.385904255319149,
"grad_norm": 4.111256122589111,
"learning_rate": 8.654101519247892e-06,
"loss": 0.8339,
"step": 5211
},
{
"epoch": 1.3861702127659574,
"grad_norm": 3.683849811553955,
"learning_rate": 8.653501138918615e-06,
"loss": 0.8046,
"step": 5212
},
{
"epoch": 1.3864361702127659,
"grad_norm": 4.3086957931518555,
"learning_rate": 8.652900645545711e-06,
"loss": 0.8217,
"step": 5213
},
{
"epoch": 1.3867021276595746,
"grad_norm": 4.064043998718262,
"learning_rate": 8.65230003914776e-06,
"loss": 0.9811,
"step": 5214
},
{
"epoch": 1.386968085106383,
"grad_norm": 3.8175463676452637,
"learning_rate": 8.651699319743348e-06,
"loss": 0.879,
"step": 5215
},
{
"epoch": 1.3872340425531915,
"grad_norm": 4.500128269195557,
"learning_rate": 8.651098487351057e-06,
"loss": 0.6979,
"step": 5216
},
{
"epoch": 1.3875,
"grad_norm": 4.019436836242676,
"learning_rate": 8.650497541989483e-06,
"loss": 0.8766,
"step": 5217
},
{
"epoch": 1.3877659574468084,
"grad_norm": 3.5277206897735596,
"learning_rate": 8.649896483677213e-06,
"loss": 0.8292,
"step": 5218
},
{
"epoch": 1.388031914893617,
"grad_norm": 3.918307065963745,
"learning_rate": 8.649295312432853e-06,
"loss": 0.7684,
"step": 5219
},
{
"epoch": 1.3882978723404256,
"grad_norm": 3.9739909172058105,
"learning_rate": 8.648694028274998e-06,
"loss": 0.743,
"step": 5220
},
{
"epoch": 1.388563829787234,
"grad_norm": 3.6508398056030273,
"learning_rate": 8.648092631222253e-06,
"loss": 0.7689,
"step": 5221
},
{
"epoch": 1.3888297872340425,
"grad_norm": 3.846869468688965,
"learning_rate": 8.647491121293228e-06,
"loss": 0.741,
"step": 5222
},
{
"epoch": 1.389095744680851,
"grad_norm": 3.8481643199920654,
"learning_rate": 8.646889498506532e-06,
"loss": 0.8665,
"step": 5223
},
{
"epoch": 1.3893617021276596,
"grad_norm": 4.380584239959717,
"learning_rate": 8.646287762880783e-06,
"loss": 0.8029,
"step": 5224
},
{
"epoch": 1.389627659574468,
"grad_norm": 3.8931496143341064,
"learning_rate": 8.645685914434596e-06,
"loss": 0.8964,
"step": 5225
},
{
"epoch": 1.3898936170212766,
"grad_norm": 3.976508378982544,
"learning_rate": 8.645083953186596e-06,
"loss": 0.8707,
"step": 5226
},
{
"epoch": 1.390159574468085,
"grad_norm": 3.606631278991699,
"learning_rate": 8.644481879155406e-06,
"loss": 0.7476,
"step": 5227
},
{
"epoch": 1.3904255319148935,
"grad_norm": 4.043211936950684,
"learning_rate": 8.643879692359655e-06,
"loss": 0.7478,
"step": 5228
},
{
"epoch": 1.3906914893617022,
"grad_norm": 3.9135618209838867,
"learning_rate": 8.643277392817976e-06,
"loss": 0.7469,
"step": 5229
},
{
"epoch": 1.3909574468085106,
"grad_norm": 3.747793674468994,
"learning_rate": 8.642674980549008e-06,
"loss": 0.8092,
"step": 5230
},
{
"epoch": 1.391223404255319,
"grad_norm": 4.33275032043457,
"learning_rate": 8.642072455571383e-06,
"loss": 0.7867,
"step": 5231
},
{
"epoch": 1.3914893617021278,
"grad_norm": 4.364730358123779,
"learning_rate": 8.641469817903752e-06,
"loss": 0.8545,
"step": 5232
},
{
"epoch": 1.3917553191489362,
"grad_norm": 3.848296880722046,
"learning_rate": 8.640867067564757e-06,
"loss": 0.8735,
"step": 5233
},
{
"epoch": 1.3920212765957447,
"grad_norm": 3.8391952514648438,
"learning_rate": 8.640264204573049e-06,
"loss": 0.8439,
"step": 5234
},
{
"epoch": 1.3922872340425532,
"grad_norm": 4.061415672302246,
"learning_rate": 8.639661228947278e-06,
"loss": 0.7702,
"step": 5235
},
{
"epoch": 1.3925531914893616,
"grad_norm": 4.175765037536621,
"learning_rate": 8.639058140706105e-06,
"loss": 0.8053,
"step": 5236
},
{
"epoch": 1.3928191489361703,
"grad_norm": 3.840773105621338,
"learning_rate": 8.638454939868188e-06,
"loss": 0.7192,
"step": 5237
},
{
"epoch": 1.3930851063829788,
"grad_norm": 3.76470947265625,
"learning_rate": 8.637851626452191e-06,
"loss": 0.7634,
"step": 5238
},
{
"epoch": 1.3933510638297872,
"grad_norm": 3.903261184692383,
"learning_rate": 8.637248200476783e-06,
"loss": 0.7672,
"step": 5239
},
{
"epoch": 1.3936170212765957,
"grad_norm": 4.356569290161133,
"learning_rate": 8.636644661960634e-06,
"loss": 0.8834,
"step": 5240
},
{
"epoch": 1.3938829787234042,
"grad_norm": 4.116570949554443,
"learning_rate": 8.636041010922416e-06,
"loss": 0.7715,
"step": 5241
},
{
"epoch": 1.3941489361702128,
"grad_norm": 3.9501302242279053,
"learning_rate": 8.635437247380809e-06,
"loss": 0.7663,
"step": 5242
},
{
"epoch": 1.3944148936170213,
"grad_norm": 4.226482391357422,
"learning_rate": 8.634833371354492e-06,
"loss": 0.8156,
"step": 5243
},
{
"epoch": 1.3946808510638298,
"grad_norm": 4.047403335571289,
"learning_rate": 8.634229382862152e-06,
"loss": 0.8982,
"step": 5244
},
{
"epoch": 1.3949468085106382,
"grad_norm": 4.245815753936768,
"learning_rate": 8.633625281922477e-06,
"loss": 0.8558,
"step": 5245
},
{
"epoch": 1.3952127659574467,
"grad_norm": 3.9995036125183105,
"learning_rate": 8.633021068554155e-06,
"loss": 0.8246,
"step": 5246
},
{
"epoch": 1.3954787234042554,
"grad_norm": 4.213914394378662,
"learning_rate": 8.632416742775886e-06,
"loss": 0.7979,
"step": 5247
},
{
"epoch": 1.3957446808510638,
"grad_norm": 4.043915748596191,
"learning_rate": 8.631812304606367e-06,
"loss": 0.8903,
"step": 5248
},
{
"epoch": 1.3960106382978723,
"grad_norm": 3.995999336242676,
"learning_rate": 8.631207754064299e-06,
"loss": 0.7445,
"step": 5249
},
{
"epoch": 1.3962765957446808,
"grad_norm": 3.6424171924591064,
"learning_rate": 8.630603091168385e-06,
"loss": 0.6922,
"step": 5250
},
{
"epoch": 1.3965425531914892,
"grad_norm": 4.3226118087768555,
"learning_rate": 8.62999831593734e-06,
"loss": 0.8686,
"step": 5251
},
{
"epoch": 1.396808510638298,
"grad_norm": 3.89966082572937,
"learning_rate": 8.629393428389873e-06,
"loss": 0.7592,
"step": 5252
},
{
"epoch": 1.3970744680851064,
"grad_norm": 4.409592151641846,
"learning_rate": 8.628788428544698e-06,
"loss": 0.952,
"step": 5253
},
{
"epoch": 1.3973404255319148,
"grad_norm": 3.884060859680176,
"learning_rate": 8.62818331642054e-06,
"loss": 0.83,
"step": 5254
},
{
"epoch": 1.3976063829787235,
"grad_norm": 3.480745792388916,
"learning_rate": 8.627578092036117e-06,
"loss": 0.7324,
"step": 5255
},
{
"epoch": 1.397872340425532,
"grad_norm": 3.862119436264038,
"learning_rate": 8.626972755410156e-06,
"loss": 0.7555,
"step": 5256
},
{
"epoch": 1.3981382978723405,
"grad_norm": 4.149264335632324,
"learning_rate": 8.626367306561387e-06,
"loss": 0.7649,
"step": 5257
},
{
"epoch": 1.398404255319149,
"grad_norm": 3.6122639179229736,
"learning_rate": 8.625761745508547e-06,
"loss": 0.7959,
"step": 5258
},
{
"epoch": 1.3986702127659574,
"grad_norm": 3.611455202102661,
"learning_rate": 8.625156072270367e-06,
"loss": 0.8546,
"step": 5259
},
{
"epoch": 1.398936170212766,
"grad_norm": 4.0274858474731445,
"learning_rate": 8.624550286865592e-06,
"loss": 0.818,
"step": 5260
},
{
"epoch": 1.3992021276595745,
"grad_norm": 4.080778121948242,
"learning_rate": 8.623944389312962e-06,
"loss": 0.8599,
"step": 5261
},
{
"epoch": 1.399468085106383,
"grad_norm": 4.097471237182617,
"learning_rate": 8.623338379631227e-06,
"loss": 0.8178,
"step": 5262
},
{
"epoch": 1.3997340425531914,
"grad_norm": 3.6200075149536133,
"learning_rate": 8.622732257839137e-06,
"loss": 0.8381,
"step": 5263
},
{
"epoch": 1.4,
"grad_norm": 4.054747581481934,
"learning_rate": 8.622126023955446e-06,
"loss": 0.9865,
"step": 5264
},
{
"epoch": 1.4002659574468086,
"grad_norm": 4.653242111206055,
"learning_rate": 8.62151967799891e-06,
"loss": 0.8813,
"step": 5265
},
{
"epoch": 1.400531914893617,
"grad_norm": 4.182617664337158,
"learning_rate": 8.620913219988291e-06,
"loss": 0.7061,
"step": 5266
},
{
"epoch": 1.4007978723404255,
"grad_norm": 3.594130277633667,
"learning_rate": 8.620306649942356e-06,
"loss": 0.7468,
"step": 5267
},
{
"epoch": 1.401063829787234,
"grad_norm": 4.210184574127197,
"learning_rate": 8.619699967879868e-06,
"loss": 0.9574,
"step": 5268
},
{
"epoch": 1.4013297872340424,
"grad_norm": 4.212064743041992,
"learning_rate": 8.619093173819603e-06,
"loss": 0.8027,
"step": 5269
},
{
"epoch": 1.4015957446808511,
"grad_norm": 4.000636100769043,
"learning_rate": 8.618486267780334e-06,
"loss": 0.8482,
"step": 5270
},
{
"epoch": 1.4018617021276596,
"grad_norm": 4.396604537963867,
"learning_rate": 8.617879249780841e-06,
"loss": 0.8989,
"step": 5271
},
{
"epoch": 1.402127659574468,
"grad_norm": 3.6377105712890625,
"learning_rate": 8.617272119839903e-06,
"loss": 0.7686,
"step": 5272
},
{
"epoch": 1.4023936170212765,
"grad_norm": 3.8942556381225586,
"learning_rate": 8.616664877976308e-06,
"loss": 0.8185,
"step": 5273
},
{
"epoch": 1.402659574468085,
"grad_norm": 3.9607818126678467,
"learning_rate": 8.616057524208843e-06,
"loss": 0.6682,
"step": 5274
},
{
"epoch": 1.4029255319148937,
"grad_norm": 4.523376941680908,
"learning_rate": 8.615450058556301e-06,
"loss": 0.8093,
"step": 5275
},
{
"epoch": 1.4031914893617021,
"grad_norm": 4.111645221710205,
"learning_rate": 8.614842481037476e-06,
"loss": 0.8694,
"step": 5276
},
{
"epoch": 1.4034574468085106,
"grad_norm": 3.7978808879852295,
"learning_rate": 8.61423479167117e-06,
"loss": 0.7477,
"step": 5277
},
{
"epoch": 1.4037234042553193,
"grad_norm": 3.669728994369507,
"learning_rate": 8.613626990476186e-06,
"loss": 0.7951,
"step": 5278
},
{
"epoch": 1.4039893617021277,
"grad_norm": 4.3240251541137695,
"learning_rate": 8.613019077471325e-06,
"loss": 0.8721,
"step": 5279
},
{
"epoch": 1.4042553191489362,
"grad_norm": 3.702890157699585,
"learning_rate": 8.6124110526754e-06,
"loss": 0.6856,
"step": 5280
},
{
"epoch": 1.4045212765957447,
"grad_norm": 4.085876941680908,
"learning_rate": 8.611802916107225e-06,
"loss": 0.7458,
"step": 5281
},
{
"epoch": 1.4047872340425531,
"grad_norm": 4.095217704772949,
"learning_rate": 8.611194667785615e-06,
"loss": 0.821,
"step": 5282
},
{
"epoch": 1.4050531914893618,
"grad_norm": 3.8958888053894043,
"learning_rate": 8.610586307729393e-06,
"loss": 0.7271,
"step": 5283
},
{
"epoch": 1.4053191489361703,
"grad_norm": 3.696851968765259,
"learning_rate": 8.609977835957378e-06,
"loss": 0.7236,
"step": 5284
},
{
"epoch": 1.4055851063829787,
"grad_norm": 4.185340404510498,
"learning_rate": 8.609369252488398e-06,
"loss": 0.9089,
"step": 5285
},
{
"epoch": 1.4058510638297872,
"grad_norm": 4.072790622711182,
"learning_rate": 8.608760557341284e-06,
"loss": 0.761,
"step": 5286
},
{
"epoch": 1.4061170212765957,
"grad_norm": 3.8811473846435547,
"learning_rate": 8.60815175053487e-06,
"loss": 0.8021,
"step": 5287
},
{
"epoch": 1.4063829787234043,
"grad_norm": 4.050495624542236,
"learning_rate": 8.607542832087993e-06,
"loss": 0.7736,
"step": 5288
},
{
"epoch": 1.4066489361702128,
"grad_norm": 3.903702735900879,
"learning_rate": 8.606933802019493e-06,
"loss": 0.8525,
"step": 5289
},
{
"epoch": 1.4069148936170213,
"grad_norm": 3.618151903152466,
"learning_rate": 8.606324660348214e-06,
"loss": 0.7992,
"step": 5290
},
{
"epoch": 1.4071808510638297,
"grad_norm": 3.910585641860962,
"learning_rate": 8.605715407093005e-06,
"loss": 0.8235,
"step": 5291
},
{
"epoch": 1.4074468085106382,
"grad_norm": 4.317497253417969,
"learning_rate": 8.605106042272715e-06,
"loss": 0.8737,
"step": 5292
},
{
"epoch": 1.4077127659574469,
"grad_norm": 4.357272624969482,
"learning_rate": 8.6044965659062e-06,
"loss": 0.787,
"step": 5293
},
{
"epoch": 1.4079787234042553,
"grad_norm": 4.051640033721924,
"learning_rate": 8.603886978012317e-06,
"loss": 0.8513,
"step": 5294
},
{
"epoch": 1.4082446808510638,
"grad_norm": 4.226726055145264,
"learning_rate": 8.60327727860993e-06,
"loss": 0.717,
"step": 5295
},
{
"epoch": 1.4085106382978723,
"grad_norm": 3.7265825271606445,
"learning_rate": 8.6026674677179e-06,
"loss": 0.7177,
"step": 5296
},
{
"epoch": 1.4087765957446807,
"grad_norm": 3.866156816482544,
"learning_rate": 8.602057545355096e-06,
"loss": 0.78,
"step": 5297
},
{
"epoch": 1.4090425531914894,
"grad_norm": 3.843125820159912,
"learning_rate": 8.601447511540392e-06,
"loss": 0.8847,
"step": 5298
},
{
"epoch": 1.4093085106382979,
"grad_norm": 3.813894033432007,
"learning_rate": 8.600837366292663e-06,
"loss": 0.7,
"step": 5299
},
{
"epoch": 1.4095744680851063,
"grad_norm": 4.289909362792969,
"learning_rate": 8.600227109630785e-06,
"loss": 0.7832,
"step": 5300
},
{
"epoch": 1.409840425531915,
"grad_norm": 4.330870151519775,
"learning_rate": 8.599616741573642e-06,
"loss": 0.9482,
"step": 5301
},
{
"epoch": 1.4101063829787235,
"grad_norm": 3.625694990158081,
"learning_rate": 8.599006262140117e-06,
"loss": 0.6515,
"step": 5302
},
{
"epoch": 1.410372340425532,
"grad_norm": 4.081284999847412,
"learning_rate": 8.598395671349104e-06,
"loss": 0.9656,
"step": 5303
},
{
"epoch": 1.4106382978723404,
"grad_norm": 4.240716457366943,
"learning_rate": 8.59778496921949e-06,
"loss": 0.8328,
"step": 5304
},
{
"epoch": 1.4109042553191489,
"grad_norm": 3.9750494956970215,
"learning_rate": 8.597174155770174e-06,
"loss": 0.7686,
"step": 5305
},
{
"epoch": 1.4111702127659576,
"grad_norm": 3.6305007934570312,
"learning_rate": 8.596563231020054e-06,
"loss": 0.7059,
"step": 5306
},
{
"epoch": 1.411436170212766,
"grad_norm": 3.9132840633392334,
"learning_rate": 8.595952194988034e-06,
"loss": 0.8509,
"step": 5307
},
{
"epoch": 1.4117021276595745,
"grad_norm": 4.162221431732178,
"learning_rate": 8.59534104769302e-06,
"loss": 0.82,
"step": 5308
},
{
"epoch": 1.411968085106383,
"grad_norm": 4.090907096862793,
"learning_rate": 8.594729789153919e-06,
"loss": 0.9025,
"step": 5309
},
{
"epoch": 1.4122340425531914,
"grad_norm": 4.178388595581055,
"learning_rate": 8.594118419389648e-06,
"loss": 0.8537,
"step": 5310
},
{
"epoch": 1.4125,
"grad_norm": 3.5532939434051514,
"learning_rate": 8.59350693841912e-06,
"loss": 0.684,
"step": 5311
},
{
"epoch": 1.4127659574468086,
"grad_norm": 3.9625163078308105,
"learning_rate": 8.592895346261258e-06,
"loss": 0.7501,
"step": 5312
},
{
"epoch": 1.413031914893617,
"grad_norm": 3.4592795372009277,
"learning_rate": 8.592283642934983e-06,
"loss": 0.8845,
"step": 5313
},
{
"epoch": 1.4132978723404255,
"grad_norm": 4.265946865081787,
"learning_rate": 8.591671828459222e-06,
"loss": 0.8354,
"step": 5314
},
{
"epoch": 1.413563829787234,
"grad_norm": 4.301452159881592,
"learning_rate": 8.591059902852907e-06,
"loss": 0.9654,
"step": 5315
},
{
"epoch": 1.4138297872340426,
"grad_norm": 3.953643560409546,
"learning_rate": 8.59044786613497e-06,
"loss": 0.8592,
"step": 5316
},
{
"epoch": 1.414095744680851,
"grad_norm": 3.8107998371124268,
"learning_rate": 8.589835718324349e-06,
"loss": 0.7486,
"step": 5317
},
{
"epoch": 1.4143617021276595,
"grad_norm": 4.148920059204102,
"learning_rate": 8.589223459439987e-06,
"loss": 0.8111,
"step": 5318
},
{
"epoch": 1.414627659574468,
"grad_norm": 3.7461628913879395,
"learning_rate": 8.588611089500821e-06,
"loss": 0.7551,
"step": 5319
},
{
"epoch": 1.4148936170212765,
"grad_norm": 4.387768268585205,
"learning_rate": 8.587998608525806e-06,
"loss": 0.933,
"step": 5320
},
{
"epoch": 1.4151595744680852,
"grad_norm": 3.419297933578491,
"learning_rate": 8.587386016533887e-06,
"loss": 0.7643,
"step": 5321
},
{
"epoch": 1.4154255319148936,
"grad_norm": 3.7075390815734863,
"learning_rate": 8.586773313544023e-06,
"loss": 0.7818,
"step": 5322
},
{
"epoch": 1.415691489361702,
"grad_norm": 4.141719341278076,
"learning_rate": 8.586160499575168e-06,
"loss": 0.912,
"step": 5323
},
{
"epoch": 1.4159574468085108,
"grad_norm": 4.2602386474609375,
"learning_rate": 8.585547574646287e-06,
"loss": 0.834,
"step": 5324
},
{
"epoch": 1.4162234042553192,
"grad_norm": 4.043152332305908,
"learning_rate": 8.584934538776342e-06,
"loss": 0.6793,
"step": 5325
},
{
"epoch": 1.4164893617021277,
"grad_norm": 4.062325954437256,
"learning_rate": 8.584321391984301e-06,
"loss": 0.8172,
"step": 5326
},
{
"epoch": 1.4167553191489362,
"grad_norm": 3.731950044631958,
"learning_rate": 8.583708134289138e-06,
"loss": 0.6754,
"step": 5327
},
{
"epoch": 1.4170212765957446,
"grad_norm": 4.3393940925598145,
"learning_rate": 8.583094765709823e-06,
"loss": 0.8304,
"step": 5328
},
{
"epoch": 1.4172872340425533,
"grad_norm": 4.178645610809326,
"learning_rate": 8.582481286265341e-06,
"loss": 0.9168,
"step": 5329
},
{
"epoch": 1.4175531914893618,
"grad_norm": 3.5687899589538574,
"learning_rate": 8.581867695974667e-06,
"loss": 0.6632,
"step": 5330
},
{
"epoch": 1.4178191489361702,
"grad_norm": 3.7236688137054443,
"learning_rate": 8.58125399485679e-06,
"loss": 0.6788,
"step": 5331
},
{
"epoch": 1.4180851063829787,
"grad_norm": 3.8592636585235596,
"learning_rate": 8.5806401829307e-06,
"loss": 0.8632,
"step": 5332
},
{
"epoch": 1.4183510638297872,
"grad_norm": 3.7756807804107666,
"learning_rate": 8.580026260215384e-06,
"loss": 0.6994,
"step": 5333
},
{
"epoch": 1.4186170212765958,
"grad_norm": 3.481576919555664,
"learning_rate": 8.579412226729843e-06,
"loss": 0.8748,
"step": 5334
},
{
"epoch": 1.4188829787234043,
"grad_norm": 3.908369779586792,
"learning_rate": 8.578798082493074e-06,
"loss": 0.7567,
"step": 5335
},
{
"epoch": 1.4191489361702128,
"grad_norm": 4.084057807922363,
"learning_rate": 8.578183827524076e-06,
"loss": 0.9174,
"step": 5336
},
{
"epoch": 1.4194148936170212,
"grad_norm": 4.469969749450684,
"learning_rate": 8.57756946184186e-06,
"loss": 0.9547,
"step": 5337
},
{
"epoch": 1.4196808510638297,
"grad_norm": 3.8578479290008545,
"learning_rate": 8.576954985465431e-06,
"loss": 0.8135,
"step": 5338
},
{
"epoch": 1.4199468085106384,
"grad_norm": 3.7595484256744385,
"learning_rate": 8.576340398413804e-06,
"loss": 0.7724,
"step": 5339
},
{
"epoch": 1.4202127659574468,
"grad_norm": 4.005858898162842,
"learning_rate": 8.575725700705995e-06,
"loss": 0.8386,
"step": 5340
},
{
"epoch": 1.4204787234042553,
"grad_norm": 4.103984355926514,
"learning_rate": 8.575110892361022e-06,
"loss": 0.9413,
"step": 5341
},
{
"epoch": 1.4207446808510638,
"grad_norm": 3.5380845069885254,
"learning_rate": 8.57449597339791e-06,
"loss": 0.8393,
"step": 5342
},
{
"epoch": 1.4210106382978722,
"grad_norm": 3.589729070663452,
"learning_rate": 8.573880943835684e-06,
"loss": 0.7789,
"step": 5343
},
{
"epoch": 1.421276595744681,
"grad_norm": 4.016366004943848,
"learning_rate": 8.573265803693374e-06,
"loss": 0.7377,
"step": 5344
},
{
"epoch": 1.4215425531914894,
"grad_norm": 3.708329439163208,
"learning_rate": 8.572650552990012e-06,
"loss": 0.8608,
"step": 5345
},
{
"epoch": 1.4218085106382978,
"grad_norm": 4.192487716674805,
"learning_rate": 8.572035191744637e-06,
"loss": 0.7963,
"step": 5346
},
{
"epoch": 1.4220744680851065,
"grad_norm": 3.561629056930542,
"learning_rate": 8.571419719976287e-06,
"loss": 0.8004,
"step": 5347
},
{
"epoch": 1.422340425531915,
"grad_norm": 3.7709176540374756,
"learning_rate": 8.570804137704005e-06,
"loss": 0.7012,
"step": 5348
},
{
"epoch": 1.4226063829787234,
"grad_norm": 3.842339515686035,
"learning_rate": 8.57018844494684e-06,
"loss": 0.8063,
"step": 5349
},
{
"epoch": 1.422872340425532,
"grad_norm": 4.014485836029053,
"learning_rate": 8.56957264172384e-06,
"loss": 0.681,
"step": 5350
},
{
"epoch": 1.4231382978723404,
"grad_norm": 3.9877431392669678,
"learning_rate": 8.568956728054061e-06,
"loss": 0.9011,
"step": 5351
},
{
"epoch": 1.423404255319149,
"grad_norm": 3.9741530418395996,
"learning_rate": 8.568340703956558e-06,
"loss": 0.8245,
"step": 5352
},
{
"epoch": 1.4236702127659575,
"grad_norm": 4.008678436279297,
"learning_rate": 8.567724569450393e-06,
"loss": 0.8588,
"step": 5353
},
{
"epoch": 1.423936170212766,
"grad_norm": 4.2688679695129395,
"learning_rate": 8.56710832455463e-06,
"loss": 0.8026,
"step": 5354
},
{
"epoch": 1.4242021276595744,
"grad_norm": 4.144524097442627,
"learning_rate": 8.566491969288333e-06,
"loss": 0.7977,
"step": 5355
},
{
"epoch": 1.424468085106383,
"grad_norm": 4.431448459625244,
"learning_rate": 8.565875503670578e-06,
"loss": 0.9466,
"step": 5356
},
{
"epoch": 1.4247340425531916,
"grad_norm": 3.9344115257263184,
"learning_rate": 8.565258927720436e-06,
"loss": 0.7571,
"step": 5357
},
{
"epoch": 1.425,
"grad_norm": 4.618174076080322,
"learning_rate": 8.564642241456986e-06,
"loss": 0.92,
"step": 5358
},
{
"epoch": 1.4252659574468085,
"grad_norm": 4.515613079071045,
"learning_rate": 8.564025444899308e-06,
"loss": 0.8339,
"step": 5359
},
{
"epoch": 1.425531914893617,
"grad_norm": 3.8892219066619873,
"learning_rate": 8.563408538066486e-06,
"loss": 0.6946,
"step": 5360
},
{
"epoch": 1.4257978723404254,
"grad_norm": 3.8335928916931152,
"learning_rate": 8.562791520977608e-06,
"loss": 0.7894,
"step": 5361
},
{
"epoch": 1.4260638297872341,
"grad_norm": 3.8898446559906006,
"learning_rate": 8.562174393651767e-06,
"loss": 0.6504,
"step": 5362
},
{
"epoch": 1.4263297872340426,
"grad_norm": 3.916454553604126,
"learning_rate": 8.561557156108055e-06,
"loss": 0.8178,
"step": 5363
},
{
"epoch": 1.426595744680851,
"grad_norm": 4.594573020935059,
"learning_rate": 8.560939808365571e-06,
"loss": 0.8554,
"step": 5364
},
{
"epoch": 1.4268617021276595,
"grad_norm": 3.920474052429199,
"learning_rate": 8.56032235044342e-06,
"loss": 0.9173,
"step": 5365
},
{
"epoch": 1.427127659574468,
"grad_norm": 3.8437423706054688,
"learning_rate": 8.5597047823607e-06,
"loss": 0.7551,
"step": 5366
},
{
"epoch": 1.4273936170212767,
"grad_norm": 3.631983518600464,
"learning_rate": 8.559087104136525e-06,
"loss": 0.8889,
"step": 5367
},
{
"epoch": 1.4276595744680851,
"grad_norm": 3.7418458461761475,
"learning_rate": 8.558469315790005e-06,
"loss": 0.7964,
"step": 5368
},
{
"epoch": 1.4279255319148936,
"grad_norm": 4.14785099029541,
"learning_rate": 8.557851417340252e-06,
"loss": 0.8312,
"step": 5369
},
{
"epoch": 1.4281914893617023,
"grad_norm": 4.0224103927612305,
"learning_rate": 8.55723340880639e-06,
"loss": 0.9175,
"step": 5370
},
{
"epoch": 1.4284574468085105,
"grad_norm": 3.899369478225708,
"learning_rate": 8.556615290207538e-06,
"loss": 0.776,
"step": 5371
},
{
"epoch": 1.4287234042553192,
"grad_norm": 3.869248628616333,
"learning_rate": 8.555997061562821e-06,
"loss": 0.7417,
"step": 5372
},
{
"epoch": 1.4289893617021276,
"grad_norm": 3.8381667137145996,
"learning_rate": 8.555378722891367e-06,
"loss": 0.7887,
"step": 5373
},
{
"epoch": 1.429255319148936,
"grad_norm": 4.0374674797058105,
"learning_rate": 8.55476027421231e-06,
"loss": 0.7039,
"step": 5374
},
{
"epoch": 1.4295212765957448,
"grad_norm": 4.473758220672607,
"learning_rate": 8.554141715544788e-06,
"loss": 0.8829,
"step": 5375
},
{
"epoch": 1.4297872340425533,
"grad_norm": 3.995429277420044,
"learning_rate": 8.553523046907934e-06,
"loss": 0.8441,
"step": 5376
},
{
"epoch": 1.4300531914893617,
"grad_norm": 3.942129373550415,
"learning_rate": 8.552904268320895e-06,
"loss": 0.8657,
"step": 5377
},
{
"epoch": 1.4303191489361702,
"grad_norm": 4.163167953491211,
"learning_rate": 8.552285379802811e-06,
"loss": 0.7497,
"step": 5378
},
{
"epoch": 1.4305851063829786,
"grad_norm": 3.926020860671997,
"learning_rate": 8.551666381372839e-06,
"loss": 0.8265,
"step": 5379
},
{
"epoch": 1.4308510638297873,
"grad_norm": 3.686615228652954,
"learning_rate": 8.551047273050126e-06,
"loss": 0.694,
"step": 5380
},
{
"epoch": 1.4311170212765958,
"grad_norm": 4.436965465545654,
"learning_rate": 8.55042805485383e-06,
"loss": 0.929,
"step": 5381
},
{
"epoch": 1.4313829787234043,
"grad_norm": 4.103221416473389,
"learning_rate": 8.549808726803108e-06,
"loss": 0.7724,
"step": 5382
},
{
"epoch": 1.4316489361702127,
"grad_norm": 3.994560718536377,
"learning_rate": 8.549189288917127e-06,
"loss": 0.6845,
"step": 5383
},
{
"epoch": 1.4319148936170212,
"grad_norm": 4.3197712898254395,
"learning_rate": 8.548569741215049e-06,
"loss": 0.8348,
"step": 5384
},
{
"epoch": 1.4321808510638299,
"grad_norm": 4.51045560836792,
"learning_rate": 8.547950083716047e-06,
"loss": 0.8659,
"step": 5385
},
{
"epoch": 1.4324468085106383,
"grad_norm": 4.250168323516846,
"learning_rate": 8.54733031643929e-06,
"loss": 0.9424,
"step": 5386
},
{
"epoch": 1.4327127659574468,
"grad_norm": 3.6297523975372314,
"learning_rate": 8.54671043940396e-06,
"loss": 0.8464,
"step": 5387
},
{
"epoch": 1.4329787234042553,
"grad_norm": 3.914750099182129,
"learning_rate": 8.54609045262923e-06,
"loss": 0.9345,
"step": 5388
},
{
"epoch": 1.4332446808510637,
"grad_norm": 4.086660385131836,
"learning_rate": 8.545470356134289e-06,
"loss": 0.8161,
"step": 5389
},
{
"epoch": 1.4335106382978724,
"grad_norm": 3.657174825668335,
"learning_rate": 8.54485014993832e-06,
"loss": 0.8184,
"step": 5390
},
{
"epoch": 1.4337765957446809,
"grad_norm": 4.197863578796387,
"learning_rate": 8.544229834060512e-06,
"loss": 0.8937,
"step": 5391
},
{
"epoch": 1.4340425531914893,
"grad_norm": 4.215087413787842,
"learning_rate": 8.543609408520062e-06,
"loss": 0.8149,
"step": 5392
},
{
"epoch": 1.434308510638298,
"grad_norm": 4.2908101081848145,
"learning_rate": 8.542988873336164e-06,
"loss": 0.7731,
"step": 5393
},
{
"epoch": 1.4345744680851062,
"grad_norm": 3.921720266342163,
"learning_rate": 8.54236822852802e-06,
"loss": 0.7697,
"step": 5394
},
{
"epoch": 1.434840425531915,
"grad_norm": 4.464201927185059,
"learning_rate": 8.54174747411483e-06,
"loss": 0.8365,
"step": 5395
},
{
"epoch": 1.4351063829787234,
"grad_norm": 3.9795491695404053,
"learning_rate": 8.541126610115806e-06,
"loss": 0.8086,
"step": 5396
},
{
"epoch": 1.4353723404255319,
"grad_norm": 4.0533766746521,
"learning_rate": 8.540505636550153e-06,
"loss": 0.7996,
"step": 5397
},
{
"epoch": 1.4356382978723405,
"grad_norm": 4.261003494262695,
"learning_rate": 8.53988455343709e-06,
"loss": 0.7748,
"step": 5398
},
{
"epoch": 1.435904255319149,
"grad_norm": 4.159748077392578,
"learning_rate": 8.53926336079583e-06,
"loss": 0.8867,
"step": 5399
},
{
"epoch": 1.4361702127659575,
"grad_norm": 3.9314358234405518,
"learning_rate": 8.538642058645595e-06,
"loss": 0.8713,
"step": 5400
},
{
"epoch": 1.436436170212766,
"grad_norm": 3.8043625354766846,
"learning_rate": 8.538020647005607e-06,
"loss": 0.7276,
"step": 5401
},
{
"epoch": 1.4367021276595744,
"grad_norm": 4.576129913330078,
"learning_rate": 8.537399125895096e-06,
"loss": 0.7822,
"step": 5402
},
{
"epoch": 1.436968085106383,
"grad_norm": 3.801168918609619,
"learning_rate": 8.53677749533329e-06,
"loss": 0.8445,
"step": 5403
},
{
"epoch": 1.4372340425531915,
"grad_norm": 3.763317108154297,
"learning_rate": 8.536155755339427e-06,
"loss": 0.7572,
"step": 5404
},
{
"epoch": 1.4375,
"grad_norm": 4.1881256103515625,
"learning_rate": 8.535533905932739e-06,
"loss": 0.8398,
"step": 5405
},
{
"epoch": 1.4377659574468085,
"grad_norm": 3.61997127532959,
"learning_rate": 8.534911947132469e-06,
"loss": 0.674,
"step": 5406
},
{
"epoch": 1.438031914893617,
"grad_norm": 3.6583242416381836,
"learning_rate": 8.534289878957863e-06,
"loss": 0.6655,
"step": 5407
},
{
"epoch": 1.4382978723404256,
"grad_norm": 3.9012091159820557,
"learning_rate": 8.533667701428167e-06,
"loss": 0.6869,
"step": 5408
},
{
"epoch": 1.438563829787234,
"grad_norm": 3.890615463256836,
"learning_rate": 8.53304541456263e-06,
"loss": 0.8431,
"step": 5409
},
{
"epoch": 1.4388297872340425,
"grad_norm": 3.8987715244293213,
"learning_rate": 8.532423018380511e-06,
"loss": 0.8705,
"step": 5410
},
{
"epoch": 1.439095744680851,
"grad_norm": 4.005768775939941,
"learning_rate": 8.531800512901066e-06,
"loss": 0.8555,
"step": 5411
},
{
"epoch": 1.4393617021276595,
"grad_norm": 3.9035804271698,
"learning_rate": 8.531177898143552e-06,
"loss": 0.7811,
"step": 5412
},
{
"epoch": 1.4396276595744681,
"grad_norm": 4.260951995849609,
"learning_rate": 8.530555174127236e-06,
"loss": 0.9168,
"step": 5413
},
{
"epoch": 1.4398936170212766,
"grad_norm": 4.07423210144043,
"learning_rate": 8.529932340871388e-06,
"loss": 0.7437,
"step": 5414
},
{
"epoch": 1.440159574468085,
"grad_norm": 3.9797050952911377,
"learning_rate": 8.529309398395275e-06,
"loss": 0.707,
"step": 5415
},
{
"epoch": 1.4404255319148938,
"grad_norm": 3.7319893836975098,
"learning_rate": 8.528686346718177e-06,
"loss": 0.7089,
"step": 5416
},
{
"epoch": 1.440691489361702,
"grad_norm": 4.224223613739014,
"learning_rate": 8.528063185859367e-06,
"loss": 0.786,
"step": 5417
},
{
"epoch": 1.4409574468085107,
"grad_norm": 4.449718952178955,
"learning_rate": 8.527439915838129e-06,
"loss": 0.8129,
"step": 5418
},
{
"epoch": 1.4412234042553191,
"grad_norm": 3.991421937942505,
"learning_rate": 8.526816536673748e-06,
"loss": 0.9446,
"step": 5419
},
{
"epoch": 1.4414893617021276,
"grad_norm": 3.5149245262145996,
"learning_rate": 8.52619304838551e-06,
"loss": 0.738,
"step": 5420
},
{
"epoch": 1.4417553191489363,
"grad_norm": 4.034007549285889,
"learning_rate": 8.525569450992707e-06,
"loss": 0.8011,
"step": 5421
},
{
"epoch": 1.4420212765957447,
"grad_norm": 4.191031455993652,
"learning_rate": 8.524945744514634e-06,
"loss": 0.9352,
"step": 5422
},
{
"epoch": 1.4422872340425532,
"grad_norm": 3.4210205078125,
"learning_rate": 8.524321928970591e-06,
"loss": 0.7345,
"step": 5423
},
{
"epoch": 1.4425531914893617,
"grad_norm": 3.573930263519287,
"learning_rate": 8.523698004379878e-06,
"loss": 0.6936,
"step": 5424
},
{
"epoch": 1.4428191489361701,
"grad_norm": 3.847769260406494,
"learning_rate": 8.523073970761799e-06,
"loss": 0.7465,
"step": 5425
},
{
"epoch": 1.4430851063829788,
"grad_norm": 3.526007652282715,
"learning_rate": 8.522449828135663e-06,
"loss": 0.8042,
"step": 5426
},
{
"epoch": 1.4433510638297873,
"grad_norm": 3.3529438972473145,
"learning_rate": 8.521825576520784e-06,
"loss": 0.6523,
"step": 5427
},
{
"epoch": 1.4436170212765957,
"grad_norm": 3.608856678009033,
"learning_rate": 8.521201215936474e-06,
"loss": 0.753,
"step": 5428
},
{
"epoch": 1.4438829787234042,
"grad_norm": 3.78037691116333,
"learning_rate": 8.520576746402052e-06,
"loss": 0.9188,
"step": 5429
},
{
"epoch": 1.4441489361702127,
"grad_norm": 3.6370112895965576,
"learning_rate": 8.519952167936842e-06,
"loss": 0.7606,
"step": 5430
},
{
"epoch": 1.4444148936170214,
"grad_norm": 4.091804504394531,
"learning_rate": 8.519327480560169e-06,
"loss": 0.8833,
"step": 5431
},
{
"epoch": 1.4446808510638298,
"grad_norm": 4.076303482055664,
"learning_rate": 8.518702684291358e-06,
"loss": 0.7852,
"step": 5432
},
{
"epoch": 1.4449468085106383,
"grad_norm": 3.845811605453491,
"learning_rate": 8.518077779149744e-06,
"loss": 0.7455,
"step": 5433
},
{
"epoch": 1.4452127659574467,
"grad_norm": 4.302513599395752,
"learning_rate": 8.517452765154661e-06,
"loss": 0.7273,
"step": 5434
},
{
"epoch": 1.4454787234042552,
"grad_norm": 3.78494930267334,
"learning_rate": 8.516827642325447e-06,
"loss": 0.7468,
"step": 5435
},
{
"epoch": 1.445744680851064,
"grad_norm": 3.9590561389923096,
"learning_rate": 8.516202410681446e-06,
"loss": 0.9023,
"step": 5436
},
{
"epoch": 1.4460106382978724,
"grad_norm": 4.2443766593933105,
"learning_rate": 8.515577070242005e-06,
"loss": 0.9363,
"step": 5437
},
{
"epoch": 1.4462765957446808,
"grad_norm": 3.511875867843628,
"learning_rate": 8.514951621026468e-06,
"loss": 0.7257,
"step": 5438
},
{
"epoch": 1.4465425531914895,
"grad_norm": 3.931488513946533,
"learning_rate": 8.51432606305419e-06,
"loss": 0.794,
"step": 5439
},
{
"epoch": 1.4468085106382977,
"grad_norm": 4.520570755004883,
"learning_rate": 8.513700396344527e-06,
"loss": 0.9367,
"step": 5440
},
{
"epoch": 1.4470744680851064,
"grad_norm": 4.023960113525391,
"learning_rate": 8.513074620916835e-06,
"loss": 0.8083,
"step": 5441
},
{
"epoch": 1.4473404255319149,
"grad_norm": 3.8863484859466553,
"learning_rate": 8.512448736790479e-06,
"loss": 0.7789,
"step": 5442
},
{
"epoch": 1.4476063829787233,
"grad_norm": 3.4847662448883057,
"learning_rate": 8.511822743984824e-06,
"loss": 0.6853,
"step": 5443
},
{
"epoch": 1.447872340425532,
"grad_norm": 3.668828010559082,
"learning_rate": 8.511196642519237e-06,
"loss": 0.8037,
"step": 5444
},
{
"epoch": 1.4481382978723405,
"grad_norm": 3.801157236099243,
"learning_rate": 8.510570432413095e-06,
"loss": 0.8393,
"step": 5445
},
{
"epoch": 1.448404255319149,
"grad_norm": 4.479011535644531,
"learning_rate": 8.509944113685769e-06,
"loss": 0.9082,
"step": 5446
},
{
"epoch": 1.4486702127659574,
"grad_norm": 4.385382652282715,
"learning_rate": 8.509317686356638e-06,
"loss": 0.9118,
"step": 5447
},
{
"epoch": 1.4489361702127659,
"grad_norm": 4.001799583435059,
"learning_rate": 8.50869115044509e-06,
"loss": 0.7022,
"step": 5448
},
{
"epoch": 1.4492021276595746,
"grad_norm": 4.2879228591918945,
"learning_rate": 8.508064505970503e-06,
"loss": 0.8253,
"step": 5449
},
{
"epoch": 1.449468085106383,
"grad_norm": 3.933523654937744,
"learning_rate": 8.507437752952271e-06,
"loss": 0.8163,
"step": 5450
},
{
"epoch": 1.4497340425531915,
"grad_norm": 4.011867046356201,
"learning_rate": 8.506810891409786e-06,
"loss": 0.8196,
"step": 5451
},
{
"epoch": 1.45,
"grad_norm": 4.269194602966309,
"learning_rate": 8.506183921362443e-06,
"loss": 0.7912,
"step": 5452
},
{
"epoch": 1.4502659574468084,
"grad_norm": 4.043778896331787,
"learning_rate": 8.505556842829643e-06,
"loss": 0.7842,
"step": 5453
},
{
"epoch": 1.450531914893617,
"grad_norm": 4.532417297363281,
"learning_rate": 8.504929655830785e-06,
"loss": 0.9794,
"step": 5454
},
{
"epoch": 1.4507978723404256,
"grad_norm": 3.571371555328369,
"learning_rate": 8.504302360385276e-06,
"loss": 0.8234,
"step": 5455
},
{
"epoch": 1.451063829787234,
"grad_norm": 3.6812736988067627,
"learning_rate": 8.50367495651253e-06,
"loss": 0.8207,
"step": 5456
},
{
"epoch": 1.4513297872340425,
"grad_norm": 3.88917875289917,
"learning_rate": 8.503047444231954e-06,
"loss": 0.8452,
"step": 5457
},
{
"epoch": 1.451595744680851,
"grad_norm": 3.7152698040008545,
"learning_rate": 8.502419823562964e-06,
"loss": 0.7018,
"step": 5458
},
{
"epoch": 1.4518617021276596,
"grad_norm": 3.9872684478759766,
"learning_rate": 8.501792094524983e-06,
"loss": 0.9355,
"step": 5459
},
{
"epoch": 1.452127659574468,
"grad_norm": 3.8965933322906494,
"learning_rate": 8.501164257137431e-06,
"loss": 0.7547,
"step": 5460
},
{
"epoch": 1.4523936170212766,
"grad_norm": 4.248835563659668,
"learning_rate": 8.500536311419735e-06,
"loss": 0.8456,
"step": 5461
},
{
"epoch": 1.452659574468085,
"grad_norm": 4.09518575668335,
"learning_rate": 8.499908257391324e-06,
"loss": 0.8698,
"step": 5462
},
{
"epoch": 1.4529255319148935,
"grad_norm": 4.262086391448975,
"learning_rate": 8.49928009507163e-06,
"loss": 0.761,
"step": 5463
},
{
"epoch": 1.4531914893617022,
"grad_norm": 3.634997606277466,
"learning_rate": 8.49865182448009e-06,
"loss": 0.7712,
"step": 5464
},
{
"epoch": 1.4534574468085106,
"grad_norm": 4.407344818115234,
"learning_rate": 8.498023445636145e-06,
"loss": 0.8103,
"step": 5465
},
{
"epoch": 1.453723404255319,
"grad_norm": 3.926379680633545,
"learning_rate": 8.497394958559236e-06,
"loss": 0.7233,
"step": 5466
},
{
"epoch": 1.4539893617021278,
"grad_norm": 4.115360736846924,
"learning_rate": 8.496766363268809e-06,
"loss": 0.9513,
"step": 5467
},
{
"epoch": 1.4542553191489362,
"grad_norm": 4.249356269836426,
"learning_rate": 8.496137659784313e-06,
"loss": 0.7799,
"step": 5468
},
{
"epoch": 1.4545212765957447,
"grad_norm": 3.9418179988861084,
"learning_rate": 8.495508848125202e-06,
"loss": 0.7216,
"step": 5469
},
{
"epoch": 1.4547872340425532,
"grad_norm": 4.33933687210083,
"learning_rate": 8.494879928310934e-06,
"loss": 0.8312,
"step": 5470
},
{
"epoch": 1.4550531914893616,
"grad_norm": 4.497339248657227,
"learning_rate": 8.494250900360963e-06,
"loss": 0.6842,
"step": 5471
},
{
"epoch": 1.4553191489361703,
"grad_norm": 4.439492225646973,
"learning_rate": 8.493621764294757e-06,
"loss": 0.8134,
"step": 5472
},
{
"epoch": 1.4555851063829788,
"grad_norm": 4.622555255889893,
"learning_rate": 8.49299252013178e-06,
"loss": 0.878,
"step": 5473
},
{
"epoch": 1.4558510638297872,
"grad_norm": 4.369466781616211,
"learning_rate": 8.492363167891502e-06,
"loss": 0.7228,
"step": 5474
},
{
"epoch": 1.4561170212765957,
"grad_norm": 4.223091125488281,
"learning_rate": 8.491733707593395e-06,
"loss": 0.8303,
"step": 5475
},
{
"epoch": 1.4563829787234042,
"grad_norm": 4.063412189483643,
"learning_rate": 8.491104139256936e-06,
"loss": 0.8504,
"step": 5476
},
{
"epoch": 1.4566489361702128,
"grad_norm": 4.342689514160156,
"learning_rate": 8.490474462901605e-06,
"loss": 0.841,
"step": 5477
},
{
"epoch": 1.4569148936170213,
"grad_norm": 4.090299129486084,
"learning_rate": 8.489844678546886e-06,
"loss": 0.8391,
"step": 5478
},
{
"epoch": 1.4571808510638298,
"grad_norm": 3.786254644393921,
"learning_rate": 8.489214786212263e-06,
"loss": 0.8498,
"step": 5479
},
{
"epoch": 1.4574468085106382,
"grad_norm": 4.191230297088623,
"learning_rate": 8.488584785917226e-06,
"loss": 0.7906,
"step": 5480
},
{
"epoch": 1.4577127659574467,
"grad_norm": 3.928368330001831,
"learning_rate": 8.487954677681269e-06,
"loss": 0.8001,
"step": 5481
},
{
"epoch": 1.4579787234042554,
"grad_norm": 3.579162836074829,
"learning_rate": 8.487324461523887e-06,
"loss": 0.8023,
"step": 5482
},
{
"epoch": 1.4582446808510638,
"grad_norm": 3.6825640201568604,
"learning_rate": 8.486694137464582e-06,
"loss": 0.7853,
"step": 5483
},
{
"epoch": 1.4585106382978723,
"grad_norm": 4.125916004180908,
"learning_rate": 8.486063705522853e-06,
"loss": 0.7216,
"step": 5484
},
{
"epoch": 1.4587765957446808,
"grad_norm": 4.086201190948486,
"learning_rate": 8.48543316571821e-06,
"loss": 0.7723,
"step": 5485
},
{
"epoch": 1.4590425531914892,
"grad_norm": 3.6054461002349854,
"learning_rate": 8.484802518070161e-06,
"loss": 0.7561,
"step": 5486
},
{
"epoch": 1.459308510638298,
"grad_norm": 3.9755938053131104,
"learning_rate": 8.48417176259822e-06,
"loss": 0.7914,
"step": 5487
},
{
"epoch": 1.4595744680851064,
"grad_norm": 3.4087741374969482,
"learning_rate": 8.483540899321901e-06,
"loss": 0.8288,
"step": 5488
},
{
"epoch": 1.4598404255319148,
"grad_norm": 4.220149517059326,
"learning_rate": 8.482909928260726e-06,
"loss": 0.9088,
"step": 5489
},
{
"epoch": 1.4601063829787235,
"grad_norm": 4.157181262969971,
"learning_rate": 8.482278849434218e-06,
"loss": 0.8727,
"step": 5490
},
{
"epoch": 1.460372340425532,
"grad_norm": 4.077250003814697,
"learning_rate": 8.481647662861901e-06,
"loss": 0.7891,
"step": 5491
},
{
"epoch": 1.4606382978723405,
"grad_norm": 3.9751412868499756,
"learning_rate": 8.481016368563308e-06,
"loss": 0.8363,
"step": 5492
},
{
"epoch": 1.460904255319149,
"grad_norm": 4.07692813873291,
"learning_rate": 8.480384966557969e-06,
"loss": 1.0291,
"step": 5493
},
{
"epoch": 1.4611702127659574,
"grad_norm": 3.963118553161621,
"learning_rate": 8.479753456865422e-06,
"loss": 0.778,
"step": 5494
},
{
"epoch": 1.461436170212766,
"grad_norm": 4.359419822692871,
"learning_rate": 8.479121839505205e-06,
"loss": 0.8413,
"step": 5495
},
{
"epoch": 1.4617021276595745,
"grad_norm": 4.071464538574219,
"learning_rate": 8.478490114496862e-06,
"loss": 0.802,
"step": 5496
},
{
"epoch": 1.461968085106383,
"grad_norm": 4.090579509735107,
"learning_rate": 8.477858281859941e-06,
"loss": 0.8182,
"step": 5497
},
{
"epoch": 1.4622340425531914,
"grad_norm": 4.3386006355285645,
"learning_rate": 8.47722634161399e-06,
"loss": 0.7349,
"step": 5498
},
{
"epoch": 1.4625,
"grad_norm": 3.489248275756836,
"learning_rate": 8.476594293778561e-06,
"loss": 0.7918,
"step": 5499
},
{
"epoch": 1.4627659574468086,
"grad_norm": 3.849106788635254,
"learning_rate": 8.475962138373212e-06,
"loss": 0.7986,
"step": 5500
},
{
"epoch": 1.4627659574468086,
"eval_loss": 1.2964370250701904,
"eval_runtime": 13.6602,
"eval_samples_per_second": 29.282,
"eval_steps_per_second": 3.66,
"step": 5500
},
{
"epoch": 1.463031914893617,
"grad_norm": 3.9225049018859863,
"learning_rate": 8.475329875417502e-06,
"loss": 0.7197,
"step": 5501
},
{
"epoch": 1.4632978723404255,
"grad_norm": 3.952686071395874,
"learning_rate": 8.474697504930994e-06,
"loss": 0.8378,
"step": 5502
},
{
"epoch": 1.463563829787234,
"grad_norm": 3.452550172805786,
"learning_rate": 8.474065026933254e-06,
"loss": 0.8279,
"step": 5503
},
{
"epoch": 1.4638297872340424,
"grad_norm": 3.6807174682617188,
"learning_rate": 8.473432441443852e-06,
"loss": 0.8527,
"step": 5504
},
{
"epoch": 1.4640957446808511,
"grad_norm": 3.6200850009918213,
"learning_rate": 8.472799748482361e-06,
"loss": 0.7749,
"step": 5505
},
{
"epoch": 1.4643617021276596,
"grad_norm": 4.591206073760986,
"learning_rate": 8.472166948068357e-06,
"loss": 0.8827,
"step": 5506
},
{
"epoch": 1.464627659574468,
"grad_norm": 3.7772765159606934,
"learning_rate": 8.471534040221419e-06,
"loss": 0.8578,
"step": 5507
},
{
"epoch": 1.4648936170212765,
"grad_norm": 3.75657057762146,
"learning_rate": 8.47090102496113e-06,
"loss": 0.8552,
"step": 5508
},
{
"epoch": 1.465159574468085,
"grad_norm": 3.635420322418213,
"learning_rate": 8.470267902307079e-06,
"loss": 0.7732,
"step": 5509
},
{
"epoch": 1.4654255319148937,
"grad_norm": 4.403695583343506,
"learning_rate": 8.469634672278853e-06,
"loss": 0.9379,
"step": 5510
},
{
"epoch": 1.4656914893617021,
"grad_norm": 3.849709987640381,
"learning_rate": 8.469001334896044e-06,
"loss": 0.7691,
"step": 5511
},
{
"epoch": 1.4659574468085106,
"grad_norm": 3.580702066421509,
"learning_rate": 8.46836789017825e-06,
"loss": 0.7887,
"step": 5512
},
{
"epoch": 1.4662234042553193,
"grad_norm": 4.184311866760254,
"learning_rate": 8.46773433814507e-06,
"loss": 0.9119,
"step": 5513
},
{
"epoch": 1.4664893617021277,
"grad_norm": 4.308862686157227,
"learning_rate": 8.467100678816108e-06,
"loss": 0.8483,
"step": 5514
},
{
"epoch": 1.4667553191489362,
"grad_norm": 3.799316883087158,
"learning_rate": 8.466466912210967e-06,
"loss": 0.8143,
"step": 5515
},
{
"epoch": 1.4670212765957447,
"grad_norm": 3.673563003540039,
"learning_rate": 8.465833038349259e-06,
"loss": 0.7485,
"step": 5516
},
{
"epoch": 1.4672872340425531,
"grad_norm": 4.07314395904541,
"learning_rate": 8.465199057250597e-06,
"loss": 0.8663,
"step": 5517
},
{
"epoch": 1.4675531914893618,
"grad_norm": 3.6095144748687744,
"learning_rate": 8.464564968934595e-06,
"loss": 0.6752,
"step": 5518
},
{
"epoch": 1.4678191489361703,
"grad_norm": 3.661813735961914,
"learning_rate": 8.463930773420874e-06,
"loss": 0.8518,
"step": 5519
},
{
"epoch": 1.4680851063829787,
"grad_norm": 4.36665153503418,
"learning_rate": 8.463296470729058e-06,
"loss": 0.7581,
"step": 5520
},
{
"epoch": 1.4683510638297872,
"grad_norm": 4.145575046539307,
"learning_rate": 8.462662060878772e-06,
"loss": 0.8582,
"step": 5521
},
{
"epoch": 1.4686170212765957,
"grad_norm": 3.805684804916382,
"learning_rate": 8.462027543889644e-06,
"loss": 0.718,
"step": 5522
},
{
"epoch": 1.4688829787234043,
"grad_norm": 3.7820284366607666,
"learning_rate": 8.461392919781309e-06,
"loss": 0.7179,
"step": 5523
},
{
"epoch": 1.4691489361702128,
"grad_norm": 4.097955226898193,
"learning_rate": 8.460758188573399e-06,
"loss": 0.7764,
"step": 5524
},
{
"epoch": 1.4694148936170213,
"grad_norm": 4.177279472351074,
"learning_rate": 8.46012335028556e-06,
"loss": 0.8168,
"step": 5525
},
{
"epoch": 1.4696808510638297,
"grad_norm": 4.4050679206848145,
"learning_rate": 8.459488404937426e-06,
"loss": 0.8876,
"step": 5526
},
{
"epoch": 1.4699468085106382,
"grad_norm": 3.7400434017181396,
"learning_rate": 8.458853352548651e-06,
"loss": 0.8693,
"step": 5527
},
{
"epoch": 1.4702127659574469,
"grad_norm": 3.909196138381958,
"learning_rate": 8.458218193138881e-06,
"loss": 0.8237,
"step": 5528
},
{
"epoch": 1.4704787234042553,
"grad_norm": 3.941265344619751,
"learning_rate": 8.457582926727768e-06,
"loss": 0.9123,
"step": 5529
},
{
"epoch": 1.4707446808510638,
"grad_norm": 3.8149471282958984,
"learning_rate": 8.456947553334966e-06,
"loss": 0.6899,
"step": 5530
},
{
"epoch": 1.4710106382978723,
"grad_norm": 3.6952855587005615,
"learning_rate": 8.45631207298014e-06,
"loss": 0.7128,
"step": 5531
},
{
"epoch": 1.4712765957446807,
"grad_norm": 3.9754221439361572,
"learning_rate": 8.45567648568295e-06,
"loss": 0.9245,
"step": 5532
},
{
"epoch": 1.4715425531914894,
"grad_norm": 4.337751388549805,
"learning_rate": 8.455040791463057e-06,
"loss": 0.8776,
"step": 5533
},
{
"epoch": 1.4718085106382979,
"grad_norm": 3.7709763050079346,
"learning_rate": 8.454404990340137e-06,
"loss": 0.6869,
"step": 5534
},
{
"epoch": 1.4720744680851063,
"grad_norm": 4.196871280670166,
"learning_rate": 8.453769082333858e-06,
"loss": 0.8704,
"step": 5535
},
{
"epoch": 1.472340425531915,
"grad_norm": 3.957577705383301,
"learning_rate": 8.453133067463898e-06,
"loss": 0.7857,
"step": 5536
},
{
"epoch": 1.4726063829787235,
"grad_norm": 3.942445993423462,
"learning_rate": 8.452496945749934e-06,
"loss": 0.875,
"step": 5537
},
{
"epoch": 1.472872340425532,
"grad_norm": 4.122093200683594,
"learning_rate": 8.451860717211653e-06,
"loss": 0.8047,
"step": 5538
},
{
"epoch": 1.4731382978723404,
"grad_norm": 3.8919665813446045,
"learning_rate": 8.451224381868735e-06,
"loss": 0.9631,
"step": 5539
},
{
"epoch": 1.4734042553191489,
"grad_norm": 4.186689376831055,
"learning_rate": 8.45058793974087e-06,
"loss": 0.8028,
"step": 5540
},
{
"epoch": 1.4736702127659576,
"grad_norm": 4.130399703979492,
"learning_rate": 8.449951390847754e-06,
"loss": 0.7659,
"step": 5541
},
{
"epoch": 1.473936170212766,
"grad_norm": 3.8741462230682373,
"learning_rate": 8.44931473520908e-06,
"loss": 0.74,
"step": 5542
},
{
"epoch": 1.4742021276595745,
"grad_norm": 4.210333824157715,
"learning_rate": 8.448677972844546e-06,
"loss": 0.7675,
"step": 5543
},
{
"epoch": 1.474468085106383,
"grad_norm": 3.959024429321289,
"learning_rate": 8.448041103773857e-06,
"loss": 0.8771,
"step": 5544
},
{
"epoch": 1.4747340425531914,
"grad_norm": 3.9098892211914062,
"learning_rate": 8.447404128016715e-06,
"loss": 0.8756,
"step": 5545
},
{
"epoch": 1.475,
"grad_norm": 3.9612808227539062,
"learning_rate": 8.446767045592829e-06,
"loss": 0.7888,
"step": 5546
},
{
"epoch": 1.4752659574468086,
"grad_norm": 3.754507303237915,
"learning_rate": 8.446129856521917e-06,
"loss": 0.8611,
"step": 5547
},
{
"epoch": 1.475531914893617,
"grad_norm": 3.97927188873291,
"learning_rate": 8.445492560823686e-06,
"loss": 0.7937,
"step": 5548
},
{
"epoch": 1.4757978723404255,
"grad_norm": 3.8864712715148926,
"learning_rate": 8.44485515851786e-06,
"loss": 0.7687,
"step": 5549
},
{
"epoch": 1.476063829787234,
"grad_norm": 3.407346487045288,
"learning_rate": 8.44421764962416e-06,
"loss": 0.8368,
"step": 5550
},
{
"epoch": 1.4763297872340426,
"grad_norm": 4.162166118621826,
"learning_rate": 8.44358003416231e-06,
"loss": 0.7305,
"step": 5551
},
{
"epoch": 1.476595744680851,
"grad_norm": 4.198580741882324,
"learning_rate": 8.44294231215204e-06,
"loss": 0.9471,
"step": 5552
},
{
"epoch": 1.4768617021276595,
"grad_norm": 3.6172430515289307,
"learning_rate": 8.44230448361308e-06,
"loss": 0.84,
"step": 5553
},
{
"epoch": 1.477127659574468,
"grad_norm": 3.573073387145996,
"learning_rate": 8.441666548565169e-06,
"loss": 0.8333,
"step": 5554
},
{
"epoch": 1.4773936170212765,
"grad_norm": 3.864596128463745,
"learning_rate": 8.441028507028041e-06,
"loss": 0.7169,
"step": 5555
},
{
"epoch": 1.4776595744680852,
"grad_norm": 3.62256121635437,
"learning_rate": 8.44039035902144e-06,
"loss": 0.8163,
"step": 5556
},
{
"epoch": 1.4779255319148936,
"grad_norm": 3.8395614624023438,
"learning_rate": 8.43975210456511e-06,
"loss": 0.7796,
"step": 5557
},
{
"epoch": 1.478191489361702,
"grad_norm": 3.980595111846924,
"learning_rate": 8.439113743678801e-06,
"loss": 0.9652,
"step": 5558
},
{
"epoch": 1.4784574468085108,
"grad_norm": 3.7857303619384766,
"learning_rate": 8.438475276382264e-06,
"loss": 0.9076,
"step": 5559
},
{
"epoch": 1.4787234042553192,
"grad_norm": 3.4477193355560303,
"learning_rate": 8.437836702695253e-06,
"loss": 0.727,
"step": 5560
},
{
"epoch": 1.4789893617021277,
"grad_norm": 3.9439425468444824,
"learning_rate": 8.437198022637527e-06,
"loss": 0.7404,
"step": 5561
},
{
"epoch": 1.4792553191489362,
"grad_norm": 3.8489301204681396,
"learning_rate": 8.436559236228849e-06,
"loss": 0.7598,
"step": 5562
},
{
"epoch": 1.4795212765957446,
"grad_norm": 3.9537103176116943,
"learning_rate": 8.435920343488978e-06,
"loss": 0.81,
"step": 5563
},
{
"epoch": 1.4797872340425533,
"grad_norm": 4.361562252044678,
"learning_rate": 8.435281344437691e-06,
"loss": 0.9021,
"step": 5564
},
{
"epoch": 1.4800531914893618,
"grad_norm": 4.177056789398193,
"learning_rate": 8.434642239094752e-06,
"loss": 0.7916,
"step": 5565
},
{
"epoch": 1.4803191489361702,
"grad_norm": 4.249316215515137,
"learning_rate": 8.43400302747994e-06,
"loss": 0.8578,
"step": 5566
},
{
"epoch": 1.4805851063829787,
"grad_norm": 4.1586198806762695,
"learning_rate": 8.43336370961303e-06,
"loss": 0.7918,
"step": 5567
},
{
"epoch": 1.4808510638297872,
"grad_norm": 3.8984861373901367,
"learning_rate": 8.432724285513804e-06,
"loss": 0.8302,
"step": 5568
},
{
"epoch": 1.4811170212765958,
"grad_norm": 4.403296947479248,
"learning_rate": 8.43208475520205e-06,
"loss": 0.9246,
"step": 5569
},
{
"epoch": 1.4813829787234043,
"grad_norm": 4.00664758682251,
"learning_rate": 8.43144511869755e-06,
"loss": 0.7915,
"step": 5570
},
{
"epoch": 1.4816489361702128,
"grad_norm": 4.43447732925415,
"learning_rate": 8.4308053760201e-06,
"loss": 0.811,
"step": 5571
},
{
"epoch": 1.4819148936170212,
"grad_norm": 4.107089519500732,
"learning_rate": 8.43016552718949e-06,
"loss": 0.9385,
"step": 5572
},
{
"epoch": 1.4821808510638297,
"grad_norm": 4.0541229248046875,
"learning_rate": 8.429525572225521e-06,
"loss": 0.7683,
"step": 5573
},
{
"epoch": 1.4824468085106384,
"grad_norm": 3.8049004077911377,
"learning_rate": 8.428885511147994e-06,
"loss": 0.8483,
"step": 5574
},
{
"epoch": 1.4827127659574468,
"grad_norm": 4.220947265625,
"learning_rate": 8.42824534397671e-06,
"loss": 0.8209,
"step": 5575
},
{
"epoch": 1.4829787234042553,
"grad_norm": 3.299015998840332,
"learning_rate": 8.427605070731482e-06,
"loss": 0.6946,
"step": 5576
},
{
"epoch": 1.4832446808510638,
"grad_norm": 4.028343677520752,
"learning_rate": 8.426964691432116e-06,
"loss": 0.7912,
"step": 5577
},
{
"epoch": 1.4835106382978722,
"grad_norm": 3.6714823246002197,
"learning_rate": 8.426324206098429e-06,
"loss": 0.7487,
"step": 5578
},
{
"epoch": 1.483776595744681,
"grad_norm": 3.8498239517211914,
"learning_rate": 8.425683614750235e-06,
"loss": 0.7929,
"step": 5579
},
{
"epoch": 1.4840425531914894,
"grad_norm": 3.6556410789489746,
"learning_rate": 8.425042917407358e-06,
"loss": 0.7774,
"step": 5580
},
{
"epoch": 1.4843085106382978,
"grad_norm": 3.908780336380005,
"learning_rate": 8.424402114089618e-06,
"loss": 0.7533,
"step": 5581
},
{
"epoch": 1.4845744680851065,
"grad_norm": 4.054098129272461,
"learning_rate": 8.42376120481685e-06,
"loss": 0.8575,
"step": 5582
},
{
"epoch": 1.484840425531915,
"grad_norm": 4.667778968811035,
"learning_rate": 8.423120189608876e-06,
"loss": 0.8906,
"step": 5583
},
{
"epoch": 1.4851063829787234,
"grad_norm": 3.960300922393799,
"learning_rate": 8.422479068485531e-06,
"loss": 0.7737,
"step": 5584
},
{
"epoch": 1.485372340425532,
"grad_norm": 4.355529308319092,
"learning_rate": 8.421837841466657e-06,
"loss": 0.8904,
"step": 5585
},
{
"epoch": 1.4856382978723404,
"grad_norm": 4.450819969177246,
"learning_rate": 8.42119650857209e-06,
"loss": 0.8558,
"step": 5586
},
{
"epoch": 1.485904255319149,
"grad_norm": 3.8777942657470703,
"learning_rate": 8.420555069821679e-06,
"loss": 0.8021,
"step": 5587
},
{
"epoch": 1.4861702127659575,
"grad_norm": 3.9618871212005615,
"learning_rate": 8.419913525235264e-06,
"loss": 0.8717,
"step": 5588
},
{
"epoch": 1.486436170212766,
"grad_norm": 3.7627811431884766,
"learning_rate": 8.419271874832697e-06,
"loss": 0.7337,
"step": 5589
},
{
"epoch": 1.4867021276595744,
"grad_norm": 3.9509243965148926,
"learning_rate": 8.418630118633835e-06,
"loss": 0.8209,
"step": 5590
},
{
"epoch": 1.486968085106383,
"grad_norm": 3.8642148971557617,
"learning_rate": 8.417988256658532e-06,
"loss": 0.7907,
"step": 5591
},
{
"epoch": 1.4872340425531916,
"grad_norm": 3.917509078979492,
"learning_rate": 8.417346288926646e-06,
"loss": 0.8037,
"step": 5592
},
{
"epoch": 1.4875,
"grad_norm": 3.5143251419067383,
"learning_rate": 8.416704215458042e-06,
"loss": 0.8127,
"step": 5593
},
{
"epoch": 1.4877659574468085,
"grad_norm": 4.229488372802734,
"learning_rate": 8.41606203627259e-06,
"loss": 0.8681,
"step": 5594
},
{
"epoch": 1.488031914893617,
"grad_norm": 3.636591911315918,
"learning_rate": 8.415419751390155e-06,
"loss": 0.8858,
"step": 5595
},
{
"epoch": 1.4882978723404254,
"grad_norm": 3.9129700660705566,
"learning_rate": 8.414777360830611e-06,
"loss": 0.8607,
"step": 5596
},
{
"epoch": 1.4885638297872341,
"grad_norm": 4.00184965133667,
"learning_rate": 8.414134864613837e-06,
"loss": 0.7551,
"step": 5597
},
{
"epoch": 1.4888297872340426,
"grad_norm": 3.9038429260253906,
"learning_rate": 8.413492262759708e-06,
"loss": 0.7195,
"step": 5598
},
{
"epoch": 1.489095744680851,
"grad_norm": 3.802076816558838,
"learning_rate": 8.412849555288111e-06,
"loss": 0.8092,
"step": 5599
},
{
"epoch": 1.4893617021276595,
"grad_norm": 4.020835876464844,
"learning_rate": 8.41220674221893e-06,
"loss": 0.8439,
"step": 5600
},
{
"epoch": 1.489627659574468,
"grad_norm": 4.310454845428467,
"learning_rate": 8.411563823572057e-06,
"loss": 0.959,
"step": 5601
},
{
"epoch": 1.4898936170212767,
"grad_norm": 4.212212085723877,
"learning_rate": 8.410920799367382e-06,
"loss": 0.784,
"step": 5602
},
{
"epoch": 1.4901595744680851,
"grad_norm": 3.9010252952575684,
"learning_rate": 8.4102776696248e-06,
"loss": 0.7156,
"step": 5603
},
{
"epoch": 1.4904255319148936,
"grad_norm": 4.061422348022461,
"learning_rate": 8.409634434364214e-06,
"loss": 0.8524,
"step": 5604
},
{
"epoch": 1.4906914893617023,
"grad_norm": 4.281171798706055,
"learning_rate": 8.408991093605524e-06,
"loss": 0.8344,
"step": 5605
},
{
"epoch": 1.4909574468085105,
"grad_norm": 4.274752616882324,
"learning_rate": 8.408347647368634e-06,
"loss": 0.8106,
"step": 5606
},
{
"epoch": 1.4912234042553192,
"grad_norm": 3.9846606254577637,
"learning_rate": 8.407704095673454e-06,
"loss": 0.7059,
"step": 5607
},
{
"epoch": 1.4914893617021276,
"grad_norm": 4.1280436515808105,
"learning_rate": 8.4070604385399e-06,
"loss": 0.8267,
"step": 5608
},
{
"epoch": 1.491755319148936,
"grad_norm": 3.7875635623931885,
"learning_rate": 8.406416675987884e-06,
"loss": 0.8078,
"step": 5609
},
{
"epoch": 1.4920212765957448,
"grad_norm": 4.4207444190979,
"learning_rate": 8.405772808037326e-06,
"loss": 0.8452,
"step": 5610
},
{
"epoch": 1.4922872340425533,
"grad_norm": 3.9423201084136963,
"learning_rate": 8.405128834708147e-06,
"loss": 0.7491,
"step": 5611
},
{
"epoch": 1.4925531914893617,
"grad_norm": 3.669431686401367,
"learning_rate": 8.404484756020272e-06,
"loss": 0.7232,
"step": 5612
},
{
"epoch": 1.4928191489361702,
"grad_norm": 4.371226787567139,
"learning_rate": 8.403840571993631e-06,
"loss": 0.7899,
"step": 5613
},
{
"epoch": 1.4930851063829786,
"grad_norm": 4.185215950012207,
"learning_rate": 8.403196282648156e-06,
"loss": 0.9727,
"step": 5614
},
{
"epoch": 1.4933510638297873,
"grad_norm": 3.5517239570617676,
"learning_rate": 8.402551888003781e-06,
"loss": 0.805,
"step": 5615
},
{
"epoch": 1.4936170212765958,
"grad_norm": 3.4188995361328125,
"learning_rate": 8.401907388080443e-06,
"loss": 0.7345,
"step": 5616
},
{
"epoch": 1.4938829787234043,
"grad_norm": 3.7187201976776123,
"learning_rate": 8.401262782898087e-06,
"loss": 0.7147,
"step": 5617
},
{
"epoch": 1.4941489361702127,
"grad_norm": 4.5645976066589355,
"learning_rate": 8.400618072476655e-06,
"loss": 0.8707,
"step": 5618
},
{
"epoch": 1.4944148936170212,
"grad_norm": 3.7568912506103516,
"learning_rate": 8.399973256836097e-06,
"loss": 0.8637,
"step": 5619
},
{
"epoch": 1.4946808510638299,
"grad_norm": 4.120610237121582,
"learning_rate": 8.399328335996362e-06,
"loss": 0.8749,
"step": 5620
},
{
"epoch": 1.4949468085106383,
"grad_norm": 3.780111312866211,
"learning_rate": 8.398683309977407e-06,
"loss": 0.739,
"step": 5621
},
{
"epoch": 1.4952127659574468,
"grad_norm": 4.050705909729004,
"learning_rate": 8.39803817879919e-06,
"loss": 0.869,
"step": 5622
},
{
"epoch": 1.4954787234042553,
"grad_norm": 3.941727876663208,
"learning_rate": 8.39739294248167e-06,
"loss": 0.8147,
"step": 5623
},
{
"epoch": 1.4957446808510637,
"grad_norm": 4.117156505584717,
"learning_rate": 8.396747601044812e-06,
"loss": 0.843,
"step": 5624
},
{
"epoch": 1.4960106382978724,
"grad_norm": 3.813788890838623,
"learning_rate": 8.396102154508584e-06,
"loss": 0.7214,
"step": 5625
},
{
"epoch": 1.4962765957446809,
"grad_norm": 4.435267448425293,
"learning_rate": 8.395456602892957e-06,
"loss": 0.9548,
"step": 5626
},
{
"epoch": 1.4965425531914893,
"grad_norm": 4.178934097290039,
"learning_rate": 8.394810946217905e-06,
"loss": 0.797,
"step": 5627
},
{
"epoch": 1.496808510638298,
"grad_norm": 4.201347827911377,
"learning_rate": 8.394165184503406e-06,
"loss": 0.8086,
"step": 5628
},
{
"epoch": 1.4970744680851062,
"grad_norm": 4.090775489807129,
"learning_rate": 8.39351931776944e-06,
"loss": 0.8206,
"step": 5629
},
{
"epoch": 1.497340425531915,
"grad_norm": 3.81706166267395,
"learning_rate": 8.392873346035992e-06,
"loss": 0.7876,
"step": 5630
},
{
"epoch": 1.4976063829787234,
"grad_norm": 4.212119102478027,
"learning_rate": 8.392227269323046e-06,
"loss": 0.8634,
"step": 5631
},
{
"epoch": 1.4978723404255319,
"grad_norm": 4.333573818206787,
"learning_rate": 8.391581087650596e-06,
"loss": 0.8157,
"step": 5632
},
{
"epoch": 1.4981382978723405,
"grad_norm": 4.08198356628418,
"learning_rate": 8.390934801038632e-06,
"loss": 0.8804,
"step": 5633
},
{
"epoch": 1.498404255319149,
"grad_norm": 3.6360666751861572,
"learning_rate": 8.390288409507156e-06,
"loss": 0.6327,
"step": 5634
},
{
"epoch": 1.4986702127659575,
"grad_norm": 4.428205490112305,
"learning_rate": 8.389641913076163e-06,
"loss": 0.8857,
"step": 5635
},
{
"epoch": 1.498936170212766,
"grad_norm": 4.506261825561523,
"learning_rate": 8.388995311765657e-06,
"loss": 0.8376,
"step": 5636
},
{
"epoch": 1.4992021276595744,
"grad_norm": 3.7618744373321533,
"learning_rate": 8.388348605595649e-06,
"loss": 0.8656,
"step": 5637
},
{
"epoch": 1.499468085106383,
"grad_norm": 3.843425750732422,
"learning_rate": 8.387701794586145e-06,
"loss": 0.7474,
"step": 5638
},
{
"epoch": 1.4997340425531915,
"grad_norm": 3.933223009109497,
"learning_rate": 8.387054878757157e-06,
"loss": 0.9316,
"step": 5639
},
{
"epoch": 1.5,
"grad_norm": 3.8141305446624756,
"learning_rate": 8.386407858128707e-06,
"loss": 0.7359,
"step": 5640
},
{
"epoch": 1.5002659574468085,
"grad_norm": 4.184633731842041,
"learning_rate": 8.385760732720809e-06,
"loss": 0.8206,
"step": 5641
},
{
"epoch": 1.500531914893617,
"grad_norm": 3.9276089668273926,
"learning_rate": 8.385113502553487e-06,
"loss": 0.8148,
"step": 5642
},
{
"epoch": 1.5007978723404256,
"grad_norm": 4.084725856781006,
"learning_rate": 8.384466167646768e-06,
"loss": 0.8435,
"step": 5643
},
{
"epoch": 1.501063829787234,
"grad_norm": 4.092894077301025,
"learning_rate": 8.383818728020681e-06,
"loss": 0.7876,
"step": 5644
},
{
"epoch": 1.5013297872340425,
"grad_norm": 3.6473567485809326,
"learning_rate": 8.383171183695258e-06,
"loss": 0.7427,
"step": 5645
},
{
"epoch": 1.5015957446808512,
"grad_norm": 4.224092483520508,
"learning_rate": 8.382523534690537e-06,
"loss": 0.8959,
"step": 5646
},
{
"epoch": 1.5018617021276595,
"grad_norm": 4.414750576019287,
"learning_rate": 8.381875781026553e-06,
"loss": 0.746,
"step": 5647
},
{
"epoch": 1.5021276595744681,
"grad_norm": 4.199521064758301,
"learning_rate": 8.381227922723353e-06,
"loss": 0.8083,
"step": 5648
},
{
"epoch": 1.5023936170212766,
"grad_norm": 3.8716115951538086,
"learning_rate": 8.380579959800981e-06,
"loss": 0.7007,
"step": 5649
},
{
"epoch": 1.502659574468085,
"grad_norm": 4.189701080322266,
"learning_rate": 8.379931892279483e-06,
"loss": 0.7694,
"step": 5650
},
{
"epoch": 1.5029255319148938,
"grad_norm": 3.577147960662842,
"learning_rate": 8.379283720178913e-06,
"loss": 0.7776,
"step": 5651
},
{
"epoch": 1.503191489361702,
"grad_norm": 4.009932994842529,
"learning_rate": 8.378635443519327e-06,
"loss": 0.7633,
"step": 5652
},
{
"epoch": 1.5034574468085107,
"grad_norm": 4.129024505615234,
"learning_rate": 8.377987062320782e-06,
"loss": 0.7067,
"step": 5653
},
{
"epoch": 1.5037234042553191,
"grad_norm": 3.6017751693725586,
"learning_rate": 8.37733857660334e-06,
"loss": 0.7983,
"step": 5654
},
{
"epoch": 1.5039893617021276,
"grad_norm": 3.799006223678589,
"learning_rate": 8.376689986387066e-06,
"loss": 0.8479,
"step": 5655
},
{
"epoch": 1.5042553191489363,
"grad_norm": 4.5062575340271,
"learning_rate": 8.376041291692028e-06,
"loss": 0.8298,
"step": 5656
},
{
"epoch": 1.5045212765957445,
"grad_norm": 3.729353666305542,
"learning_rate": 8.3753924925383e-06,
"loss": 0.7688,
"step": 5657
},
{
"epoch": 1.5047872340425532,
"grad_norm": 4.237773418426514,
"learning_rate": 8.374743588945951e-06,
"loss": 0.9623,
"step": 5658
},
{
"epoch": 1.5050531914893617,
"grad_norm": 3.5734505653381348,
"learning_rate": 8.374094580935064e-06,
"loss": 0.6333,
"step": 5659
},
{
"epoch": 1.5053191489361701,
"grad_norm": 3.711700677871704,
"learning_rate": 8.373445468525719e-06,
"loss": 0.8401,
"step": 5660
},
{
"epoch": 1.5055851063829788,
"grad_norm": 3.8051505088806152,
"learning_rate": 8.372796251737995e-06,
"loss": 0.7845,
"step": 5661
},
{
"epoch": 1.5058510638297873,
"grad_norm": 3.983067750930786,
"learning_rate": 8.372146930591988e-06,
"loss": 0.8886,
"step": 5662
},
{
"epoch": 1.5061170212765957,
"grad_norm": 3.872107744216919,
"learning_rate": 8.371497505107784e-06,
"loss": 0.8892,
"step": 5663
},
{
"epoch": 1.5063829787234042,
"grad_norm": 4.311370849609375,
"learning_rate": 8.370847975305479e-06,
"loss": 0.8369,
"step": 5664
},
{
"epoch": 1.5066489361702127,
"grad_norm": 3.470078706741333,
"learning_rate": 8.370198341205167e-06,
"loss": 0.7035,
"step": 5665
},
{
"epoch": 1.5069148936170214,
"grad_norm": 3.7826905250549316,
"learning_rate": 8.369548602826951e-06,
"loss": 0.8478,
"step": 5666
},
{
"epoch": 1.5071808510638298,
"grad_norm": 4.1136603355407715,
"learning_rate": 8.368898760190933e-06,
"loss": 0.7812,
"step": 5667
},
{
"epoch": 1.5074468085106383,
"grad_norm": 3.856652021408081,
"learning_rate": 8.368248813317221e-06,
"loss": 0.7926,
"step": 5668
},
{
"epoch": 1.507712765957447,
"grad_norm": 4.0616865158081055,
"learning_rate": 8.367598762225929e-06,
"loss": 0.7884,
"step": 5669
},
{
"epoch": 1.5079787234042552,
"grad_norm": 4.08623743057251,
"learning_rate": 8.366948606937161e-06,
"loss": 0.8499,
"step": 5670
},
{
"epoch": 1.508244680851064,
"grad_norm": 4.225100517272949,
"learning_rate": 8.366298347471043e-06,
"loss": 0.8145,
"step": 5671
},
{
"epoch": 1.5085106382978724,
"grad_norm": 4.046361923217773,
"learning_rate": 8.36564798384769e-06,
"loss": 0.6879,
"step": 5672
},
{
"epoch": 1.5087765957446808,
"grad_norm": 4.1829833984375,
"learning_rate": 8.364997516087224e-06,
"loss": 0.7828,
"step": 5673
},
{
"epoch": 1.5090425531914895,
"grad_norm": 3.750427484512329,
"learning_rate": 8.364346944209774e-06,
"loss": 0.7639,
"step": 5674
},
{
"epoch": 1.5093085106382977,
"grad_norm": 4.194416522979736,
"learning_rate": 8.36369626823547e-06,
"loss": 0.8308,
"step": 5675
},
{
"epoch": 1.5095744680851064,
"grad_norm": 4.148036003112793,
"learning_rate": 8.363045488184443e-06,
"loss": 0.7443,
"step": 5676
},
{
"epoch": 1.5098404255319149,
"grad_norm": 3.7398674488067627,
"learning_rate": 8.362394604076827e-06,
"loss": 0.8633,
"step": 5677
},
{
"epoch": 1.5101063829787233,
"grad_norm": 3.8514955043792725,
"learning_rate": 8.361743615932765e-06,
"loss": 0.797,
"step": 5678
},
{
"epoch": 1.510372340425532,
"grad_norm": 4.254388809204102,
"learning_rate": 8.361092523772396e-06,
"loss": 0.8425,
"step": 5679
},
{
"epoch": 1.5106382978723403,
"grad_norm": 4.257145881652832,
"learning_rate": 8.360441327615868e-06,
"loss": 0.7964,
"step": 5680
},
{
"epoch": 1.510904255319149,
"grad_norm": 3.9065487384796143,
"learning_rate": 8.35979002748333e-06,
"loss": 0.837,
"step": 5681
},
{
"epoch": 1.5111702127659574,
"grad_norm": 4.575162410736084,
"learning_rate": 8.359138623394931e-06,
"loss": 0.9485,
"step": 5682
},
{
"epoch": 1.5114361702127659,
"grad_norm": 4.180033206939697,
"learning_rate": 8.35848711537083e-06,
"loss": 0.8287,
"step": 5683
},
{
"epoch": 1.5117021276595746,
"grad_norm": 4.284930229187012,
"learning_rate": 8.357835503431182e-06,
"loss": 0.8548,
"step": 5684
},
{
"epoch": 1.511968085106383,
"grad_norm": 3.8655450344085693,
"learning_rate": 8.357183787596151e-06,
"loss": 0.7792,
"step": 5685
},
{
"epoch": 1.5122340425531915,
"grad_norm": 3.840792655944824,
"learning_rate": 8.356531967885899e-06,
"loss": 0.7953,
"step": 5686
},
{
"epoch": 1.5125,
"grad_norm": 3.675896406173706,
"learning_rate": 8.355880044320599e-06,
"loss": 0.7667,
"step": 5687
},
{
"epoch": 1.5127659574468084,
"grad_norm": 3.6345510482788086,
"learning_rate": 8.355228016920417e-06,
"loss": 0.8588,
"step": 5688
},
{
"epoch": 1.513031914893617,
"grad_norm": 3.8645408153533936,
"learning_rate": 8.354575885705532e-06,
"loss": 0.862,
"step": 5689
},
{
"epoch": 1.5132978723404256,
"grad_norm": 4.727093696594238,
"learning_rate": 8.353923650696119e-06,
"loss": 0.8419,
"step": 5690
},
{
"epoch": 1.513563829787234,
"grad_norm": 4.074021816253662,
"learning_rate": 8.353271311912357e-06,
"loss": 0.7486,
"step": 5691
},
{
"epoch": 1.5138297872340427,
"grad_norm": 3.9446327686309814,
"learning_rate": 8.352618869374435e-06,
"loss": 0.7721,
"step": 5692
},
{
"epoch": 1.514095744680851,
"grad_norm": 3.839276075363159,
"learning_rate": 8.351966323102538e-06,
"loss": 0.7744,
"step": 5693
},
{
"epoch": 1.5143617021276596,
"grad_norm": 4.190333366394043,
"learning_rate": 8.351313673116856e-06,
"loss": 0.8085,
"step": 5694
},
{
"epoch": 1.514627659574468,
"grad_norm": 3.8334741592407227,
"learning_rate": 8.350660919437585e-06,
"loss": 0.933,
"step": 5695
},
{
"epoch": 1.5148936170212766,
"grad_norm": 3.766174793243408,
"learning_rate": 8.350008062084918e-06,
"loss": 0.7537,
"step": 5696
},
{
"epoch": 1.5151595744680852,
"grad_norm": 4.281386852264404,
"learning_rate": 8.349355101079058e-06,
"loss": 0.8714,
"step": 5697
},
{
"epoch": 1.5154255319148935,
"grad_norm": 3.8533146381378174,
"learning_rate": 8.348702036440209e-06,
"loss": 0.8423,
"step": 5698
},
{
"epoch": 1.5156914893617022,
"grad_norm": 4.271562099456787,
"learning_rate": 8.348048868188574e-06,
"loss": 0.9832,
"step": 5699
},
{
"epoch": 1.5159574468085106,
"grad_norm": 4.475942611694336,
"learning_rate": 8.347395596344365e-06,
"loss": 0.9984,
"step": 5700
},
{
"epoch": 1.516223404255319,
"grad_norm": 4.308716773986816,
"learning_rate": 8.346742220927798e-06,
"loss": 0.8947,
"step": 5701
},
{
"epoch": 1.5164893617021278,
"grad_norm": 4.1707587242126465,
"learning_rate": 8.346088741959085e-06,
"loss": 0.9077,
"step": 5702
},
{
"epoch": 1.516755319148936,
"grad_norm": 4.016225337982178,
"learning_rate": 8.345435159458445e-06,
"loss": 0.9186,
"step": 5703
},
{
"epoch": 1.5170212765957447,
"grad_norm": 4.131173133850098,
"learning_rate": 8.344781473446106e-06,
"loss": 0.708,
"step": 5704
},
{
"epoch": 1.5172872340425532,
"grad_norm": 4.118223667144775,
"learning_rate": 8.344127683942289e-06,
"loss": 0.815,
"step": 5705
},
{
"epoch": 1.5175531914893616,
"grad_norm": 4.08048677444458,
"learning_rate": 8.343473790967223e-06,
"loss": 0.7402,
"step": 5706
},
{
"epoch": 1.5178191489361703,
"grad_norm": 4.256683826446533,
"learning_rate": 8.342819794541143e-06,
"loss": 0.9272,
"step": 5707
},
{
"epoch": 1.5180851063829788,
"grad_norm": 3.6859428882598877,
"learning_rate": 8.34216569468428e-06,
"loss": 0.8052,
"step": 5708
},
{
"epoch": 1.5183510638297872,
"grad_norm": 4.601988315582275,
"learning_rate": 8.341511491416877e-06,
"loss": 0.7638,
"step": 5709
},
{
"epoch": 1.5186170212765957,
"grad_norm": 3.8631575107574463,
"learning_rate": 8.340857184759178e-06,
"loss": 0.8282,
"step": 5710
},
{
"epoch": 1.5188829787234042,
"grad_norm": 4.184502124786377,
"learning_rate": 8.34020277473142e-06,
"loss": 0.8513,
"step": 5711
},
{
"epoch": 1.5191489361702128,
"grad_norm": 3.9446780681610107,
"learning_rate": 8.339548261353856e-06,
"loss": 0.6634,
"step": 5712
},
{
"epoch": 1.5194148936170213,
"grad_norm": 3.9360363483428955,
"learning_rate": 8.338893644646739e-06,
"loss": 0.7769,
"step": 5713
},
{
"epoch": 1.5196808510638298,
"grad_norm": 3.235274314880371,
"learning_rate": 8.33823892463032e-06,
"loss": 0.7531,
"step": 5714
},
{
"epoch": 1.5199468085106385,
"grad_norm": 3.941875696182251,
"learning_rate": 8.337584101324859e-06,
"loss": 0.7937,
"step": 5715
},
{
"epoch": 1.5202127659574467,
"grad_norm": 3.7710206508636475,
"learning_rate": 8.336929174750616e-06,
"loss": 0.8403,
"step": 5716
},
{
"epoch": 1.5204787234042554,
"grad_norm": 4.109030246734619,
"learning_rate": 8.336274144927855e-06,
"loss": 0.6704,
"step": 5717
},
{
"epoch": 1.5207446808510638,
"grad_norm": 3.7918636798858643,
"learning_rate": 8.335619011876846e-06,
"loss": 0.7756,
"step": 5718
},
{
"epoch": 1.5210106382978723,
"grad_norm": 3.633254051208496,
"learning_rate": 8.334963775617854e-06,
"loss": 0.7325,
"step": 5719
},
{
"epoch": 1.521276595744681,
"grad_norm": 3.994147539138794,
"learning_rate": 8.334308436171159e-06,
"loss": 0.8936,
"step": 5720
},
{
"epoch": 1.5215425531914892,
"grad_norm": 3.5977087020874023,
"learning_rate": 8.333652993557035e-06,
"loss": 0.8429,
"step": 5721
},
{
"epoch": 1.521808510638298,
"grad_norm": 3.7515316009521484,
"learning_rate": 8.332997447795763e-06,
"loss": 0.8329,
"step": 5722
},
{
"epoch": 1.5220744680851064,
"grad_norm": 3.969116449356079,
"learning_rate": 8.332341798907624e-06,
"loss": 0.804,
"step": 5723
},
{
"epoch": 1.5223404255319148,
"grad_norm": 3.915306329727173,
"learning_rate": 8.331686046912908e-06,
"loss": 0.9369,
"step": 5724
},
{
"epoch": 1.5226063829787235,
"grad_norm": 3.7423787117004395,
"learning_rate": 8.331030191831904e-06,
"loss": 0.8416,
"step": 5725
},
{
"epoch": 1.5228723404255318,
"grad_norm": 3.554068088531494,
"learning_rate": 8.3303742336849e-06,
"loss": 0.7121,
"step": 5726
},
{
"epoch": 1.5231382978723405,
"grad_norm": 4.019564628601074,
"learning_rate": 8.3297181724922e-06,
"loss": 0.7882,
"step": 5727
},
{
"epoch": 1.523404255319149,
"grad_norm": 4.351405143737793,
"learning_rate": 8.3290620082741e-06,
"loss": 0.8769,
"step": 5728
},
{
"epoch": 1.5236702127659574,
"grad_norm": 3.942936658859253,
"learning_rate": 8.328405741050901e-06,
"loss": 0.924,
"step": 5729
},
{
"epoch": 1.523936170212766,
"grad_norm": 4.362167835235596,
"learning_rate": 8.327749370842909e-06,
"loss": 0.8015,
"step": 5730
},
{
"epoch": 1.5242021276595743,
"grad_norm": 3.7932353019714355,
"learning_rate": 8.327092897670432e-06,
"loss": 0.7993,
"step": 5731
},
{
"epoch": 1.524468085106383,
"grad_norm": 3.8214194774627686,
"learning_rate": 8.326436321553785e-06,
"loss": 0.7971,
"step": 5732
},
{
"epoch": 1.5247340425531914,
"grad_norm": 4.244415760040283,
"learning_rate": 8.325779642513283e-06,
"loss": 0.7253,
"step": 5733
},
{
"epoch": 1.525,
"grad_norm": 4.184083938598633,
"learning_rate": 8.325122860569241e-06,
"loss": 0.7849,
"step": 5734
},
{
"epoch": 1.5252659574468086,
"grad_norm": 4.359492301940918,
"learning_rate": 8.324465975741986e-06,
"loss": 0.8228,
"step": 5735
},
{
"epoch": 1.525531914893617,
"grad_norm": 3.8751020431518555,
"learning_rate": 8.323808988051837e-06,
"loss": 0.7288,
"step": 5736
},
{
"epoch": 1.5257978723404255,
"grad_norm": 4.366562843322754,
"learning_rate": 8.323151897519126e-06,
"loss": 0.8452,
"step": 5737
},
{
"epoch": 1.5260638297872342,
"grad_norm": 4.116846561431885,
"learning_rate": 8.322494704164182e-06,
"loss": 0.9376,
"step": 5738
},
{
"epoch": 1.5263297872340424,
"grad_norm": 4.062334060668945,
"learning_rate": 8.321837408007341e-06,
"loss": 0.855,
"step": 5739
},
{
"epoch": 1.5265957446808511,
"grad_norm": 4.4059014320373535,
"learning_rate": 8.321180009068937e-06,
"loss": 0.8832,
"step": 5740
},
{
"epoch": 1.5268617021276596,
"grad_norm": 4.124050140380859,
"learning_rate": 8.320522507369315e-06,
"loss": 0.7446,
"step": 5741
},
{
"epoch": 1.527127659574468,
"grad_norm": 3.721942901611328,
"learning_rate": 8.319864902928819e-06,
"loss": 0.8547,
"step": 5742
},
{
"epoch": 1.5273936170212767,
"grad_norm": 3.816612720489502,
"learning_rate": 8.31920719576779e-06,
"loss": 0.8478,
"step": 5743
},
{
"epoch": 1.527659574468085,
"grad_norm": 4.217785835266113,
"learning_rate": 8.318549385906587e-06,
"loss": 0.8573,
"step": 5744
},
{
"epoch": 1.5279255319148937,
"grad_norm": 4.105627536773682,
"learning_rate": 8.317891473365558e-06,
"loss": 0.8891,
"step": 5745
},
{
"epoch": 1.5281914893617021,
"grad_norm": 4.537158966064453,
"learning_rate": 8.317233458165059e-06,
"loss": 0.9119,
"step": 5746
},
{
"epoch": 1.5284574468085106,
"grad_norm": 4.287096977233887,
"learning_rate": 8.31657534032545e-06,
"loss": 0.8465,
"step": 5747
},
{
"epoch": 1.5287234042553193,
"grad_norm": 4.125601291656494,
"learning_rate": 8.315917119867098e-06,
"loss": 0.7537,
"step": 5748
},
{
"epoch": 1.5289893617021275,
"grad_norm": 4.014163017272949,
"learning_rate": 8.315258796810366e-06,
"loss": 0.7572,
"step": 5749
},
{
"epoch": 1.5292553191489362,
"grad_norm": 3.912703514099121,
"learning_rate": 8.314600371175623e-06,
"loss": 0.7825,
"step": 5750
},
{
"epoch": 1.5295212765957447,
"grad_norm": 3.731410264968872,
"learning_rate": 8.313941842983243e-06,
"loss": 0.9015,
"step": 5751
},
{
"epoch": 1.5297872340425531,
"grad_norm": 4.122485160827637,
"learning_rate": 8.313283212253598e-06,
"loss": 0.8381,
"step": 5752
},
{
"epoch": 1.5300531914893618,
"grad_norm": 4.2268757820129395,
"learning_rate": 8.312624479007072e-06,
"loss": 0.788,
"step": 5753
},
{
"epoch": 1.53031914893617,
"grad_norm": 4.129693508148193,
"learning_rate": 8.311965643264042e-06,
"loss": 0.6951,
"step": 5754
},
{
"epoch": 1.5305851063829787,
"grad_norm": 4.038047790527344,
"learning_rate": 8.311306705044898e-06,
"loss": 0.834,
"step": 5755
},
{
"epoch": 1.5308510638297872,
"grad_norm": 3.85589599609375,
"learning_rate": 8.310647664370026e-06,
"loss": 0.8583,
"step": 5756
},
{
"epoch": 1.5311170212765957,
"grad_norm": 3.889176845550537,
"learning_rate": 8.309988521259816e-06,
"loss": 0.8361,
"step": 5757
},
{
"epoch": 1.5313829787234043,
"grad_norm": 4.0538458824157715,
"learning_rate": 8.309329275734664e-06,
"loss": 0.6951,
"step": 5758
},
{
"epoch": 1.5316489361702128,
"grad_norm": 4.010767936706543,
"learning_rate": 8.30866992781497e-06,
"loss": 0.8313,
"step": 5759
},
{
"epoch": 1.5319148936170213,
"grad_norm": 3.897259473800659,
"learning_rate": 8.30801047752113e-06,
"loss": 0.7736,
"step": 5760
},
{
"epoch": 1.53218085106383,
"grad_norm": 4.07016134262085,
"learning_rate": 8.307350924873553e-06,
"loss": 0.8231,
"step": 5761
},
{
"epoch": 1.5324468085106382,
"grad_norm": 3.886470317840576,
"learning_rate": 8.306691269892646e-06,
"loss": 0.8535,
"step": 5762
},
{
"epoch": 1.5327127659574469,
"grad_norm": 3.458498477935791,
"learning_rate": 8.306031512598815e-06,
"loss": 0.7291,
"step": 5763
},
{
"epoch": 1.5329787234042553,
"grad_norm": 3.6657865047454834,
"learning_rate": 8.305371653012479e-06,
"loss": 0.8239,
"step": 5764
},
{
"epoch": 1.5332446808510638,
"grad_norm": 4.054435729980469,
"learning_rate": 8.304711691154052e-06,
"loss": 0.7947,
"step": 5765
},
{
"epoch": 1.5335106382978725,
"grad_norm": 4.395258903503418,
"learning_rate": 8.304051627043952e-06,
"loss": 0.8615,
"step": 5766
},
{
"epoch": 1.5337765957446807,
"grad_norm": 4.212094306945801,
"learning_rate": 8.303391460702607e-06,
"loss": 0.7645,
"step": 5767
},
{
"epoch": 1.5340425531914894,
"grad_norm": 4.2090044021606445,
"learning_rate": 8.302731192150441e-06,
"loss": 0.8463,
"step": 5768
},
{
"epoch": 1.5343085106382979,
"grad_norm": 3.734283685684204,
"learning_rate": 8.302070821407882e-06,
"loss": 0.7986,
"step": 5769
},
{
"epoch": 1.5345744680851063,
"grad_norm": 4.0931291580200195,
"learning_rate": 8.301410348495366e-06,
"loss": 0.7541,
"step": 5770
},
{
"epoch": 1.534840425531915,
"grad_norm": 3.604841470718384,
"learning_rate": 8.300749773433325e-06,
"loss": 0.8511,
"step": 5771
},
{
"epoch": 1.5351063829787233,
"grad_norm": 3.881558895111084,
"learning_rate": 8.300089096242201e-06,
"loss": 0.7382,
"step": 5772
},
{
"epoch": 1.535372340425532,
"grad_norm": 3.472681760787964,
"learning_rate": 8.299428316942435e-06,
"loss": 0.7106,
"step": 5773
},
{
"epoch": 1.5356382978723404,
"grad_norm": 3.5763661861419678,
"learning_rate": 8.298767435554473e-06,
"loss": 0.6924,
"step": 5774
},
{
"epoch": 1.5359042553191489,
"grad_norm": 3.965982437133789,
"learning_rate": 8.298106452098761e-06,
"loss": 0.8163,
"step": 5775
},
{
"epoch": 1.5361702127659576,
"grad_norm": 3.9243502616882324,
"learning_rate": 8.297445366595754e-06,
"loss": 0.8372,
"step": 5776
},
{
"epoch": 1.5364361702127658,
"grad_norm": 3.8713953495025635,
"learning_rate": 8.296784179065904e-06,
"loss": 0.7919,
"step": 5777
},
{
"epoch": 1.5367021276595745,
"grad_norm": 3.7591898441314697,
"learning_rate": 8.29612288952967e-06,
"loss": 0.8597,
"step": 5778
},
{
"epoch": 1.536968085106383,
"grad_norm": 4.25253438949585,
"learning_rate": 8.295461498007513e-06,
"loss": 1.0482,
"step": 5779
},
{
"epoch": 1.5372340425531914,
"grad_norm": 3.846035957336426,
"learning_rate": 8.294800004519895e-06,
"loss": 0.8348,
"step": 5780
},
{
"epoch": 1.5375,
"grad_norm": 3.652987003326416,
"learning_rate": 8.29413840908729e-06,
"loss": 0.7409,
"step": 5781
},
{
"epoch": 1.5377659574468086,
"grad_norm": 4.131805419921875,
"learning_rate": 8.293476711730163e-06,
"loss": 0.8703,
"step": 5782
},
{
"epoch": 1.538031914893617,
"grad_norm": 4.142578125,
"learning_rate": 8.292814912468988e-06,
"loss": 0.881,
"step": 5783
},
{
"epoch": 1.5382978723404257,
"grad_norm": 3.5386013984680176,
"learning_rate": 8.292153011324242e-06,
"loss": 0.7984,
"step": 5784
},
{
"epoch": 1.538563829787234,
"grad_norm": 4.26931619644165,
"learning_rate": 8.291491008316409e-06,
"loss": 0.8968,
"step": 5785
},
{
"epoch": 1.5388297872340426,
"grad_norm": 4.214763164520264,
"learning_rate": 8.290828903465965e-06,
"loss": 0.7912,
"step": 5786
},
{
"epoch": 1.539095744680851,
"grad_norm": 4.008779525756836,
"learning_rate": 8.290166696793405e-06,
"loss": 0.8708,
"step": 5787
},
{
"epoch": 1.5393617021276595,
"grad_norm": 3.722784996032715,
"learning_rate": 8.28950438831921e-06,
"loss": 0.8047,
"step": 5788
},
{
"epoch": 1.5396276595744682,
"grad_norm": 3.9850144386291504,
"learning_rate": 8.288841978063877e-06,
"loss": 0.8583,
"step": 5789
},
{
"epoch": 1.5398936170212765,
"grad_norm": 3.7640953063964844,
"learning_rate": 8.288179466047903e-06,
"loss": 0.899,
"step": 5790
},
{
"epoch": 1.5401595744680852,
"grad_norm": 3.9535369873046875,
"learning_rate": 8.287516852291784e-06,
"loss": 0.671,
"step": 5791
},
{
"epoch": 1.5404255319148936,
"grad_norm": 3.784611940383911,
"learning_rate": 8.28685413681602e-06,
"loss": 0.955,
"step": 5792
},
{
"epoch": 1.540691489361702,
"grad_norm": 4.205324172973633,
"learning_rate": 8.286191319641123e-06,
"loss": 0.8411,
"step": 5793
},
{
"epoch": 1.5409574468085108,
"grad_norm": 4.253503322601318,
"learning_rate": 8.285528400787597e-06,
"loss": 0.7707,
"step": 5794
},
{
"epoch": 1.541223404255319,
"grad_norm": 3.7679977416992188,
"learning_rate": 8.284865380275953e-06,
"loss": 0.9103,
"step": 5795
},
{
"epoch": 1.5414893617021277,
"grad_norm": 4.094081878662109,
"learning_rate": 8.284202258126706e-06,
"loss": 0.9798,
"step": 5796
},
{
"epoch": 1.5417553191489362,
"grad_norm": 4.189050674438477,
"learning_rate": 8.283539034360376e-06,
"loss": 0.8641,
"step": 5797
},
{
"epoch": 1.5420212765957446,
"grad_norm": 4.017099857330322,
"learning_rate": 8.282875708997482e-06,
"loss": 0.8214,
"step": 5798
},
{
"epoch": 1.5422872340425533,
"grad_norm": 3.6189417839050293,
"learning_rate": 8.282212282058549e-06,
"loss": 0.7486,
"step": 5799
},
{
"epoch": 1.5425531914893615,
"grad_norm": 4.480672359466553,
"learning_rate": 8.281548753564101e-06,
"loss": 0.9041,
"step": 5800
},
{
"epoch": 1.5428191489361702,
"grad_norm": 4.047300338745117,
"learning_rate": 8.280885123534673e-06,
"loss": 0.9519,
"step": 5801
},
{
"epoch": 1.5430851063829787,
"grad_norm": 4.379581928253174,
"learning_rate": 8.280221391990797e-06,
"loss": 0.9203,
"step": 5802
},
{
"epoch": 1.5433510638297872,
"grad_norm": 4.053439140319824,
"learning_rate": 8.279557558953009e-06,
"loss": 0.7759,
"step": 5803
},
{
"epoch": 1.5436170212765958,
"grad_norm": 3.927568197250366,
"learning_rate": 8.278893624441849e-06,
"loss": 0.7132,
"step": 5804
},
{
"epoch": 1.5438829787234043,
"grad_norm": 4.322382926940918,
"learning_rate": 8.278229588477857e-06,
"loss": 0.8272,
"step": 5805
},
{
"epoch": 1.5441489361702128,
"grad_norm": 3.6044352054595947,
"learning_rate": 8.277565451081587e-06,
"loss": 0.7487,
"step": 5806
},
{
"epoch": 1.5444148936170212,
"grad_norm": 3.7423501014709473,
"learning_rate": 8.27690121227358e-06,
"loss": 0.7342,
"step": 5807
},
{
"epoch": 1.5446808510638297,
"grad_norm": 3.7679383754730225,
"learning_rate": 8.27623687207439e-06,
"loss": 0.7897,
"step": 5808
},
{
"epoch": 1.5449468085106384,
"grad_norm": 3.7263903617858887,
"learning_rate": 8.275572430504578e-06,
"loss": 0.8311,
"step": 5809
},
{
"epoch": 1.5452127659574468,
"grad_norm": 3.551025390625,
"learning_rate": 8.274907887584695e-06,
"loss": 0.6916,
"step": 5810
},
{
"epoch": 1.5454787234042553,
"grad_norm": 3.8874595165252686,
"learning_rate": 8.274243243335307e-06,
"loss": 0.8246,
"step": 5811
},
{
"epoch": 1.545744680851064,
"grad_norm": 3.7710976600646973,
"learning_rate": 8.27357849777698e-06,
"loss": 0.8668,
"step": 5812
},
{
"epoch": 1.5460106382978722,
"grad_norm": 4.312849044799805,
"learning_rate": 8.272913650930277e-06,
"loss": 0.9206,
"step": 5813
},
{
"epoch": 1.546276595744681,
"grad_norm": 4.059734344482422,
"learning_rate": 8.272248702815776e-06,
"loss": 0.77,
"step": 5814
},
{
"epoch": 1.5465425531914894,
"grad_norm": 3.781832456588745,
"learning_rate": 8.271583653454046e-06,
"loss": 0.7643,
"step": 5815
},
{
"epoch": 1.5468085106382978,
"grad_norm": 3.607161045074463,
"learning_rate": 8.270918502865663e-06,
"loss": 0.7721,
"step": 5816
},
{
"epoch": 1.5470744680851065,
"grad_norm": 3.986572504043579,
"learning_rate": 8.270253251071214e-06,
"loss": 0.6967,
"step": 5817
},
{
"epoch": 1.5473404255319148,
"grad_norm": 3.9674570560455322,
"learning_rate": 8.269587898091277e-06,
"loss": 0.7986,
"step": 5818
},
{
"epoch": 1.5476063829787234,
"grad_norm": 3.794405698776245,
"learning_rate": 8.268922443946444e-06,
"loss": 0.7897,
"step": 5819
},
{
"epoch": 1.547872340425532,
"grad_norm": 3.5226500034332275,
"learning_rate": 8.2682568886573e-06,
"loss": 0.7474,
"step": 5820
},
{
"epoch": 1.5481382978723404,
"grad_norm": 3.692884922027588,
"learning_rate": 8.267591232244439e-06,
"loss": 0.9286,
"step": 5821
},
{
"epoch": 1.548404255319149,
"grad_norm": 4.193415641784668,
"learning_rate": 8.266925474728459e-06,
"loss": 0.7917,
"step": 5822
},
{
"epoch": 1.5486702127659573,
"grad_norm": 3.877485752105713,
"learning_rate": 8.266259616129959e-06,
"loss": 0.8366,
"step": 5823
},
{
"epoch": 1.548936170212766,
"grad_norm": 3.8126795291900635,
"learning_rate": 8.26559365646954e-06,
"loss": 0.7591,
"step": 5824
},
{
"epoch": 1.5492021276595744,
"grad_norm": 4.233253479003906,
"learning_rate": 8.264927595767808e-06,
"loss": 0.8596,
"step": 5825
},
{
"epoch": 1.549468085106383,
"grad_norm": 4.092543601989746,
"learning_rate": 8.264261434045374e-06,
"loss": 0.7732,
"step": 5826
},
{
"epoch": 1.5497340425531916,
"grad_norm": 4.047788619995117,
"learning_rate": 8.263595171322847e-06,
"loss": 0.8763,
"step": 5827
},
{
"epoch": 1.55,
"grad_norm": 3.990832805633545,
"learning_rate": 8.262928807620843e-06,
"loss": 0.8313,
"step": 5828
},
{
"epoch": 1.5502659574468085,
"grad_norm": 3.948673725128174,
"learning_rate": 8.262262342959981e-06,
"loss": 0.8937,
"step": 5829
},
{
"epoch": 1.550531914893617,
"grad_norm": 4.302928924560547,
"learning_rate": 8.261595777360881e-06,
"loss": 0.7945,
"step": 5830
},
{
"epoch": 1.5507978723404254,
"grad_norm": 3.8130292892456055,
"learning_rate": 8.260929110844166e-06,
"loss": 0.7971,
"step": 5831
},
{
"epoch": 1.5510638297872341,
"grad_norm": 3.7944552898406982,
"learning_rate": 8.260262343430468e-06,
"loss": 0.7268,
"step": 5832
},
{
"epoch": 1.5513297872340426,
"grad_norm": 3.765657424926758,
"learning_rate": 8.259595475140412e-06,
"loss": 0.7289,
"step": 5833
},
{
"epoch": 1.551595744680851,
"grad_norm": 4.215806484222412,
"learning_rate": 8.258928505994635e-06,
"loss": 0.8254,
"step": 5834
},
{
"epoch": 1.5518617021276597,
"grad_norm": 3.7282323837280273,
"learning_rate": 8.258261436013774e-06,
"loss": 0.8426,
"step": 5835
},
{
"epoch": 1.552127659574468,
"grad_norm": 4.05489444732666,
"learning_rate": 8.257594265218468e-06,
"loss": 0.832,
"step": 5836
},
{
"epoch": 1.5523936170212767,
"grad_norm": 4.3416666984558105,
"learning_rate": 8.256926993629358e-06,
"loss": 0.844,
"step": 5837
},
{
"epoch": 1.5526595744680851,
"grad_norm": 4.158813953399658,
"learning_rate": 8.256259621267095e-06,
"loss": 0.7328,
"step": 5838
},
{
"epoch": 1.5529255319148936,
"grad_norm": 4.071340560913086,
"learning_rate": 8.255592148152325e-06,
"loss": 0.7983,
"step": 5839
},
{
"epoch": 1.5531914893617023,
"grad_norm": 3.988938093185425,
"learning_rate": 8.254924574305698e-06,
"loss": 0.7863,
"step": 5840
},
{
"epoch": 1.5534574468085105,
"grad_norm": 3.8350539207458496,
"learning_rate": 8.254256899747876e-06,
"loss": 0.8347,
"step": 5841
},
{
"epoch": 1.5537234042553192,
"grad_norm": 3.7759451866149902,
"learning_rate": 8.253589124499513e-06,
"loss": 0.7486,
"step": 5842
},
{
"epoch": 1.5539893617021276,
"grad_norm": 4.114711284637451,
"learning_rate": 8.252921248581272e-06,
"loss": 0.8939,
"step": 5843
},
{
"epoch": 1.554255319148936,
"grad_norm": 4.071899890899658,
"learning_rate": 8.252253272013816e-06,
"loss": 0.7912,
"step": 5844
},
{
"epoch": 1.5545212765957448,
"grad_norm": 3.5732295513153076,
"learning_rate": 8.251585194817816e-06,
"loss": 0.7897,
"step": 5845
},
{
"epoch": 1.554787234042553,
"grad_norm": 3.884356737136841,
"learning_rate": 8.250917017013943e-06,
"loss": 0.8328,
"step": 5846
},
{
"epoch": 1.5550531914893617,
"grad_norm": 4.147099018096924,
"learning_rate": 8.250248738622868e-06,
"loss": 0.8425,
"step": 5847
},
{
"epoch": 1.5553191489361702,
"grad_norm": 4.285495758056641,
"learning_rate": 8.249580359665272e-06,
"loss": 0.9088,
"step": 5848
},
{
"epoch": 1.5555851063829786,
"grad_norm": 3.903362512588501,
"learning_rate": 8.248911880161832e-06,
"loss": 0.8711,
"step": 5849
},
{
"epoch": 1.5558510638297873,
"grad_norm": 3.910297155380249,
"learning_rate": 8.248243300133236e-06,
"loss": 0.8571,
"step": 5850
},
{
"epoch": 1.5561170212765958,
"grad_norm": 3.7283291816711426,
"learning_rate": 8.247574619600165e-06,
"loss": 0.8114,
"step": 5851
},
{
"epoch": 1.5563829787234043,
"grad_norm": 4.2508864402771,
"learning_rate": 8.246905838583315e-06,
"loss": 0.8498,
"step": 5852
},
{
"epoch": 1.5566489361702127,
"grad_norm": 3.5398671627044678,
"learning_rate": 8.246236957103374e-06,
"loss": 0.7013,
"step": 5853
},
{
"epoch": 1.5569148936170212,
"grad_norm": 3.609945297241211,
"learning_rate": 8.245567975181037e-06,
"loss": 0.7113,
"step": 5854
},
{
"epoch": 1.5571808510638299,
"grad_norm": 3.550767660140991,
"learning_rate": 8.244898892837009e-06,
"loss": 0.753,
"step": 5855
},
{
"epoch": 1.5574468085106383,
"grad_norm": 4.197300434112549,
"learning_rate": 8.244229710091986e-06,
"loss": 0.7006,
"step": 5856
},
{
"epoch": 1.5577127659574468,
"grad_norm": 3.916386842727661,
"learning_rate": 8.243560426966678e-06,
"loss": 0.7071,
"step": 5857
},
{
"epoch": 1.5579787234042555,
"grad_norm": 4.1130218505859375,
"learning_rate": 8.242891043481793e-06,
"loss": 0.8622,
"step": 5858
},
{
"epoch": 1.5582446808510637,
"grad_norm": 3.9336955547332764,
"learning_rate": 8.242221559658039e-06,
"loss": 0.7626,
"step": 5859
},
{
"epoch": 1.5585106382978724,
"grad_norm": 4.237149715423584,
"learning_rate": 8.241551975516133e-06,
"loss": 0.8566,
"step": 5860
},
{
"epoch": 1.5587765957446809,
"grad_norm": 4.12725305557251,
"learning_rate": 8.240882291076794e-06,
"loss": 0.7879,
"step": 5861
},
{
"epoch": 1.5590425531914893,
"grad_norm": 4.043492794036865,
"learning_rate": 8.240212506360738e-06,
"loss": 0.8772,
"step": 5862
},
{
"epoch": 1.559308510638298,
"grad_norm": 3.8735363483428955,
"learning_rate": 8.239542621388696e-06,
"loss": 0.9265,
"step": 5863
},
{
"epoch": 1.5595744680851062,
"grad_norm": 4.195898056030273,
"learning_rate": 8.23887263618139e-06,
"loss": 0.9022,
"step": 5864
},
{
"epoch": 1.559840425531915,
"grad_norm": 3.4813778400421143,
"learning_rate": 8.23820255075955e-06,
"loss": 0.7605,
"step": 5865
},
{
"epoch": 1.5601063829787234,
"grad_norm": 3.5564541816711426,
"learning_rate": 8.237532365143909e-06,
"loss": 0.7148,
"step": 5866
},
{
"epoch": 1.5603723404255319,
"grad_norm": 4.291294097900391,
"learning_rate": 8.236862079355208e-06,
"loss": 1.022,
"step": 5867
},
{
"epoch": 1.5606382978723405,
"grad_norm": 3.761632204055786,
"learning_rate": 8.236191693414184e-06,
"loss": 0.8673,
"step": 5868
},
{
"epoch": 1.5609042553191488,
"grad_norm": 3.8336169719696045,
"learning_rate": 8.235521207341577e-06,
"loss": 0.7979,
"step": 5869
},
{
"epoch": 1.5611702127659575,
"grad_norm": 3.8964157104492188,
"learning_rate": 8.234850621158135e-06,
"loss": 0.7466,
"step": 5870
},
{
"epoch": 1.561436170212766,
"grad_norm": 3.8827109336853027,
"learning_rate": 8.234179934884605e-06,
"loss": 0.953,
"step": 5871
},
{
"epoch": 1.5617021276595744,
"grad_norm": 4.318760395050049,
"learning_rate": 8.23350914854174e-06,
"loss": 0.8975,
"step": 5872
},
{
"epoch": 1.561968085106383,
"grad_norm": 3.927676200866699,
"learning_rate": 8.232838262150298e-06,
"loss": 0.8148,
"step": 5873
},
{
"epoch": 1.5622340425531915,
"grad_norm": 4.160933017730713,
"learning_rate": 8.23216727573103e-06,
"loss": 0.7736,
"step": 5874
},
{
"epoch": 1.5625,
"grad_norm": 4.034573078155518,
"learning_rate": 8.231496189304704e-06,
"loss": 0.7754,
"step": 5875
},
{
"epoch": 1.5627659574468085,
"grad_norm": 4.033196926116943,
"learning_rate": 8.230825002892081e-06,
"loss": 0.8588,
"step": 5876
},
{
"epoch": 1.563031914893617,
"grad_norm": 3.949902057647705,
"learning_rate": 8.23015371651393e-06,
"loss": 0.8279,
"step": 5877
},
{
"epoch": 1.5632978723404256,
"grad_norm": 3.8417794704437256,
"learning_rate": 8.229482330191016e-06,
"loss": 0.7201,
"step": 5878
},
{
"epoch": 1.563563829787234,
"grad_norm": 3.836516857147217,
"learning_rate": 8.22881084394412e-06,
"loss": 0.9244,
"step": 5879
},
{
"epoch": 1.5638297872340425,
"grad_norm": 3.882302761077881,
"learning_rate": 8.228139257794012e-06,
"loss": 0.7944,
"step": 5880
},
{
"epoch": 1.5640957446808512,
"grad_norm": 4.163621425628662,
"learning_rate": 8.227467571761478e-06,
"loss": 0.7916,
"step": 5881
},
{
"epoch": 1.5643617021276595,
"grad_norm": 3.8937926292419434,
"learning_rate": 8.226795785867294e-06,
"loss": 0.7165,
"step": 5882
},
{
"epoch": 1.5646276595744681,
"grad_norm": 4.019950866699219,
"learning_rate": 8.226123900132252e-06,
"loss": 0.8444,
"step": 5883
},
{
"epoch": 1.5648936170212766,
"grad_norm": 3.9146535396575928,
"learning_rate": 8.225451914577137e-06,
"loss": 0.7472,
"step": 5884
},
{
"epoch": 1.565159574468085,
"grad_norm": 4.430140018463135,
"learning_rate": 8.224779829222742e-06,
"loss": 0.8139,
"step": 5885
},
{
"epoch": 1.5654255319148938,
"grad_norm": 3.8101890087127686,
"learning_rate": 8.224107644089863e-06,
"loss": 0.8198,
"step": 5886
},
{
"epoch": 1.565691489361702,
"grad_norm": 3.603240966796875,
"learning_rate": 8.223435359199297e-06,
"loss": 0.7507,
"step": 5887
},
{
"epoch": 1.5659574468085107,
"grad_norm": 3.993999719619751,
"learning_rate": 8.222762974571848e-06,
"loss": 0.6875,
"step": 5888
},
{
"epoch": 1.5662234042553191,
"grad_norm": 4.127441883087158,
"learning_rate": 8.222090490228316e-06,
"loss": 0.7653,
"step": 5889
},
{
"epoch": 1.5664893617021276,
"grad_norm": 4.082408428192139,
"learning_rate": 8.22141790618951e-06,
"loss": 0.8506,
"step": 5890
},
{
"epoch": 1.5667553191489363,
"grad_norm": 4.1307806968688965,
"learning_rate": 8.220745222476243e-06,
"loss": 0.7614,
"step": 5891
},
{
"epoch": 1.5670212765957445,
"grad_norm": 3.9022128582000732,
"learning_rate": 8.220072439109326e-06,
"loss": 0.8563,
"step": 5892
},
{
"epoch": 1.5672872340425532,
"grad_norm": 3.8020009994506836,
"learning_rate": 8.219399556109578e-06,
"loss": 0.8016,
"step": 5893
},
{
"epoch": 1.5675531914893617,
"grad_norm": 4.383156776428223,
"learning_rate": 8.218726573497817e-06,
"loss": 0.7956,
"step": 5894
},
{
"epoch": 1.5678191489361701,
"grad_norm": 4.414666175842285,
"learning_rate": 8.218053491294864e-06,
"loss": 0.8215,
"step": 5895
},
{
"epoch": 1.5680851063829788,
"grad_norm": 4.223287105560303,
"learning_rate": 8.21738030952155e-06,
"loss": 0.8466,
"step": 5896
},
{
"epoch": 1.5683510638297873,
"grad_norm": 4.012655735015869,
"learning_rate": 8.216707028198699e-06,
"loss": 0.7384,
"step": 5897
},
{
"epoch": 1.5686170212765957,
"grad_norm": 4.301409721374512,
"learning_rate": 8.216033647347145e-06,
"loss": 0.7748,
"step": 5898
},
{
"epoch": 1.5688829787234042,
"grad_norm": 4.148224353790283,
"learning_rate": 8.215360166987728e-06,
"loss": 0.8227,
"step": 5899
},
{
"epoch": 1.5691489361702127,
"grad_norm": 4.055191993713379,
"learning_rate": 8.214686587141277e-06,
"loss": 0.7811,
"step": 5900
},
{
"epoch": 1.5694148936170214,
"grad_norm": 3.9274792671203613,
"learning_rate": 8.21401290782864e-06,
"loss": 0.7934,
"step": 5901
},
{
"epoch": 1.5696808510638298,
"grad_norm": 3.762334108352661,
"learning_rate": 8.213339129070658e-06,
"loss": 0.7967,
"step": 5902
},
{
"epoch": 1.5699468085106383,
"grad_norm": 4.094070911407471,
"learning_rate": 8.212665250888184e-06,
"loss": 0.8637,
"step": 5903
},
{
"epoch": 1.570212765957447,
"grad_norm": 3.871859550476074,
"learning_rate": 8.21199127330206e-06,
"loss": 0.7181,
"step": 5904
},
{
"epoch": 1.5704787234042552,
"grad_norm": 4.029532432556152,
"learning_rate": 8.211317196333149e-06,
"loss": 0.756,
"step": 5905
},
{
"epoch": 1.570744680851064,
"grad_norm": 3.982078790664673,
"learning_rate": 8.2106430200023e-06,
"loss": 0.7437,
"step": 5906
},
{
"epoch": 1.5710106382978724,
"grad_norm": 4.319076061248779,
"learning_rate": 8.209968744330375e-06,
"loss": 0.8517,
"step": 5907
},
{
"epoch": 1.5712765957446808,
"grad_norm": 3.5704493522644043,
"learning_rate": 8.20929436933824e-06,
"loss": 0.7369,
"step": 5908
},
{
"epoch": 1.5715425531914895,
"grad_norm": 3.825941562652588,
"learning_rate": 8.208619895046759e-06,
"loss": 0.7644,
"step": 5909
},
{
"epoch": 1.5718085106382977,
"grad_norm": 3.535365581512451,
"learning_rate": 8.2079453214768e-06,
"loss": 0.8191,
"step": 5910
},
{
"epoch": 1.5720744680851064,
"grad_norm": 4.012056827545166,
"learning_rate": 8.207270648649235e-06,
"loss": 0.805,
"step": 5911
},
{
"epoch": 1.5723404255319149,
"grad_norm": 3.670342206954956,
"learning_rate": 8.20659587658494e-06,
"loss": 0.7253,
"step": 5912
},
{
"epoch": 1.5726063829787233,
"grad_norm": 3.5404562950134277,
"learning_rate": 8.205921005304796e-06,
"loss": 0.7078,
"step": 5913
},
{
"epoch": 1.572872340425532,
"grad_norm": 4.304678916931152,
"learning_rate": 8.20524603482968e-06,
"loss": 0.8129,
"step": 5914
},
{
"epoch": 1.5731382978723403,
"grad_norm": 3.6795125007629395,
"learning_rate": 8.204570965180476e-06,
"loss": 0.7669,
"step": 5915
},
{
"epoch": 1.573404255319149,
"grad_norm": 3.8298754692077637,
"learning_rate": 8.203895796378076e-06,
"loss": 0.7803,
"step": 5916
},
{
"epoch": 1.5736702127659574,
"grad_norm": 4.399144649505615,
"learning_rate": 8.203220528443367e-06,
"loss": 0.9503,
"step": 5917
},
{
"epoch": 1.5739361702127659,
"grad_norm": 4.104849815368652,
"learning_rate": 8.202545161397242e-06,
"loss": 0.8586,
"step": 5918
},
{
"epoch": 1.5742021276595746,
"grad_norm": 4.923317909240723,
"learning_rate": 8.201869695260603e-06,
"loss": 0.815,
"step": 5919
},
{
"epoch": 1.574468085106383,
"grad_norm": 3.845151424407959,
"learning_rate": 8.201194130054342e-06,
"loss": 0.8449,
"step": 5920
},
{
"epoch": 1.5747340425531915,
"grad_norm": 4.074094295501709,
"learning_rate": 8.200518465799367e-06,
"loss": 0.7569,
"step": 5921
},
{
"epoch": 1.575,
"grad_norm": 4.062026023864746,
"learning_rate": 8.199842702516584e-06,
"loss": 0.8712,
"step": 5922
},
{
"epoch": 1.5752659574468084,
"grad_norm": 4.046767711639404,
"learning_rate": 8.199166840226898e-06,
"loss": 0.8318,
"step": 5923
},
{
"epoch": 1.575531914893617,
"grad_norm": 3.813408851623535,
"learning_rate": 8.198490878951224e-06,
"loss": 0.7493,
"step": 5924
},
{
"epoch": 1.5757978723404256,
"grad_norm": 4.108468055725098,
"learning_rate": 8.19781481871048e-06,
"loss": 0.7867,
"step": 5925
},
{
"epoch": 1.576063829787234,
"grad_norm": 3.9004015922546387,
"learning_rate": 8.197138659525576e-06,
"loss": 0.7384,
"step": 5926
},
{
"epoch": 1.5763297872340427,
"grad_norm": 4.14080286026001,
"learning_rate": 8.19646240141744e-06,
"loss": 0.7755,
"step": 5927
},
{
"epoch": 1.576595744680851,
"grad_norm": 3.8850128650665283,
"learning_rate": 8.195786044406992e-06,
"loss": 0.7689,
"step": 5928
},
{
"epoch": 1.5768617021276596,
"grad_norm": 3.973543882369995,
"learning_rate": 8.195109588515163e-06,
"loss": 0.7336,
"step": 5929
},
{
"epoch": 1.577127659574468,
"grad_norm": 3.7367260456085205,
"learning_rate": 8.194433033762882e-06,
"loss": 0.8511,
"step": 5930
},
{
"epoch": 1.5773936170212766,
"grad_norm": 3.7051467895507812,
"learning_rate": 8.193756380171081e-06,
"loss": 0.7696,
"step": 5931
},
{
"epoch": 1.5776595744680852,
"grad_norm": 3.612755298614502,
"learning_rate": 8.193079627760697e-06,
"loss": 0.7733,
"step": 5932
},
{
"epoch": 1.5779255319148935,
"grad_norm": 4.524839401245117,
"learning_rate": 8.19240277655267e-06,
"loss": 0.8047,
"step": 5933
},
{
"epoch": 1.5781914893617022,
"grad_norm": 4.2709059715271,
"learning_rate": 8.191725826567943e-06,
"loss": 0.9173,
"step": 5934
},
{
"epoch": 1.5784574468085106,
"grad_norm": 4.062780857086182,
"learning_rate": 8.191048777827462e-06,
"loss": 0.755,
"step": 5935
},
{
"epoch": 1.578723404255319,
"grad_norm": 4.253462314605713,
"learning_rate": 8.190371630352174e-06,
"loss": 0.9102,
"step": 5936
},
{
"epoch": 1.5789893617021278,
"grad_norm": 3.578122854232788,
"learning_rate": 8.189694384163032e-06,
"loss": 0.6755,
"step": 5937
},
{
"epoch": 1.579255319148936,
"grad_norm": 3.9935173988342285,
"learning_rate": 8.189017039280989e-06,
"loss": 0.8196,
"step": 5938
},
{
"epoch": 1.5795212765957447,
"grad_norm": 3.9614062309265137,
"learning_rate": 8.188339595727004e-06,
"loss": 0.7896,
"step": 5939
},
{
"epoch": 1.5797872340425532,
"grad_norm": 3.7698519229888916,
"learning_rate": 8.187662053522039e-06,
"loss": 0.785,
"step": 5940
},
{
"epoch": 1.5800531914893616,
"grad_norm": 4.328986167907715,
"learning_rate": 8.186984412687058e-06,
"loss": 0.87,
"step": 5941
},
{
"epoch": 1.5803191489361703,
"grad_norm": 4.169852256774902,
"learning_rate": 8.186306673243025e-06,
"loss": 0.8594,
"step": 5942
},
{
"epoch": 1.5805851063829788,
"grad_norm": 4.010345458984375,
"learning_rate": 8.185628835210915e-06,
"loss": 0.913,
"step": 5943
},
{
"epoch": 1.5808510638297872,
"grad_norm": 3.9177587032318115,
"learning_rate": 8.184950898611696e-06,
"loss": 0.9157,
"step": 5944
},
{
"epoch": 1.5811170212765957,
"grad_norm": 4.508220672607422,
"learning_rate": 8.184272863466348e-06,
"loss": 0.8951,
"step": 5945
},
{
"epoch": 1.5813829787234042,
"grad_norm": 3.5971477031707764,
"learning_rate": 8.183594729795848e-06,
"loss": 0.7883,
"step": 5946
},
{
"epoch": 1.5816489361702128,
"grad_norm": 4.1539998054504395,
"learning_rate": 8.182916497621177e-06,
"loss": 0.8599,
"step": 5947
},
{
"epoch": 1.5819148936170213,
"grad_norm": 3.9577205181121826,
"learning_rate": 8.182238166963325e-06,
"loss": 0.8107,
"step": 5948
},
{
"epoch": 1.5821808510638298,
"grad_norm": 3.921849250793457,
"learning_rate": 8.181559737843274e-06,
"loss": 0.8452,
"step": 5949
},
{
"epoch": 1.5824468085106385,
"grad_norm": 3.6595895290374756,
"learning_rate": 8.18088121028202e-06,
"loss": 0.8332,
"step": 5950
},
{
"epoch": 1.5827127659574467,
"grad_norm": 4.248002052307129,
"learning_rate": 8.18020258430056e-06,
"loss": 0.928,
"step": 5951
},
{
"epoch": 1.5829787234042554,
"grad_norm": 3.584662437438965,
"learning_rate": 8.179523859919884e-06,
"loss": 0.7684,
"step": 5952
},
{
"epoch": 1.5832446808510638,
"grad_norm": 3.5269956588745117,
"learning_rate": 8.178845037160997e-06,
"loss": 0.7553,
"step": 5953
},
{
"epoch": 1.5835106382978723,
"grad_norm": 4.2691731452941895,
"learning_rate": 8.178166116044904e-06,
"loss": 0.8211,
"step": 5954
},
{
"epoch": 1.583776595744681,
"grad_norm": 4.050920009613037,
"learning_rate": 8.177487096592607e-06,
"loss": 0.9221,
"step": 5955
},
{
"epoch": 1.5840425531914892,
"grad_norm": 4.290426731109619,
"learning_rate": 8.17680797882512e-06,
"loss": 0.7909,
"step": 5956
},
{
"epoch": 1.584308510638298,
"grad_norm": 3.8692431449890137,
"learning_rate": 8.176128762763451e-06,
"loss": 0.7887,
"step": 5957
},
{
"epoch": 1.5845744680851064,
"grad_norm": 4.173573017120361,
"learning_rate": 8.175449448428621e-06,
"loss": 0.7535,
"step": 5958
},
{
"epoch": 1.5848404255319148,
"grad_norm": 4.186033248901367,
"learning_rate": 8.174770035841647e-06,
"loss": 0.8673,
"step": 5959
},
{
"epoch": 1.5851063829787235,
"grad_norm": 4.015555381774902,
"learning_rate": 8.17409052502355e-06,
"loss": 0.8815,
"step": 5960
},
{
"epoch": 1.5853723404255318,
"grad_norm": 3.864473342895508,
"learning_rate": 8.173410915995354e-06,
"loss": 0.8684,
"step": 5961
},
{
"epoch": 1.5856382978723405,
"grad_norm": 3.6198973655700684,
"learning_rate": 8.172731208778089e-06,
"loss": 0.7445,
"step": 5962
},
{
"epoch": 1.585904255319149,
"grad_norm": 3.7900218963623047,
"learning_rate": 8.172051403392784e-06,
"loss": 0.7331,
"step": 5963
},
{
"epoch": 1.5861702127659574,
"grad_norm": 4.163589954376221,
"learning_rate": 8.171371499860475e-06,
"loss": 0.8528,
"step": 5964
},
{
"epoch": 1.586436170212766,
"grad_norm": 4.275415420532227,
"learning_rate": 8.170691498202196e-06,
"loss": 0.8435,
"step": 5965
},
{
"epoch": 1.5867021276595743,
"grad_norm": 3.969174861907959,
"learning_rate": 8.170011398438992e-06,
"loss": 0.8812,
"step": 5966
},
{
"epoch": 1.586968085106383,
"grad_norm": 4.086930751800537,
"learning_rate": 8.169331200591901e-06,
"loss": 0.8988,
"step": 5967
},
{
"epoch": 1.5872340425531914,
"grad_norm": 4.444678783416748,
"learning_rate": 8.168650904681973e-06,
"loss": 0.9295,
"step": 5968
},
{
"epoch": 1.5875,
"grad_norm": 3.7711548805236816,
"learning_rate": 8.167970510730254e-06,
"loss": 0.7715,
"step": 5969
},
{
"epoch": 1.5877659574468086,
"grad_norm": 3.800588369369507,
"learning_rate": 8.167290018757797e-06,
"loss": 0.8273,
"step": 5970
},
{
"epoch": 1.588031914893617,
"grad_norm": 4.506065845489502,
"learning_rate": 8.16660942878566e-06,
"loss": 0.7786,
"step": 5971
},
{
"epoch": 1.5882978723404255,
"grad_norm": 3.8182950019836426,
"learning_rate": 8.165928740834896e-06,
"loss": 0.6682,
"step": 5972
},
{
"epoch": 1.5885638297872342,
"grad_norm": 4.040492534637451,
"learning_rate": 8.165247954926572e-06,
"loss": 0.7333,
"step": 5973
},
{
"epoch": 1.5888297872340424,
"grad_norm": 4.233337879180908,
"learning_rate": 8.164567071081747e-06,
"loss": 0.7931,
"step": 5974
},
{
"epoch": 1.5890957446808511,
"grad_norm": 4.0191969871521,
"learning_rate": 8.163886089321493e-06,
"loss": 0.8279,
"step": 5975
},
{
"epoch": 1.5893617021276596,
"grad_norm": 3.9428741931915283,
"learning_rate": 8.163205009666879e-06,
"loss": 0.7945,
"step": 5976
},
{
"epoch": 1.589627659574468,
"grad_norm": 4.383618354797363,
"learning_rate": 8.162523832138977e-06,
"loss": 0.8961,
"step": 5977
},
{
"epoch": 1.5898936170212767,
"grad_norm": 4.313653945922852,
"learning_rate": 8.161842556758863e-06,
"loss": 0.927,
"step": 5978
},
{
"epoch": 1.590159574468085,
"grad_norm": 4.137526988983154,
"learning_rate": 8.161161183547619e-06,
"loss": 0.833,
"step": 5979
},
{
"epoch": 1.5904255319148937,
"grad_norm": 3.9024994373321533,
"learning_rate": 8.160479712526326e-06,
"loss": 0.8324,
"step": 5980
},
{
"epoch": 1.5906914893617021,
"grad_norm": 3.745685577392578,
"learning_rate": 8.159798143716069e-06,
"loss": 0.7946,
"step": 5981
},
{
"epoch": 1.5909574468085106,
"grad_norm": 4.142686367034912,
"learning_rate": 8.159116477137938e-06,
"loss": 0.8469,
"step": 5982
},
{
"epoch": 1.5912234042553193,
"grad_norm": 4.332526683807373,
"learning_rate": 8.158434712813024e-06,
"loss": 0.8398,
"step": 5983
},
{
"epoch": 1.5914893617021275,
"grad_norm": 4.1822028160095215,
"learning_rate": 8.157752850762422e-06,
"loss": 0.8182,
"step": 5984
},
{
"epoch": 1.5917553191489362,
"grad_norm": 3.797029972076416,
"learning_rate": 8.157070891007227e-06,
"loss": 0.8219,
"step": 5985
},
{
"epoch": 1.5920212765957447,
"grad_norm": 3.6281862258911133,
"learning_rate": 8.156388833568543e-06,
"loss": 0.7788,
"step": 5986
},
{
"epoch": 1.5922872340425531,
"grad_norm": 3.963622570037842,
"learning_rate": 8.155706678467472e-06,
"loss": 0.8121,
"step": 5987
},
{
"epoch": 1.5925531914893618,
"grad_norm": 3.965254068374634,
"learning_rate": 8.15502442572512e-06,
"loss": 0.9758,
"step": 5988
},
{
"epoch": 1.59281914893617,
"grad_norm": 3.7290945053100586,
"learning_rate": 8.1543420753626e-06,
"loss": 0.7913,
"step": 5989
},
{
"epoch": 1.5930851063829787,
"grad_norm": 3.5423686504364014,
"learning_rate": 8.15365962740102e-06,
"loss": 0.6702,
"step": 5990
},
{
"epoch": 1.5933510638297872,
"grad_norm": 4.0960540771484375,
"learning_rate": 8.1529770818615e-06,
"loss": 0.976,
"step": 5991
},
{
"epoch": 1.5936170212765957,
"grad_norm": 3.9374215602874756,
"learning_rate": 8.152294438765157e-06,
"loss": 0.7726,
"step": 5992
},
{
"epoch": 1.5938829787234043,
"grad_norm": 4.123393535614014,
"learning_rate": 8.15161169813311e-06,
"loss": 0.7414,
"step": 5993
},
{
"epoch": 1.5941489361702128,
"grad_norm": 3.7125062942504883,
"learning_rate": 8.150928859986488e-06,
"loss": 0.8094,
"step": 5994
},
{
"epoch": 1.5944148936170213,
"grad_norm": 3.6186742782592773,
"learning_rate": 8.15024592434642e-06,
"loss": 0.8291,
"step": 5995
},
{
"epoch": 1.59468085106383,
"grad_norm": 3.9349913597106934,
"learning_rate": 8.14956289123403e-06,
"loss": 0.8469,
"step": 5996
},
{
"epoch": 1.5949468085106382,
"grad_norm": 4.224155426025391,
"learning_rate": 8.148879760670459e-06,
"loss": 0.8178,
"step": 5997
},
{
"epoch": 1.5952127659574469,
"grad_norm": 4.03489351272583,
"learning_rate": 8.14819653267684e-06,
"loss": 1.0682,
"step": 5998
},
{
"epoch": 1.5954787234042553,
"grad_norm": 3.757615566253662,
"learning_rate": 8.147513207274314e-06,
"loss": 0.9454,
"step": 5999
},
{
"epoch": 1.5957446808510638,
"grad_norm": 3.69804048538208,
"learning_rate": 8.146829784484024e-06,
"loss": 0.6988,
"step": 6000
},
{
"epoch": 1.5957446808510638,
"eval_loss": 1.2842473983764648,
"eval_runtime": 13.4375,
"eval_samples_per_second": 29.767,
"eval_steps_per_second": 3.721,
"step": 6000
},
{
"epoch": 1.5960106382978725,
"grad_norm": 3.8672168254852295,
"learning_rate": 8.146146264327113e-06,
"loss": 0.8893,
"step": 6001
},
{
"epoch": 1.5962765957446807,
"grad_norm": 3.7445380687713623,
"learning_rate": 8.145462646824734e-06,
"loss": 0.8237,
"step": 6002
},
{
"epoch": 1.5965425531914894,
"grad_norm": 3.7135863304138184,
"learning_rate": 8.144778931998038e-06,
"loss": 0.6954,
"step": 6003
},
{
"epoch": 1.5968085106382979,
"grad_norm": 3.946181058883667,
"learning_rate": 8.144095119868178e-06,
"loss": 0.8022,
"step": 6004
},
{
"epoch": 1.5970744680851063,
"grad_norm": 3.866457223892212,
"learning_rate": 8.143411210456314e-06,
"loss": 0.7848,
"step": 6005
},
{
"epoch": 1.597340425531915,
"grad_norm": 3.9514496326446533,
"learning_rate": 8.142727203783608e-06,
"loss": 0.8287,
"step": 6006
},
{
"epoch": 1.5976063829787233,
"grad_norm": 3.780092239379883,
"learning_rate": 8.142043099871219e-06,
"loss": 0.731,
"step": 6007
},
{
"epoch": 1.597872340425532,
"grad_norm": 3.832037925720215,
"learning_rate": 8.141358898740319e-06,
"loss": 0.8207,
"step": 6008
},
{
"epoch": 1.5981382978723404,
"grad_norm": 3.7208633422851562,
"learning_rate": 8.140674600412076e-06,
"loss": 0.7905,
"step": 6009
},
{
"epoch": 1.5984042553191489,
"grad_norm": 3.5873775482177734,
"learning_rate": 8.139990204907662e-06,
"loss": 0.7042,
"step": 6010
},
{
"epoch": 1.5986702127659576,
"grad_norm": 4.138782024383545,
"learning_rate": 8.139305712248256e-06,
"loss": 0.8231,
"step": 6011
},
{
"epoch": 1.5989361702127658,
"grad_norm": 4.014845371246338,
"learning_rate": 8.138621122455034e-06,
"loss": 0.7606,
"step": 6012
},
{
"epoch": 1.5992021276595745,
"grad_norm": 3.997772693634033,
"learning_rate": 8.13793643554918e-06,
"loss": 0.8122,
"step": 6013
},
{
"epoch": 1.599468085106383,
"grad_norm": 3.3885183334350586,
"learning_rate": 8.137251651551878e-06,
"loss": 0.7245,
"step": 6014
},
{
"epoch": 1.5997340425531914,
"grad_norm": 3.9096522331237793,
"learning_rate": 8.136566770484316e-06,
"loss": 0.7919,
"step": 6015
},
{
"epoch": 1.6,
"grad_norm": 4.008962154388428,
"learning_rate": 8.135881792367686e-06,
"loss": 0.8683,
"step": 6016
},
{
"epoch": 1.6002659574468086,
"grad_norm": 3.9772658348083496,
"learning_rate": 8.13519671722318e-06,
"loss": 0.7775,
"step": 6017
},
{
"epoch": 1.600531914893617,
"grad_norm": 4.593280792236328,
"learning_rate": 8.134511545071998e-06,
"loss": 0.8959,
"step": 6018
},
{
"epoch": 1.6007978723404257,
"grad_norm": 3.9730031490325928,
"learning_rate": 8.133826275935337e-06,
"loss": 0.8394,
"step": 6019
},
{
"epoch": 1.601063829787234,
"grad_norm": 4.224338531494141,
"learning_rate": 8.133140909834402e-06,
"loss": 0.7961,
"step": 6020
},
{
"epoch": 1.6013297872340426,
"grad_norm": 3.759888172149658,
"learning_rate": 8.132455446790399e-06,
"loss": 0.8531,
"step": 6021
},
{
"epoch": 1.601595744680851,
"grad_norm": 3.5629312992095947,
"learning_rate": 8.131769886824535e-06,
"loss": 0.8102,
"step": 6022
},
{
"epoch": 1.6018617021276595,
"grad_norm": 3.5515568256378174,
"learning_rate": 8.131084229958024e-06,
"loss": 0.7867,
"step": 6023
},
{
"epoch": 1.6021276595744682,
"grad_norm": 4.148061275482178,
"learning_rate": 8.130398476212081e-06,
"loss": 0.8708,
"step": 6024
},
{
"epoch": 1.6023936170212765,
"grad_norm": 4.018913745880127,
"learning_rate": 8.129712625607924e-06,
"loss": 0.771,
"step": 6025
},
{
"epoch": 1.6026595744680852,
"grad_norm": 4.379147052764893,
"learning_rate": 8.129026678166772e-06,
"loss": 0.8199,
"step": 6026
},
{
"epoch": 1.6029255319148936,
"grad_norm": 3.568890333175659,
"learning_rate": 8.128340633909852e-06,
"loss": 0.705,
"step": 6027
},
{
"epoch": 1.603191489361702,
"grad_norm": 3.6377384662628174,
"learning_rate": 8.127654492858388e-06,
"loss": 0.6958,
"step": 6028
},
{
"epoch": 1.6034574468085108,
"grad_norm": 4.233497142791748,
"learning_rate": 8.126968255033614e-06,
"loss": 0.8446,
"step": 6029
},
{
"epoch": 1.603723404255319,
"grad_norm": 4.239995956420898,
"learning_rate": 8.126281920456758e-06,
"loss": 0.813,
"step": 6030
},
{
"epoch": 1.6039893617021277,
"grad_norm": 3.8521575927734375,
"learning_rate": 8.12559548914906e-06,
"loss": 0.7906,
"step": 6031
},
{
"epoch": 1.6042553191489362,
"grad_norm": 3.567471742630005,
"learning_rate": 8.124908961131759e-06,
"loss": 0.6709,
"step": 6032
},
{
"epoch": 1.6045212765957446,
"grad_norm": 3.527024030685425,
"learning_rate": 8.124222336426094e-06,
"loss": 0.7508,
"step": 6033
},
{
"epoch": 1.6047872340425533,
"grad_norm": 4.134167671203613,
"learning_rate": 8.123535615053312e-06,
"loss": 0.8233,
"step": 6034
},
{
"epoch": 1.6050531914893615,
"grad_norm": 3.62556791305542,
"learning_rate": 8.12284879703466e-06,
"loss": 0.7347,
"step": 6035
},
{
"epoch": 1.6053191489361702,
"grad_norm": 4.534690856933594,
"learning_rate": 8.12216188239139e-06,
"loss": 0.9258,
"step": 6036
},
{
"epoch": 1.6055851063829787,
"grad_norm": 3.8855905532836914,
"learning_rate": 8.121474871144757e-06,
"loss": 0.7215,
"step": 6037
},
{
"epoch": 1.6058510638297872,
"grad_norm": 3.889317274093628,
"learning_rate": 8.120787763316014e-06,
"loss": 0.7557,
"step": 6038
},
{
"epoch": 1.6061170212765958,
"grad_norm": 4.091339588165283,
"learning_rate": 8.120100558926425e-06,
"loss": 0.8053,
"step": 6039
},
{
"epoch": 1.6063829787234043,
"grad_norm": 4.249019622802734,
"learning_rate": 8.11941325799725e-06,
"loss": 0.837,
"step": 6040
},
{
"epoch": 1.6066489361702128,
"grad_norm": 4.165124416351318,
"learning_rate": 8.118725860549756e-06,
"loss": 0.8762,
"step": 6041
},
{
"epoch": 1.6069148936170212,
"grad_norm": 4.028770923614502,
"learning_rate": 8.118038366605212e-06,
"loss": 0.8456,
"step": 6042
},
{
"epoch": 1.6071808510638297,
"grad_norm": 3.60648250579834,
"learning_rate": 8.117350776184892e-06,
"loss": 0.688,
"step": 6043
},
{
"epoch": 1.6074468085106384,
"grad_norm": 3.6444270610809326,
"learning_rate": 8.116663089310067e-06,
"loss": 0.8199,
"step": 6044
},
{
"epoch": 1.6077127659574468,
"grad_norm": 4.073156833648682,
"learning_rate": 8.115975306002018e-06,
"loss": 0.9758,
"step": 6045
},
{
"epoch": 1.6079787234042553,
"grad_norm": 4.100760459899902,
"learning_rate": 8.115287426282022e-06,
"loss": 0.9357,
"step": 6046
},
{
"epoch": 1.608244680851064,
"grad_norm": 4.134888648986816,
"learning_rate": 8.114599450171366e-06,
"loss": 0.7536,
"step": 6047
},
{
"epoch": 1.6085106382978722,
"grad_norm": 3.8742432594299316,
"learning_rate": 8.113911377691338e-06,
"loss": 0.7832,
"step": 6048
},
{
"epoch": 1.608776595744681,
"grad_norm": 4.110736846923828,
"learning_rate": 8.113223208863224e-06,
"loss": 0.7098,
"step": 6049
},
{
"epoch": 1.6090425531914894,
"grad_norm": 3.972907304763794,
"learning_rate": 8.11253494370832e-06,
"loss": 0.8414,
"step": 6050
},
{
"epoch": 1.6093085106382978,
"grad_norm": 3.984872817993164,
"learning_rate": 8.111846582247917e-06,
"loss": 0.9063,
"step": 6051
},
{
"epoch": 1.6095744680851065,
"grad_norm": 4.114076614379883,
"learning_rate": 8.11115812450332e-06,
"loss": 0.8774,
"step": 6052
},
{
"epoch": 1.6098404255319148,
"grad_norm": 3.8898861408233643,
"learning_rate": 8.110469570495828e-06,
"loss": 0.6855,
"step": 6053
},
{
"epoch": 1.6101063829787234,
"grad_norm": 3.620485544204712,
"learning_rate": 8.109780920246743e-06,
"loss": 0.8566,
"step": 6054
},
{
"epoch": 1.610372340425532,
"grad_norm": 4.412075519561768,
"learning_rate": 8.109092173777376e-06,
"loss": 0.8386,
"step": 6055
},
{
"epoch": 1.6106382978723404,
"grad_norm": 4.396791934967041,
"learning_rate": 8.108403331109038e-06,
"loss": 0.7074,
"step": 6056
},
{
"epoch": 1.610904255319149,
"grad_norm": 4.347930431365967,
"learning_rate": 8.10771439226304e-06,
"loss": 0.8188,
"step": 6057
},
{
"epoch": 1.6111702127659573,
"grad_norm": 3.751016855239868,
"learning_rate": 8.1070253572607e-06,
"loss": 0.7469,
"step": 6058
},
{
"epoch": 1.611436170212766,
"grad_norm": 4.112164497375488,
"learning_rate": 8.106336226123339e-06,
"loss": 0.8259,
"step": 6059
},
{
"epoch": 1.6117021276595744,
"grad_norm": 4.112537860870361,
"learning_rate": 8.105646998872275e-06,
"loss": 0.8493,
"step": 6060
},
{
"epoch": 1.611968085106383,
"grad_norm": 4.171288967132568,
"learning_rate": 8.104957675528837e-06,
"loss": 0.9249,
"step": 6061
},
{
"epoch": 1.6122340425531916,
"grad_norm": 4.331489086151123,
"learning_rate": 8.104268256114354e-06,
"loss": 0.9123,
"step": 6062
},
{
"epoch": 1.6125,
"grad_norm": 4.148106575012207,
"learning_rate": 8.103578740650157e-06,
"loss": 0.7654,
"step": 6063
},
{
"epoch": 1.6127659574468085,
"grad_norm": 3.72057843208313,
"learning_rate": 8.102889129157578e-06,
"loss": 0.8049,
"step": 6064
},
{
"epoch": 1.613031914893617,
"grad_norm": 3.9282565116882324,
"learning_rate": 8.102199421657957e-06,
"loss": 0.7639,
"step": 6065
},
{
"epoch": 1.6132978723404254,
"grad_norm": 3.8103582859039307,
"learning_rate": 8.101509618172634e-06,
"loss": 0.8689,
"step": 6066
},
{
"epoch": 1.6135638297872341,
"grad_norm": 4.2297539710998535,
"learning_rate": 8.10081971872295e-06,
"loss": 0.9582,
"step": 6067
},
{
"epoch": 1.6138297872340426,
"grad_norm": 4.653298854827881,
"learning_rate": 8.100129723330255e-06,
"loss": 0.9946,
"step": 6068
},
{
"epoch": 1.614095744680851,
"grad_norm": 3.7969958782196045,
"learning_rate": 8.099439632015896e-06,
"loss": 0.7852,
"step": 6069
},
{
"epoch": 1.6143617021276597,
"grad_norm": 4.072946071624756,
"learning_rate": 8.098749444801226e-06,
"loss": 0.79,
"step": 6070
},
{
"epoch": 1.614627659574468,
"grad_norm": 3.9592959880828857,
"learning_rate": 8.0980591617076e-06,
"loss": 0.7815,
"step": 6071
},
{
"epoch": 1.6148936170212767,
"grad_norm": 4.4633588790893555,
"learning_rate": 8.097368782756374e-06,
"loss": 0.7754,
"step": 6072
},
{
"epoch": 1.6151595744680851,
"grad_norm": 4.381833553314209,
"learning_rate": 8.096678307968913e-06,
"loss": 0.9649,
"step": 6073
},
{
"epoch": 1.6154255319148936,
"grad_norm": 4.433225154876709,
"learning_rate": 8.095987737366578e-06,
"loss": 0.9376,
"step": 6074
},
{
"epoch": 1.6156914893617023,
"grad_norm": 3.7621006965637207,
"learning_rate": 8.095297070970738e-06,
"loss": 0.7577,
"step": 6075
},
{
"epoch": 1.6159574468085105,
"grad_norm": 3.4518826007843018,
"learning_rate": 8.094606308802764e-06,
"loss": 0.816,
"step": 6076
},
{
"epoch": 1.6162234042553192,
"grad_norm": 4.059780120849609,
"learning_rate": 8.093915450884025e-06,
"loss": 0.8319,
"step": 6077
},
{
"epoch": 1.6164893617021276,
"grad_norm": 3.8527324199676514,
"learning_rate": 8.093224497235899e-06,
"loss": 0.8826,
"step": 6078
},
{
"epoch": 1.616755319148936,
"grad_norm": 3.3895418643951416,
"learning_rate": 8.092533447879766e-06,
"loss": 0.73,
"step": 6079
},
{
"epoch": 1.6170212765957448,
"grad_norm": 3.9259166717529297,
"learning_rate": 8.091842302837009e-06,
"loss": 0.8569,
"step": 6080
},
{
"epoch": 1.617287234042553,
"grad_norm": 3.5704541206359863,
"learning_rate": 8.091151062129008e-06,
"loss": 0.8113,
"step": 6081
},
{
"epoch": 1.6175531914893617,
"grad_norm": 3.8313138484954834,
"learning_rate": 8.090459725777156e-06,
"loss": 0.7352,
"step": 6082
},
{
"epoch": 1.6178191489361702,
"grad_norm": 4.403858184814453,
"learning_rate": 8.089768293802842e-06,
"loss": 0.7757,
"step": 6083
},
{
"epoch": 1.6180851063829786,
"grad_norm": 4.078790664672852,
"learning_rate": 8.089076766227457e-06,
"loss": 0.8444,
"step": 6084
},
{
"epoch": 1.6183510638297873,
"grad_norm": 4.103868007659912,
"learning_rate": 8.088385143072402e-06,
"loss": 0.7451,
"step": 6085
},
{
"epoch": 1.6186170212765958,
"grad_norm": 3.906527042388916,
"learning_rate": 8.087693424359073e-06,
"loss": 0.7095,
"step": 6086
},
{
"epoch": 1.6188829787234043,
"grad_norm": 4.909295082092285,
"learning_rate": 8.087001610108874e-06,
"loss": 0.8277,
"step": 6087
},
{
"epoch": 1.6191489361702127,
"grad_norm": 5.194472312927246,
"learning_rate": 8.086309700343211e-06,
"loss": 0.8959,
"step": 6088
},
{
"epoch": 1.6194148936170212,
"grad_norm": 3.6174070835113525,
"learning_rate": 8.085617695083493e-06,
"loss": 0.7838,
"step": 6089
},
{
"epoch": 1.6196808510638299,
"grad_norm": 3.5253570079803467,
"learning_rate": 8.08492559435113e-06,
"loss": 0.7633,
"step": 6090
},
{
"epoch": 1.6199468085106383,
"grad_norm": 4.330216884613037,
"learning_rate": 8.084233398167537e-06,
"loss": 0.8669,
"step": 6091
},
{
"epoch": 1.6202127659574468,
"grad_norm": 3.792811393737793,
"learning_rate": 8.083541106554131e-06,
"loss": 0.8782,
"step": 6092
},
{
"epoch": 1.6204787234042555,
"grad_norm": 3.888946533203125,
"learning_rate": 8.082848719532335e-06,
"loss": 0.8816,
"step": 6093
},
{
"epoch": 1.6207446808510637,
"grad_norm": 3.9346768856048584,
"learning_rate": 8.082156237123567e-06,
"loss": 0.6887,
"step": 6094
},
{
"epoch": 1.6210106382978724,
"grad_norm": 3.7470414638519287,
"learning_rate": 8.081463659349258e-06,
"loss": 0.7622,
"step": 6095
},
{
"epoch": 1.6212765957446809,
"grad_norm": 3.9194772243499756,
"learning_rate": 8.080770986230835e-06,
"loss": 0.768,
"step": 6096
},
{
"epoch": 1.6215425531914893,
"grad_norm": 3.7921671867370605,
"learning_rate": 8.08007821778973e-06,
"loss": 0.8936,
"step": 6097
},
{
"epoch": 1.621808510638298,
"grad_norm": 3.8893918991088867,
"learning_rate": 8.07938535404738e-06,
"loss": 0.835,
"step": 6098
},
{
"epoch": 1.6220744680851062,
"grad_norm": 3.7834744453430176,
"learning_rate": 8.07869239502522e-06,
"loss": 0.7374,
"step": 6099
},
{
"epoch": 1.622340425531915,
"grad_norm": 3.867154598236084,
"learning_rate": 8.077999340744694e-06,
"loss": 0.7935,
"step": 6100
},
{
"epoch": 1.6226063829787234,
"grad_norm": 4.853170394897461,
"learning_rate": 8.077306191227244e-06,
"loss": 0.7786,
"step": 6101
},
{
"epoch": 1.6228723404255319,
"grad_norm": 4.339568614959717,
"learning_rate": 8.076612946494317e-06,
"loss": 0.6722,
"step": 6102
},
{
"epoch": 1.6231382978723405,
"grad_norm": 3.6707983016967773,
"learning_rate": 8.075919606567363e-06,
"loss": 0.8792,
"step": 6103
},
{
"epoch": 1.6234042553191488,
"grad_norm": 3.867652177810669,
"learning_rate": 8.075226171467835e-06,
"loss": 0.7879,
"step": 6104
},
{
"epoch": 1.6236702127659575,
"grad_norm": 3.5733299255371094,
"learning_rate": 8.07453264121719e-06,
"loss": 0.7921,
"step": 6105
},
{
"epoch": 1.623936170212766,
"grad_norm": 3.7665045261383057,
"learning_rate": 8.073839015836884e-06,
"loss": 0.9738,
"step": 6106
},
{
"epoch": 1.6242021276595744,
"grad_norm": 4.237964153289795,
"learning_rate": 8.07314529534838e-06,
"loss": 0.869,
"step": 6107
},
{
"epoch": 1.624468085106383,
"grad_norm": 3.797464370727539,
"learning_rate": 8.072451479773143e-06,
"loss": 0.8445,
"step": 6108
},
{
"epoch": 1.6247340425531915,
"grad_norm": 3.9559130668640137,
"learning_rate": 8.071757569132639e-06,
"loss": 0.848,
"step": 6109
},
{
"epoch": 1.625,
"grad_norm": 3.7033722400665283,
"learning_rate": 8.071063563448341e-06,
"loss": 0.8571,
"step": 6110
},
{
"epoch": 1.6252659574468085,
"grad_norm": 3.696049451828003,
"learning_rate": 8.070369462741719e-06,
"loss": 0.8649,
"step": 6111
},
{
"epoch": 1.625531914893617,
"grad_norm": 3.495377540588379,
"learning_rate": 8.06967526703425e-06,
"loss": 0.7691,
"step": 6112
},
{
"epoch": 1.6257978723404256,
"grad_norm": 3.9298911094665527,
"learning_rate": 8.068980976347416e-06,
"loss": 0.7793,
"step": 6113
},
{
"epoch": 1.626063829787234,
"grad_norm": 3.756425380706787,
"learning_rate": 8.068286590702697e-06,
"loss": 0.8161,
"step": 6114
},
{
"epoch": 1.6263297872340425,
"grad_norm": 4.13591194152832,
"learning_rate": 8.067592110121576e-06,
"loss": 0.8543,
"step": 6115
},
{
"epoch": 1.6265957446808512,
"grad_norm": 4.203410625457764,
"learning_rate": 8.066897534625547e-06,
"loss": 0.7607,
"step": 6116
},
{
"epoch": 1.6268617021276595,
"grad_norm": 4.2013983726501465,
"learning_rate": 8.066202864236096e-06,
"loss": 0.8248,
"step": 6117
},
{
"epoch": 1.6271276595744681,
"grad_norm": 4.034732341766357,
"learning_rate": 8.065508098974719e-06,
"loss": 0.804,
"step": 6118
},
{
"epoch": 1.6273936170212766,
"grad_norm": 4.180783271789551,
"learning_rate": 8.06481323886291e-06,
"loss": 0.8354,
"step": 6119
},
{
"epoch": 1.627659574468085,
"grad_norm": 3.9474117755889893,
"learning_rate": 8.064118283922173e-06,
"loss": 0.8622,
"step": 6120
},
{
"epoch": 1.6279255319148938,
"grad_norm": 3.8866050243377686,
"learning_rate": 8.063423234174008e-06,
"loss": 0.7197,
"step": 6121
},
{
"epoch": 1.628191489361702,
"grad_norm": 4.463206768035889,
"learning_rate": 8.062728089639921e-06,
"loss": 0.9226,
"step": 6122
},
{
"epoch": 1.6284574468085107,
"grad_norm": 3.982656717300415,
"learning_rate": 8.062032850341423e-06,
"loss": 0.7225,
"step": 6123
},
{
"epoch": 1.6287234042553191,
"grad_norm": 3.9853739738464355,
"learning_rate": 8.061337516300024e-06,
"loss": 0.6711,
"step": 6124
},
{
"epoch": 1.6289893617021276,
"grad_norm": 3.823125123977661,
"learning_rate": 8.060642087537233e-06,
"loss": 0.8944,
"step": 6125
},
{
"epoch": 1.6292553191489363,
"grad_norm": 4.082576274871826,
"learning_rate": 8.059946564074577e-06,
"loss": 0.8235,
"step": 6126
},
{
"epoch": 1.6295212765957445,
"grad_norm": 4.3164472579956055,
"learning_rate": 8.05925094593357e-06,
"loss": 0.8086,
"step": 6127
},
{
"epoch": 1.6297872340425532,
"grad_norm": 3.8943753242492676,
"learning_rate": 8.058555233135737e-06,
"loss": 0.7088,
"step": 6128
},
{
"epoch": 1.6300531914893617,
"grad_norm": 4.248415470123291,
"learning_rate": 8.057859425702605e-06,
"loss": 0.8011,
"step": 6129
},
{
"epoch": 1.6303191489361701,
"grad_norm": 3.8152194023132324,
"learning_rate": 8.057163523655702e-06,
"loss": 0.7437,
"step": 6130
},
{
"epoch": 1.6305851063829788,
"grad_norm": 4.243065357208252,
"learning_rate": 8.056467527016559e-06,
"loss": 0.8156,
"step": 6131
},
{
"epoch": 1.6308510638297873,
"grad_norm": 4.148963928222656,
"learning_rate": 8.055771435806714e-06,
"loss": 0.8538,
"step": 6132
},
{
"epoch": 1.6311170212765957,
"grad_norm": 3.848583698272705,
"learning_rate": 8.0550752500477e-06,
"loss": 0.7818,
"step": 6133
},
{
"epoch": 1.6313829787234042,
"grad_norm": 4.185320854187012,
"learning_rate": 8.054378969761062e-06,
"loss": 0.85,
"step": 6134
},
{
"epoch": 1.6316489361702127,
"grad_norm": 4.244765758514404,
"learning_rate": 8.053682594968346e-06,
"loss": 0.8856,
"step": 6135
},
{
"epoch": 1.6319148936170214,
"grad_norm": 3.8420188426971436,
"learning_rate": 8.052986125691091e-06,
"loss": 0.7745,
"step": 6136
},
{
"epoch": 1.6321808510638298,
"grad_norm": 4.029837131500244,
"learning_rate": 8.052289561950852e-06,
"loss": 0.8724,
"step": 6137
},
{
"epoch": 1.6324468085106383,
"grad_norm": 3.9027750492095947,
"learning_rate": 8.051592903769182e-06,
"loss": 0.7405,
"step": 6138
},
{
"epoch": 1.632712765957447,
"grad_norm": 4.00022554397583,
"learning_rate": 8.050896151167632e-06,
"loss": 0.7677,
"step": 6139
},
{
"epoch": 1.6329787234042552,
"grad_norm": 4.150446891784668,
"learning_rate": 8.050199304167766e-06,
"loss": 0.7348,
"step": 6140
},
{
"epoch": 1.633244680851064,
"grad_norm": 4.308548927307129,
"learning_rate": 8.04950236279114e-06,
"loss": 0.8106,
"step": 6141
},
{
"epoch": 1.6335106382978724,
"grad_norm": 3.9967095851898193,
"learning_rate": 8.048805327059321e-06,
"loss": 0.7345,
"step": 6142
},
{
"epoch": 1.6337765957446808,
"grad_norm": 3.783818244934082,
"learning_rate": 8.048108196993879e-06,
"loss": 0.716,
"step": 6143
},
{
"epoch": 1.6340425531914895,
"grad_norm": 3.8823726177215576,
"learning_rate": 8.047410972616376e-06,
"loss": 0.778,
"step": 6144
},
{
"epoch": 1.6343085106382977,
"grad_norm": 4.007701873779297,
"learning_rate": 8.046713653948393e-06,
"loss": 0.9691,
"step": 6145
},
{
"epoch": 1.6345744680851064,
"grad_norm": 4.14747428894043,
"learning_rate": 8.0460162410115e-06,
"loss": 0.8201,
"step": 6146
},
{
"epoch": 1.6348404255319149,
"grad_norm": 4.101099967956543,
"learning_rate": 8.045318733827278e-06,
"loss": 0.8864,
"step": 6147
},
{
"epoch": 1.6351063829787233,
"grad_norm": 3.709555149078369,
"learning_rate": 8.044621132417311e-06,
"loss": 0.7185,
"step": 6148
},
{
"epoch": 1.635372340425532,
"grad_norm": 4.0000481605529785,
"learning_rate": 8.043923436803182e-06,
"loss": 0.8816,
"step": 6149
},
{
"epoch": 1.6356382978723403,
"grad_norm": 4.075678825378418,
"learning_rate": 8.043225647006475e-06,
"loss": 0.8192,
"step": 6150
},
{
"epoch": 1.635904255319149,
"grad_norm": 4.004273891448975,
"learning_rate": 8.042527763048787e-06,
"loss": 0.9374,
"step": 6151
},
{
"epoch": 1.6361702127659574,
"grad_norm": 3.904745101928711,
"learning_rate": 8.041829784951706e-06,
"loss": 0.7701,
"step": 6152
},
{
"epoch": 1.6364361702127659,
"grad_norm": 3.7361650466918945,
"learning_rate": 8.04113171273683e-06,
"loss": 0.6875,
"step": 6153
},
{
"epoch": 1.6367021276595746,
"grad_norm": 3.9355521202087402,
"learning_rate": 8.040433546425759e-06,
"loss": 0.828,
"step": 6154
},
{
"epoch": 1.636968085106383,
"grad_norm": 3.615612745285034,
"learning_rate": 8.039735286040095e-06,
"loss": 0.8136,
"step": 6155
},
{
"epoch": 1.6372340425531915,
"grad_norm": 3.900493621826172,
"learning_rate": 8.03903693160144e-06,
"loss": 0.7782,
"step": 6156
},
{
"epoch": 1.6375,
"grad_norm": 4.175507068634033,
"learning_rate": 8.038338483131408e-06,
"loss": 0.8486,
"step": 6157
},
{
"epoch": 1.6377659574468084,
"grad_norm": 4.02733039855957,
"learning_rate": 8.037639940651603e-06,
"loss": 0.7591,
"step": 6158
},
{
"epoch": 1.638031914893617,
"grad_norm": 4.006030559539795,
"learning_rate": 8.036941304183643e-06,
"loss": 0.8453,
"step": 6159
},
{
"epoch": 1.6382978723404256,
"grad_norm": 3.9777238368988037,
"learning_rate": 8.036242573749142e-06,
"loss": 0.7623,
"step": 6160
},
{
"epoch": 1.638563829787234,
"grad_norm": 3.7054030895233154,
"learning_rate": 8.035543749369724e-06,
"loss": 0.7552,
"step": 6161
},
{
"epoch": 1.6388297872340427,
"grad_norm": 4.149451732635498,
"learning_rate": 8.034844831067006e-06,
"loss": 0.6954,
"step": 6162
},
{
"epoch": 1.639095744680851,
"grad_norm": 4.144680500030518,
"learning_rate": 8.034145818862618e-06,
"loss": 0.8583,
"step": 6163
},
{
"epoch": 1.6393617021276596,
"grad_norm": 3.732167959213257,
"learning_rate": 8.033446712778184e-06,
"loss": 0.7437,
"step": 6164
},
{
"epoch": 1.639627659574468,
"grad_norm": 4.176260471343994,
"learning_rate": 8.032747512835338e-06,
"loss": 0.9089,
"step": 6165
},
{
"epoch": 1.6398936170212766,
"grad_norm": 3.9875879287719727,
"learning_rate": 8.032048219055712e-06,
"loss": 0.7776,
"step": 6166
},
{
"epoch": 1.6401595744680852,
"grad_norm": 3.942016839981079,
"learning_rate": 8.031348831460948e-06,
"loss": 0.752,
"step": 6167
},
{
"epoch": 1.6404255319148935,
"grad_norm": 4.088458061218262,
"learning_rate": 8.030649350072679e-06,
"loss": 0.8339,
"step": 6168
},
{
"epoch": 1.6406914893617022,
"grad_norm": 4.712299346923828,
"learning_rate": 8.029949774912552e-06,
"loss": 0.942,
"step": 6169
},
{
"epoch": 1.6409574468085106,
"grad_norm": 3.5929760932922363,
"learning_rate": 8.029250106002212e-06,
"loss": 0.7309,
"step": 6170
},
{
"epoch": 1.641223404255319,
"grad_norm": 4.059690475463867,
"learning_rate": 8.028550343363306e-06,
"loss": 0.8479,
"step": 6171
},
{
"epoch": 1.6414893617021278,
"grad_norm": 4.054781436920166,
"learning_rate": 8.027850487017488e-06,
"loss": 0.9293,
"step": 6172
},
{
"epoch": 1.641755319148936,
"grad_norm": 3.754241466522217,
"learning_rate": 8.027150536986411e-06,
"loss": 0.7714,
"step": 6173
},
{
"epoch": 1.6420212765957447,
"grad_norm": 3.6258599758148193,
"learning_rate": 8.026450493291731e-06,
"loss": 0.725,
"step": 6174
},
{
"epoch": 1.6422872340425532,
"grad_norm": 4.247791290283203,
"learning_rate": 8.025750355955112e-06,
"loss": 0.7394,
"step": 6175
},
{
"epoch": 1.6425531914893616,
"grad_norm": 3.7767536640167236,
"learning_rate": 8.025050124998213e-06,
"loss": 0.757,
"step": 6176
},
{
"epoch": 1.6428191489361703,
"grad_norm": 3.970726490020752,
"learning_rate": 8.0243498004427e-06,
"loss": 0.7449,
"step": 6177
},
{
"epoch": 1.6430851063829788,
"grad_norm": 4.161791801452637,
"learning_rate": 8.023649382310246e-06,
"loss": 0.8939,
"step": 6178
},
{
"epoch": 1.6433510638297872,
"grad_norm": 3.9791698455810547,
"learning_rate": 8.02294887062252e-06,
"loss": 0.7553,
"step": 6179
},
{
"epoch": 1.6436170212765957,
"grad_norm": 3.881882905960083,
"learning_rate": 8.022248265401196e-06,
"loss": 0.7806,
"step": 6180
},
{
"epoch": 1.6438829787234042,
"grad_norm": 4.165888786315918,
"learning_rate": 8.021547566667952e-06,
"loss": 0.7756,
"step": 6181
},
{
"epoch": 1.6441489361702128,
"grad_norm": 4.053508281707764,
"learning_rate": 8.02084677444447e-06,
"loss": 0.7472,
"step": 6182
},
{
"epoch": 1.6444148936170213,
"grad_norm": 4.370820045471191,
"learning_rate": 8.020145888752431e-06,
"loss": 0.858,
"step": 6183
},
{
"epoch": 1.6446808510638298,
"grad_norm": 4.108578205108643,
"learning_rate": 8.019444909613524e-06,
"loss": 0.8644,
"step": 6184
},
{
"epoch": 1.6449468085106385,
"grad_norm": 3.9922139644622803,
"learning_rate": 8.018743837049433e-06,
"loss": 0.7846,
"step": 6185
},
{
"epoch": 1.6452127659574467,
"grad_norm": 3.711470127105713,
"learning_rate": 8.018042671081858e-06,
"loss": 0.685,
"step": 6186
},
{
"epoch": 1.6454787234042554,
"grad_norm": 3.7997970581054688,
"learning_rate": 8.01734141173249e-06,
"loss": 0.7726,
"step": 6187
},
{
"epoch": 1.6457446808510638,
"grad_norm": 4.349726676940918,
"learning_rate": 8.016640059023023e-06,
"loss": 0.9296,
"step": 6188
},
{
"epoch": 1.6460106382978723,
"grad_norm": 3.8738739490509033,
"learning_rate": 8.01593861297516e-06,
"loss": 0.9472,
"step": 6189
},
{
"epoch": 1.646276595744681,
"grad_norm": 4.002452850341797,
"learning_rate": 8.015237073610607e-06,
"loss": 0.7488,
"step": 6190
},
{
"epoch": 1.6465425531914892,
"grad_norm": 4.017054557800293,
"learning_rate": 8.01453544095107e-06,
"loss": 0.9446,
"step": 6191
},
{
"epoch": 1.646808510638298,
"grad_norm": 4.123724460601807,
"learning_rate": 8.013833715018256e-06,
"loss": 0.9052,
"step": 6192
},
{
"epoch": 1.6470744680851064,
"grad_norm": 3.664494752883911,
"learning_rate": 8.013131895833879e-06,
"loss": 0.7421,
"step": 6193
},
{
"epoch": 1.6473404255319148,
"grad_norm": 3.7503373622894287,
"learning_rate": 8.012429983419654e-06,
"loss": 0.7293,
"step": 6194
},
{
"epoch": 1.6476063829787235,
"grad_norm": 4.248551845550537,
"learning_rate": 8.0117279777973e-06,
"loss": 0.664,
"step": 6195
},
{
"epoch": 1.6478723404255318,
"grad_norm": 4.146711349487305,
"learning_rate": 8.011025878988534e-06,
"loss": 0.8164,
"step": 6196
},
{
"epoch": 1.6481382978723405,
"grad_norm": 3.8372318744659424,
"learning_rate": 8.010323687015083e-06,
"loss": 0.7173,
"step": 6197
},
{
"epoch": 1.648404255319149,
"grad_norm": 4.206233501434326,
"learning_rate": 8.009621401898671e-06,
"loss": 0.8324,
"step": 6198
},
{
"epoch": 1.6486702127659574,
"grad_norm": 3.9302217960357666,
"learning_rate": 8.008919023661033e-06,
"loss": 0.8095,
"step": 6199
},
{
"epoch": 1.648936170212766,
"grad_norm": 3.8333635330200195,
"learning_rate": 8.008216552323896e-06,
"loss": 0.6761,
"step": 6200
},
{
"epoch": 1.6492021276595743,
"grad_norm": 4.308274269104004,
"learning_rate": 8.007513987908997e-06,
"loss": 0.9286,
"step": 6201
},
{
"epoch": 1.649468085106383,
"grad_norm": 3.9875328540802,
"learning_rate": 8.006811330438076e-06,
"loss": 0.8439,
"step": 6202
},
{
"epoch": 1.6497340425531914,
"grad_norm": 3.9723567962646484,
"learning_rate": 8.006108579932869e-06,
"loss": 0.743,
"step": 6203
},
{
"epoch": 1.65,
"grad_norm": 3.6594903469085693,
"learning_rate": 8.005405736415127e-06,
"loss": 0.8403,
"step": 6204
},
{
"epoch": 1.6502659574468086,
"grad_norm": 3.7459709644317627,
"learning_rate": 8.00470279990659e-06,
"loss": 0.7611,
"step": 6205
},
{
"epoch": 1.650531914893617,
"grad_norm": 4.077069282531738,
"learning_rate": 8.003999770429013e-06,
"loss": 0.8415,
"step": 6206
},
{
"epoch": 1.6507978723404255,
"grad_norm": 4.072371482849121,
"learning_rate": 8.003296648004146e-06,
"loss": 0.8709,
"step": 6207
},
{
"epoch": 1.6510638297872342,
"grad_norm": 4.159237861633301,
"learning_rate": 8.002593432653743e-06,
"loss": 0.802,
"step": 6208
},
{
"epoch": 1.6513297872340424,
"grad_norm": 4.047359943389893,
"learning_rate": 8.001890124399565e-06,
"loss": 0.7666,
"step": 6209
},
{
"epoch": 1.6515957446808511,
"grad_norm": 3.548340320587158,
"learning_rate": 8.001186723263374e-06,
"loss": 0.8141,
"step": 6210
},
{
"epoch": 1.6518617021276596,
"grad_norm": 4.3510050773620605,
"learning_rate": 8.00048322926693e-06,
"loss": 0.7908,
"step": 6211
},
{
"epoch": 1.652127659574468,
"grad_norm": 3.642498254776001,
"learning_rate": 7.999779642432003e-06,
"loss": 0.8594,
"step": 6212
},
{
"epoch": 1.6523936170212767,
"grad_norm": 3.804325819015503,
"learning_rate": 7.999075962780363e-06,
"loss": 0.7736,
"step": 6213
},
{
"epoch": 1.652659574468085,
"grad_norm": 4.080993175506592,
"learning_rate": 7.998372190333781e-06,
"loss": 0.8834,
"step": 6214
},
{
"epoch": 1.6529255319148937,
"grad_norm": 4.291904449462891,
"learning_rate": 7.997668325114033e-06,
"loss": 0.8433,
"step": 6215
},
{
"epoch": 1.6531914893617021,
"grad_norm": 3.4936020374298096,
"learning_rate": 7.996964367142899e-06,
"loss": 0.7045,
"step": 6216
},
{
"epoch": 1.6534574468085106,
"grad_norm": 4.251427173614502,
"learning_rate": 7.996260316442157e-06,
"loss": 0.8487,
"step": 6217
},
{
"epoch": 1.6537234042553193,
"grad_norm": 3.810161828994751,
"learning_rate": 7.995556173033594e-06,
"loss": 0.7715,
"step": 6218
},
{
"epoch": 1.6539893617021275,
"grad_norm": 3.8157644271850586,
"learning_rate": 7.994851936938996e-06,
"loss": 0.8408,
"step": 6219
},
{
"epoch": 1.6542553191489362,
"grad_norm": 3.614837646484375,
"learning_rate": 7.994147608180153e-06,
"loss": 0.7829,
"step": 6220
},
{
"epoch": 1.6545212765957447,
"grad_norm": 4.262511253356934,
"learning_rate": 7.99344318677886e-06,
"loss": 0.8728,
"step": 6221
},
{
"epoch": 1.6547872340425531,
"grad_norm": 4.14133358001709,
"learning_rate": 7.992738672756909e-06,
"loss": 0.8611,
"step": 6222
},
{
"epoch": 1.6550531914893618,
"grad_norm": 4.4198737144470215,
"learning_rate": 7.992034066136099e-06,
"loss": 0.8825,
"step": 6223
},
{
"epoch": 1.65531914893617,
"grad_norm": 4.433263778686523,
"learning_rate": 7.991329366938232e-06,
"loss": 0.9547,
"step": 6224
},
{
"epoch": 1.6555851063829787,
"grad_norm": 4.354765892028809,
"learning_rate": 7.990624575185116e-06,
"loss": 0.9415,
"step": 6225
},
{
"epoch": 1.6558510638297872,
"grad_norm": 4.149988174438477,
"learning_rate": 7.98991969089855e-06,
"loss": 0.7804,
"step": 6226
},
{
"epoch": 1.6561170212765957,
"grad_norm": 3.833970546722412,
"learning_rate": 7.98921471410035e-06,
"loss": 0.7944,
"step": 6227
},
{
"epoch": 1.6563829787234043,
"grad_norm": 3.816167116165161,
"learning_rate": 7.98850964481233e-06,
"loss": 0.8054,
"step": 6228
},
{
"epoch": 1.6566489361702128,
"grad_norm": 3.758295774459839,
"learning_rate": 7.987804483056301e-06,
"loss": 0.7724,
"step": 6229
},
{
"epoch": 1.6569148936170213,
"grad_norm": 4.2231669425964355,
"learning_rate": 7.987099228854083e-06,
"loss": 0.8713,
"step": 6230
},
{
"epoch": 1.65718085106383,
"grad_norm": 4.497824192047119,
"learning_rate": 7.9863938822275e-06,
"loss": 0.9629,
"step": 6231
},
{
"epoch": 1.6574468085106382,
"grad_norm": 3.9088895320892334,
"learning_rate": 7.985688443198371e-06,
"loss": 0.7597,
"step": 6232
},
{
"epoch": 1.6577127659574469,
"grad_norm": 3.699256658554077,
"learning_rate": 7.984982911788528e-06,
"loss": 0.8468,
"step": 6233
},
{
"epoch": 1.6579787234042553,
"grad_norm": 3.8971588611602783,
"learning_rate": 7.9842772880198e-06,
"loss": 0.8377,
"step": 6234
},
{
"epoch": 1.6582446808510638,
"grad_norm": 3.8062503337860107,
"learning_rate": 7.98357157191402e-06,
"loss": 0.6739,
"step": 6235
},
{
"epoch": 1.6585106382978725,
"grad_norm": 3.7170534133911133,
"learning_rate": 7.982865763493022e-06,
"loss": 0.7505,
"step": 6236
},
{
"epoch": 1.6587765957446807,
"grad_norm": 3.678074598312378,
"learning_rate": 7.982159862778645e-06,
"loss": 0.7589,
"step": 6237
},
{
"epoch": 1.6590425531914894,
"grad_norm": 3.895219326019287,
"learning_rate": 7.98145386979273e-06,
"loss": 0.6712,
"step": 6238
},
{
"epoch": 1.6593085106382979,
"grad_norm": 4.339925765991211,
"learning_rate": 7.980747784557123e-06,
"loss": 0.9584,
"step": 6239
},
{
"epoch": 1.6595744680851063,
"grad_norm": 3.8446319103240967,
"learning_rate": 7.98004160709367e-06,
"loss": 0.7287,
"step": 6240
},
{
"epoch": 1.659840425531915,
"grad_norm": 3.852252960205078,
"learning_rate": 7.979335337424222e-06,
"loss": 0.9698,
"step": 6241
},
{
"epoch": 1.6601063829787233,
"grad_norm": 3.7780802249908447,
"learning_rate": 7.97862897557063e-06,
"loss": 0.8085,
"step": 6242
},
{
"epoch": 1.660372340425532,
"grad_norm": 3.954035758972168,
"learning_rate": 7.97792252155475e-06,
"loss": 0.8768,
"step": 6243
},
{
"epoch": 1.6606382978723404,
"grad_norm": 3.267712116241455,
"learning_rate": 7.977215975398442e-06,
"loss": 0.6974,
"step": 6244
},
{
"epoch": 1.6609042553191489,
"grad_norm": 3.534168243408203,
"learning_rate": 7.976509337123567e-06,
"loss": 0.8029,
"step": 6245
},
{
"epoch": 1.6611702127659576,
"grad_norm": 3.9597525596618652,
"learning_rate": 7.975802606751989e-06,
"loss": 0.7754,
"step": 6246
},
{
"epoch": 1.6614361702127658,
"grad_norm": 4.123916149139404,
"learning_rate": 7.975095784305572e-06,
"loss": 0.8451,
"step": 6247
},
{
"epoch": 1.6617021276595745,
"grad_norm": 3.989689588546753,
"learning_rate": 7.97438886980619e-06,
"loss": 0.7707,
"step": 6248
},
{
"epoch": 1.661968085106383,
"grad_norm": 4.045599937438965,
"learning_rate": 7.973681863275715e-06,
"loss": 0.7474,
"step": 6249
},
{
"epoch": 1.6622340425531914,
"grad_norm": 4.4239420890808105,
"learning_rate": 7.972974764736023e-06,
"loss": 0.7858,
"step": 6250
},
{
"epoch": 1.6625,
"grad_norm": 3.499119520187378,
"learning_rate": 7.972267574208991e-06,
"loss": 0.7021,
"step": 6251
},
{
"epoch": 1.6627659574468086,
"grad_norm": 4.45729923248291,
"learning_rate": 7.971560291716501e-06,
"loss": 0.9094,
"step": 6252
},
{
"epoch": 1.663031914893617,
"grad_norm": 4.242092609405518,
"learning_rate": 7.970852917280434e-06,
"loss": 0.8807,
"step": 6253
},
{
"epoch": 1.6632978723404257,
"grad_norm": 3.947512149810791,
"learning_rate": 7.970145450922684e-06,
"loss": 0.8778,
"step": 6254
},
{
"epoch": 1.663563829787234,
"grad_norm": 5.4790167808532715,
"learning_rate": 7.969437892665134e-06,
"loss": 0.8196,
"step": 6255
},
{
"epoch": 1.6638297872340426,
"grad_norm": 3.856820583343506,
"learning_rate": 7.968730242529681e-06,
"loss": 0.7653,
"step": 6256
},
{
"epoch": 1.664095744680851,
"grad_norm": 4.446346759796143,
"learning_rate": 7.968022500538219e-06,
"loss": 0.9374,
"step": 6257
},
{
"epoch": 1.6643617021276595,
"grad_norm": 4.079642295837402,
"learning_rate": 7.967314666712647e-06,
"loss": 0.8123,
"step": 6258
},
{
"epoch": 1.6646276595744682,
"grad_norm": 4.338622570037842,
"learning_rate": 7.966606741074864e-06,
"loss": 0.7508,
"step": 6259
},
{
"epoch": 1.6648936170212765,
"grad_norm": 3.974862813949585,
"learning_rate": 7.965898723646777e-06,
"loss": 0.8222,
"step": 6260
},
{
"epoch": 1.6651595744680852,
"grad_norm": 4.263228416442871,
"learning_rate": 7.96519061445029e-06,
"loss": 0.9591,
"step": 6261
},
{
"epoch": 1.6654255319148936,
"grad_norm": 3.6377105712890625,
"learning_rate": 7.964482413507316e-06,
"loss": 0.7791,
"step": 6262
},
{
"epoch": 1.665691489361702,
"grad_norm": 3.3404452800750732,
"learning_rate": 7.963774120839767e-06,
"loss": 0.7668,
"step": 6263
},
{
"epoch": 1.6659574468085108,
"grad_norm": 3.6252615451812744,
"learning_rate": 7.963065736469555e-06,
"loss": 0.7628,
"step": 6264
},
{
"epoch": 1.666223404255319,
"grad_norm": 4.053292751312256,
"learning_rate": 7.9623572604186e-06,
"loss": 0.9255,
"step": 6265
},
{
"epoch": 1.6664893617021277,
"grad_norm": 3.612187385559082,
"learning_rate": 7.961648692708826e-06,
"loss": 0.7864,
"step": 6266
},
{
"epoch": 1.6667553191489362,
"grad_norm": 4.19817590713501,
"learning_rate": 7.960940033362152e-06,
"loss": 0.8414,
"step": 6267
},
{
"epoch": 1.6670212765957446,
"grad_norm": 3.919515371322632,
"learning_rate": 7.960231282400509e-06,
"loss": 0.7358,
"step": 6268
},
{
"epoch": 1.6672872340425533,
"grad_norm": 4.0831732749938965,
"learning_rate": 7.959522439845825e-06,
"loss": 0.7613,
"step": 6269
},
{
"epoch": 1.6675531914893615,
"grad_norm": 4.200259685516357,
"learning_rate": 7.958813505720031e-06,
"loss": 0.9464,
"step": 6270
},
{
"epoch": 1.6678191489361702,
"grad_norm": 4.281257152557373,
"learning_rate": 7.958104480045066e-06,
"loss": 0.8795,
"step": 6271
},
{
"epoch": 1.6680851063829787,
"grad_norm": 3.907784938812256,
"learning_rate": 7.957395362842864e-06,
"loss": 0.6676,
"step": 6272
},
{
"epoch": 1.6683510638297872,
"grad_norm": 4.122792720794678,
"learning_rate": 7.956686154135368e-06,
"loss": 0.7808,
"step": 6273
},
{
"epoch": 1.6686170212765958,
"grad_norm": 4.015087127685547,
"learning_rate": 7.95597685394452e-06,
"loss": 0.8536,
"step": 6274
},
{
"epoch": 1.6688829787234043,
"grad_norm": 3.8058676719665527,
"learning_rate": 7.95526746229227e-06,
"loss": 0.8526,
"step": 6275
},
{
"epoch": 1.6691489361702128,
"grad_norm": 4.022008895874023,
"learning_rate": 7.954557979200562e-06,
"loss": 0.7642,
"step": 6276
},
{
"epoch": 1.6694148936170212,
"grad_norm": 3.820610284805298,
"learning_rate": 7.953848404691354e-06,
"loss": 0.8786,
"step": 6277
},
{
"epoch": 1.6696808510638297,
"grad_norm": 3.6477434635162354,
"learning_rate": 7.9531387387866e-06,
"loss": 0.8277,
"step": 6278
},
{
"epoch": 1.6699468085106384,
"grad_norm": 4.075412273406982,
"learning_rate": 7.952428981508254e-06,
"loss": 0.8095,
"step": 6279
},
{
"epoch": 1.6702127659574468,
"grad_norm": 4.030799388885498,
"learning_rate": 7.951719132878279e-06,
"loss": 0.7007,
"step": 6280
},
{
"epoch": 1.6704787234042553,
"grad_norm": 4.039961338043213,
"learning_rate": 7.95100919291864e-06,
"loss": 0.8829,
"step": 6281
},
{
"epoch": 1.670744680851064,
"grad_norm": 3.8483259677886963,
"learning_rate": 7.950299161651303e-06,
"loss": 0.7494,
"step": 6282
},
{
"epoch": 1.6710106382978722,
"grad_norm": 3.8535609245300293,
"learning_rate": 7.949589039098235e-06,
"loss": 0.7572,
"step": 6283
},
{
"epoch": 1.671276595744681,
"grad_norm": 4.3112311363220215,
"learning_rate": 7.94887882528141e-06,
"loss": 0.9061,
"step": 6284
},
{
"epoch": 1.6715425531914894,
"grad_norm": 3.8851253986358643,
"learning_rate": 7.948168520222802e-06,
"loss": 0.9334,
"step": 6285
},
{
"epoch": 1.6718085106382978,
"grad_norm": 4.051077842712402,
"learning_rate": 7.94745812394439e-06,
"loss": 0.8568,
"step": 6286
},
{
"epoch": 1.6720744680851065,
"grad_norm": 3.8714540004730225,
"learning_rate": 7.946747636468153e-06,
"loss": 0.8496,
"step": 6287
},
{
"epoch": 1.6723404255319148,
"grad_norm": 3.9510905742645264,
"learning_rate": 7.946037057816075e-06,
"loss": 0.8367,
"step": 6288
},
{
"epoch": 1.6726063829787234,
"grad_norm": 4.504206657409668,
"learning_rate": 7.945326388010141e-06,
"loss": 0.8716,
"step": 6289
},
{
"epoch": 1.672872340425532,
"grad_norm": 4.116037845611572,
"learning_rate": 7.944615627072341e-06,
"loss": 0.8481,
"step": 6290
},
{
"epoch": 1.6731382978723404,
"grad_norm": 3.539327383041382,
"learning_rate": 7.943904775024667e-06,
"loss": 0.6687,
"step": 6291
},
{
"epoch": 1.673404255319149,
"grad_norm": 4.1150898933410645,
"learning_rate": 7.943193831889112e-06,
"loss": 0.9299,
"step": 6292
},
{
"epoch": 1.6736702127659573,
"grad_norm": 4.379646301269531,
"learning_rate": 7.942482797687675e-06,
"loss": 0.8867,
"step": 6293
},
{
"epoch": 1.673936170212766,
"grad_norm": 3.6255533695220947,
"learning_rate": 7.941771672442358e-06,
"loss": 0.6831,
"step": 6294
},
{
"epoch": 1.6742021276595744,
"grad_norm": 4.358723163604736,
"learning_rate": 7.94106045617516e-06,
"loss": 0.6923,
"step": 6295
},
{
"epoch": 1.674468085106383,
"grad_norm": 3.967379093170166,
"learning_rate": 7.94034914890809e-06,
"loss": 0.8413,
"step": 6296
},
{
"epoch": 1.6747340425531916,
"grad_norm": 4.233070373535156,
"learning_rate": 7.939637750663153e-06,
"loss": 0.9755,
"step": 6297
},
{
"epoch": 1.675,
"grad_norm": 3.4149739742279053,
"learning_rate": 7.938926261462366e-06,
"loss": 0.6741,
"step": 6298
},
{
"epoch": 1.6752659574468085,
"grad_norm": 4.045546054840088,
"learning_rate": 7.938214681327739e-06,
"loss": 0.8484,
"step": 6299
},
{
"epoch": 1.675531914893617,
"grad_norm": 4.123802185058594,
"learning_rate": 7.93750301028129e-06,
"loss": 0.8398,
"step": 6300
},
{
"epoch": 1.6757978723404254,
"grad_norm": 3.7821900844573975,
"learning_rate": 7.936791248345041e-06,
"loss": 0.7785,
"step": 6301
},
{
"epoch": 1.6760638297872341,
"grad_norm": 3.6713192462921143,
"learning_rate": 7.936079395541013e-06,
"loss": 0.7191,
"step": 6302
},
{
"epoch": 1.6763297872340426,
"grad_norm": 4.085387706756592,
"learning_rate": 7.935367451891232e-06,
"loss": 0.684,
"step": 6303
},
{
"epoch": 1.676595744680851,
"grad_norm": 3.6555123329162598,
"learning_rate": 7.934655417417724e-06,
"loss": 0.7526,
"step": 6304
},
{
"epoch": 1.6768617021276597,
"grad_norm": 3.9464025497436523,
"learning_rate": 7.933943292142524e-06,
"loss": 0.7544,
"step": 6305
},
{
"epoch": 1.677127659574468,
"grad_norm": 3.74369215965271,
"learning_rate": 7.933231076087662e-06,
"loss": 0.7524,
"step": 6306
},
{
"epoch": 1.6773936170212767,
"grad_norm": 4.703025817871094,
"learning_rate": 7.932518769275179e-06,
"loss": 0.8955,
"step": 6307
},
{
"epoch": 1.6776595744680851,
"grad_norm": 4.241019248962402,
"learning_rate": 7.931806371727111e-06,
"loss": 0.7727,
"step": 6308
},
{
"epoch": 1.6779255319148936,
"grad_norm": 4.029513359069824,
"learning_rate": 7.931093883465503e-06,
"loss": 0.7951,
"step": 6309
},
{
"epoch": 1.6781914893617023,
"grad_norm": 3.7332520484924316,
"learning_rate": 7.930381304512401e-06,
"loss": 0.7148,
"step": 6310
},
{
"epoch": 1.6784574468085105,
"grad_norm": 3.734999179840088,
"learning_rate": 7.92966863488985e-06,
"loss": 0.7856,
"step": 6311
},
{
"epoch": 1.6787234042553192,
"grad_norm": 4.164159774780273,
"learning_rate": 7.928955874619902e-06,
"loss": 0.8163,
"step": 6312
},
{
"epoch": 1.6789893617021276,
"grad_norm": 4.043959617614746,
"learning_rate": 7.928243023724611e-06,
"loss": 0.8262,
"step": 6313
},
{
"epoch": 1.679255319148936,
"grad_norm": 3.5217018127441406,
"learning_rate": 7.927530082226034e-06,
"loss": 0.7066,
"step": 6314
},
{
"epoch": 1.6795212765957448,
"grad_norm": 4.035088539123535,
"learning_rate": 7.926817050146227e-06,
"loss": 0.9041,
"step": 6315
},
{
"epoch": 1.679787234042553,
"grad_norm": 3.8981032371520996,
"learning_rate": 7.926103927507257e-06,
"loss": 0.8896,
"step": 6316
},
{
"epoch": 1.6800531914893617,
"grad_norm": 3.613386392593384,
"learning_rate": 7.925390714331185e-06,
"loss": 0.8692,
"step": 6317
},
{
"epoch": 1.6803191489361702,
"grad_norm": 4.042194843292236,
"learning_rate": 7.924677410640081e-06,
"loss": 0.8251,
"step": 6318
},
{
"epoch": 1.6805851063829786,
"grad_norm": 3.749028444290161,
"learning_rate": 7.923964016456014e-06,
"loss": 0.8519,
"step": 6319
},
{
"epoch": 1.6808510638297873,
"grad_norm": 3.482661008834839,
"learning_rate": 7.92325053180106e-06,
"loss": 0.6798,
"step": 6320
},
{
"epoch": 1.6811170212765958,
"grad_norm": 3.876594066619873,
"learning_rate": 7.92253695669729e-06,
"loss": 0.8437,
"step": 6321
},
{
"epoch": 1.6813829787234043,
"grad_norm": 3.941342830657959,
"learning_rate": 7.921823291166785e-06,
"loss": 0.7915,
"step": 6322
},
{
"epoch": 1.6816489361702127,
"grad_norm": 4.015593528747559,
"learning_rate": 7.92110953523163e-06,
"loss": 0.8184,
"step": 6323
},
{
"epoch": 1.6819148936170212,
"grad_norm": 4.370626449584961,
"learning_rate": 7.920395688913906e-06,
"loss": 0.962,
"step": 6324
},
{
"epoch": 1.6821808510638299,
"grad_norm": 3.7897567749023438,
"learning_rate": 7.919681752235701e-06,
"loss": 0.9113,
"step": 6325
},
{
"epoch": 1.6824468085106383,
"grad_norm": 3.8005380630493164,
"learning_rate": 7.918967725219104e-06,
"loss": 0.869,
"step": 6326
},
{
"epoch": 1.6827127659574468,
"grad_norm": 4.056982040405273,
"learning_rate": 7.918253607886212e-06,
"loss": 0.8451,
"step": 6327
},
{
"epoch": 1.6829787234042555,
"grad_norm": 3.5084946155548096,
"learning_rate": 7.917539400259116e-06,
"loss": 0.7714,
"step": 6328
},
{
"epoch": 1.6832446808510637,
"grad_norm": 3.9143457412719727,
"learning_rate": 7.916825102359914e-06,
"loss": 0.8663,
"step": 6329
},
{
"epoch": 1.6835106382978724,
"grad_norm": 3.867074966430664,
"learning_rate": 7.916110714210711e-06,
"loss": 0.8741,
"step": 6330
},
{
"epoch": 1.6837765957446809,
"grad_norm": 3.8426260948181152,
"learning_rate": 7.91539623583361e-06,
"loss": 0.8347,
"step": 6331
},
{
"epoch": 1.6840425531914893,
"grad_norm": 3.8092234134674072,
"learning_rate": 7.914681667250714e-06,
"loss": 0.8565,
"step": 6332
},
{
"epoch": 1.684308510638298,
"grad_norm": 3.754821538925171,
"learning_rate": 7.913967008484138e-06,
"loss": 0.6845,
"step": 6333
},
{
"epoch": 1.6845744680851062,
"grad_norm": 4.067741394042969,
"learning_rate": 7.913252259555992e-06,
"loss": 0.7716,
"step": 6334
},
{
"epoch": 1.684840425531915,
"grad_norm": 4.096173286437988,
"learning_rate": 7.91253742048839e-06,
"loss": 0.8299,
"step": 6335
},
{
"epoch": 1.6851063829787234,
"grad_norm": 4.119457721710205,
"learning_rate": 7.911822491303453e-06,
"loss": 0.8621,
"step": 6336
},
{
"epoch": 1.6853723404255319,
"grad_norm": 4.278772354125977,
"learning_rate": 7.911107472023298e-06,
"loss": 0.8446,
"step": 6337
},
{
"epoch": 1.6856382978723405,
"grad_norm": 3.7795321941375732,
"learning_rate": 7.910392362670051e-06,
"loss": 0.6943,
"step": 6338
},
{
"epoch": 1.6859042553191488,
"grad_norm": 3.9733240604400635,
"learning_rate": 7.909677163265838e-06,
"loss": 0.6562,
"step": 6339
},
{
"epoch": 1.6861702127659575,
"grad_norm": 4.160102844238281,
"learning_rate": 7.908961873832788e-06,
"loss": 0.7915,
"step": 6340
},
{
"epoch": 1.686436170212766,
"grad_norm": 4.3431525230407715,
"learning_rate": 7.908246494393032e-06,
"loss": 0.8474,
"step": 6341
},
{
"epoch": 1.6867021276595744,
"grad_norm": 4.230860233306885,
"learning_rate": 7.907531024968705e-06,
"loss": 0.7098,
"step": 6342
},
{
"epoch": 1.686968085106383,
"grad_norm": 4.223114967346191,
"learning_rate": 7.906815465581945e-06,
"loss": 0.7278,
"step": 6343
},
{
"epoch": 1.6872340425531915,
"grad_norm": 4.246336460113525,
"learning_rate": 7.906099816254895e-06,
"loss": 0.825,
"step": 6344
},
{
"epoch": 1.6875,
"grad_norm": 3.5722670555114746,
"learning_rate": 7.905384077009693e-06,
"loss": 0.8907,
"step": 6345
},
{
"epoch": 1.6877659574468085,
"grad_norm": 4.00727653503418,
"learning_rate": 7.904668247868486e-06,
"loss": 0.7821,
"step": 6346
},
{
"epoch": 1.688031914893617,
"grad_norm": 3.889538049697876,
"learning_rate": 7.903952328853426e-06,
"loss": 0.7967,
"step": 6347
},
{
"epoch": 1.6882978723404256,
"grad_norm": 3.923154830932617,
"learning_rate": 7.90323631998666e-06,
"loss": 0.8152,
"step": 6348
},
{
"epoch": 1.688563829787234,
"grad_norm": 4.059485912322998,
"learning_rate": 7.902520221290345e-06,
"loss": 0.7824,
"step": 6349
},
{
"epoch": 1.6888297872340425,
"grad_norm": 4.1757378578186035,
"learning_rate": 7.901804032786637e-06,
"loss": 0.8839,
"step": 6350
},
{
"epoch": 1.6890957446808512,
"grad_norm": 3.6736671924591064,
"learning_rate": 7.901087754497694e-06,
"loss": 0.684,
"step": 6351
},
{
"epoch": 1.6893617021276595,
"grad_norm": 4.116995811462402,
"learning_rate": 7.900371386445682e-06,
"loss": 0.9625,
"step": 6352
},
{
"epoch": 1.6896276595744681,
"grad_norm": 3.686619758605957,
"learning_rate": 7.899654928652765e-06,
"loss": 0.8667,
"step": 6353
},
{
"epoch": 1.6898936170212766,
"grad_norm": 4.151339054107666,
"learning_rate": 7.89893838114111e-06,
"loss": 0.8102,
"step": 6354
},
{
"epoch": 1.690159574468085,
"grad_norm": 3.7917020320892334,
"learning_rate": 7.898221743932887e-06,
"loss": 0.934,
"step": 6355
},
{
"epoch": 1.6904255319148938,
"grad_norm": 3.5394623279571533,
"learning_rate": 7.897505017050272e-06,
"loss": 0.7577,
"step": 6356
},
{
"epoch": 1.690691489361702,
"grad_norm": 4.058946132659912,
"learning_rate": 7.896788200515442e-06,
"loss": 0.7536,
"step": 6357
},
{
"epoch": 1.6909574468085107,
"grad_norm": 3.8410744667053223,
"learning_rate": 7.896071294350574e-06,
"loss": 0.8212,
"step": 6358
},
{
"epoch": 1.6912234042553191,
"grad_norm": 3.915674924850464,
"learning_rate": 7.89535429857785e-06,
"loss": 0.8288,
"step": 6359
},
{
"epoch": 1.6914893617021276,
"grad_norm": 3.954108715057373,
"learning_rate": 7.894637213219454e-06,
"loss": 0.7738,
"step": 6360
},
{
"epoch": 1.6917553191489363,
"grad_norm": 4.220264434814453,
"learning_rate": 7.893920038297575e-06,
"loss": 0.7686,
"step": 6361
},
{
"epoch": 1.6920212765957445,
"grad_norm": 4.50542688369751,
"learning_rate": 7.893202773834404e-06,
"loss": 0.825,
"step": 6362
},
{
"epoch": 1.6922872340425532,
"grad_norm": 4.274563312530518,
"learning_rate": 7.892485419852131e-06,
"loss": 0.8119,
"step": 6363
},
{
"epoch": 1.6925531914893617,
"grad_norm": 3.8938279151916504,
"learning_rate": 7.891767976372957e-06,
"loss": 0.9073,
"step": 6364
},
{
"epoch": 1.6928191489361701,
"grad_norm": 3.949944257736206,
"learning_rate": 7.891050443419074e-06,
"loss": 0.757,
"step": 6365
},
{
"epoch": 1.6930851063829788,
"grad_norm": 4.313665866851807,
"learning_rate": 7.890332821012687e-06,
"loss": 0.8997,
"step": 6366
},
{
"epoch": 1.6933510638297873,
"grad_norm": 4.165764331817627,
"learning_rate": 7.889615109176e-06,
"loss": 0.8262,
"step": 6367
},
{
"epoch": 1.6936170212765957,
"grad_norm": 3.462186336517334,
"learning_rate": 7.88889730793122e-06,
"loss": 0.6989,
"step": 6368
},
{
"epoch": 1.6938829787234042,
"grad_norm": 4.610195159912109,
"learning_rate": 7.888179417300556e-06,
"loss": 0.924,
"step": 6369
},
{
"epoch": 1.6941489361702127,
"grad_norm": 3.8986306190490723,
"learning_rate": 7.887461437306221e-06,
"loss": 0.8204,
"step": 6370
},
{
"epoch": 1.6944148936170214,
"grad_norm": 3.9623425006866455,
"learning_rate": 7.886743367970428e-06,
"loss": 0.8856,
"step": 6371
},
{
"epoch": 1.6946808510638298,
"grad_norm": 3.7937700748443604,
"learning_rate": 7.886025209315396e-06,
"loss": 0.905,
"step": 6372
},
{
"epoch": 1.6949468085106383,
"grad_norm": 3.6256890296936035,
"learning_rate": 7.885306961363347e-06,
"loss": 0.7097,
"step": 6373
},
{
"epoch": 1.695212765957447,
"grad_norm": 4.079528331756592,
"learning_rate": 7.884588624136505e-06,
"loss": 0.8255,
"step": 6374
},
{
"epoch": 1.6954787234042552,
"grad_norm": 3.7182741165161133,
"learning_rate": 7.883870197657094e-06,
"loss": 0.671,
"step": 6375
},
{
"epoch": 1.695744680851064,
"grad_norm": 3.2320377826690674,
"learning_rate": 7.883151681947343e-06,
"loss": 0.6876,
"step": 6376
},
{
"epoch": 1.6960106382978724,
"grad_norm": 3.610546588897705,
"learning_rate": 7.882433077029484e-06,
"loss": 0.7904,
"step": 6377
},
{
"epoch": 1.6962765957446808,
"grad_norm": 3.8851020336151123,
"learning_rate": 7.881714382925753e-06,
"loss": 0.7701,
"step": 6378
},
{
"epoch": 1.6965425531914895,
"grad_norm": 3.727907657623291,
"learning_rate": 7.880995599658387e-06,
"loss": 0.8374,
"step": 6379
},
{
"epoch": 1.6968085106382977,
"grad_norm": 3.564770221710205,
"learning_rate": 7.880276727249623e-06,
"loss": 0.6483,
"step": 6380
},
{
"epoch": 1.6970744680851064,
"grad_norm": 4.088687419891357,
"learning_rate": 7.879557765721707e-06,
"loss": 0.7902,
"step": 6381
},
{
"epoch": 1.6973404255319149,
"grad_norm": 4.087176322937012,
"learning_rate": 7.878838715096883e-06,
"loss": 0.8723,
"step": 6382
},
{
"epoch": 1.6976063829787233,
"grad_norm": 3.7613840103149414,
"learning_rate": 7.878119575397401e-06,
"loss": 0.7559,
"step": 6383
},
{
"epoch": 1.697872340425532,
"grad_norm": 4.426526069641113,
"learning_rate": 7.87740034664551e-06,
"loss": 1.1472,
"step": 6384
},
{
"epoch": 1.6981382978723403,
"grad_norm": 3.5922887325286865,
"learning_rate": 7.876681028863464e-06,
"loss": 0.8193,
"step": 6385
},
{
"epoch": 1.698404255319149,
"grad_norm": 4.141395092010498,
"learning_rate": 7.875961622073523e-06,
"loss": 0.8629,
"step": 6386
},
{
"epoch": 1.6986702127659574,
"grad_norm": 3.894594669342041,
"learning_rate": 7.875242126297939e-06,
"loss": 0.8301,
"step": 6387
},
{
"epoch": 1.6989361702127659,
"grad_norm": 3.929243564605713,
"learning_rate": 7.87452254155898e-06,
"loss": 0.8301,
"step": 6388
},
{
"epoch": 1.6992021276595746,
"grad_norm": 3.575058698654175,
"learning_rate": 7.87380286787891e-06,
"loss": 0.7595,
"step": 6389
},
{
"epoch": 1.699468085106383,
"grad_norm": 3.9643123149871826,
"learning_rate": 7.873083105279996e-06,
"loss": 0.8527,
"step": 6390
},
{
"epoch": 1.6997340425531915,
"grad_norm": 3.8817079067230225,
"learning_rate": 7.872363253784508e-06,
"loss": 0.6764,
"step": 6391
},
{
"epoch": 1.7,
"grad_norm": 4.209853649139404,
"learning_rate": 7.871643313414718e-06,
"loss": 0.8082,
"step": 6392
},
{
"epoch": 1.7002659574468084,
"grad_norm": 3.9260003566741943,
"learning_rate": 7.870923284192904e-06,
"loss": 0.7839,
"step": 6393
},
{
"epoch": 1.700531914893617,
"grad_norm": 3.726177453994751,
"learning_rate": 7.870203166141343e-06,
"loss": 0.721,
"step": 6394
},
{
"epoch": 1.7007978723404256,
"grad_norm": 4.2059326171875,
"learning_rate": 7.869482959282318e-06,
"loss": 0.7346,
"step": 6395
},
{
"epoch": 1.701063829787234,
"grad_norm": 4.017068862915039,
"learning_rate": 7.868762663638111e-06,
"loss": 0.6286,
"step": 6396
},
{
"epoch": 1.7013297872340427,
"grad_norm": 3.6799540519714355,
"learning_rate": 7.86804227923101e-06,
"loss": 0.7389,
"step": 6397
},
{
"epoch": 1.701595744680851,
"grad_norm": 3.797459602355957,
"learning_rate": 7.867321806083303e-06,
"loss": 0.7271,
"step": 6398
},
{
"epoch": 1.7018617021276596,
"grad_norm": 3.9897758960723877,
"learning_rate": 7.866601244217284e-06,
"loss": 0.8449,
"step": 6399
},
{
"epoch": 1.702127659574468,
"grad_norm": 4.305942058563232,
"learning_rate": 7.86588059365525e-06,
"loss": 0.8108,
"step": 6400
},
{
"epoch": 1.7023936170212766,
"grad_norm": 3.727057456970215,
"learning_rate": 7.865159854419493e-06,
"loss": 0.801,
"step": 6401
},
{
"epoch": 1.7026595744680852,
"grad_norm": 3.9825263023376465,
"learning_rate": 7.864439026532318e-06,
"loss": 0.8026,
"step": 6402
},
{
"epoch": 1.7029255319148935,
"grad_norm": 3.602372884750366,
"learning_rate": 7.863718110016025e-06,
"loss": 0.6829,
"step": 6403
},
{
"epoch": 1.7031914893617022,
"grad_norm": 4.175540447235107,
"learning_rate": 7.862997104892924e-06,
"loss": 0.7491,
"step": 6404
},
{
"epoch": 1.7034574468085106,
"grad_norm": 3.7469863891601562,
"learning_rate": 7.862276011185323e-06,
"loss": 0.6495,
"step": 6405
},
{
"epoch": 1.703723404255319,
"grad_norm": 3.860929012298584,
"learning_rate": 7.861554828915531e-06,
"loss": 0.8538,
"step": 6406
},
{
"epoch": 1.7039893617021278,
"grad_norm": 3.6298773288726807,
"learning_rate": 7.860833558105863e-06,
"loss": 0.7653,
"step": 6407
},
{
"epoch": 1.704255319148936,
"grad_norm": 3.6208910942077637,
"learning_rate": 7.860112198778638e-06,
"loss": 0.8272,
"step": 6408
},
{
"epoch": 1.7045212765957447,
"grad_norm": 3.9331130981445312,
"learning_rate": 7.859390750956172e-06,
"loss": 0.802,
"step": 6409
},
{
"epoch": 1.7047872340425532,
"grad_norm": 3.843306303024292,
"learning_rate": 7.858669214660792e-06,
"loss": 0.8426,
"step": 6410
},
{
"epoch": 1.7050531914893616,
"grad_norm": 3.844093084335327,
"learning_rate": 7.857947589914819e-06,
"loss": 0.7836,
"step": 6411
},
{
"epoch": 1.7053191489361703,
"grad_norm": 3.7956225872039795,
"learning_rate": 7.857225876740585e-06,
"loss": 0.7151,
"step": 6412
},
{
"epoch": 1.7055851063829788,
"grad_norm": 3.568847417831421,
"learning_rate": 7.856504075160416e-06,
"loss": 0.8406,
"step": 6413
},
{
"epoch": 1.7058510638297872,
"grad_norm": 5.6517462730407715,
"learning_rate": 7.855782185196648e-06,
"loss": 0.8804,
"step": 6414
},
{
"epoch": 1.7061170212765957,
"grad_norm": 3.6728999614715576,
"learning_rate": 7.855060206871618e-06,
"loss": 0.7445,
"step": 6415
},
{
"epoch": 1.7063829787234042,
"grad_norm": 4.358402729034424,
"learning_rate": 7.854338140207662e-06,
"loss": 0.7949,
"step": 6416
},
{
"epoch": 1.7066489361702128,
"grad_norm": 4.032132625579834,
"learning_rate": 7.853615985227126e-06,
"loss": 0.8492,
"step": 6417
},
{
"epoch": 1.7069148936170213,
"grad_norm": 4.185794353485107,
"learning_rate": 7.85289374195235e-06,
"loss": 0.9054,
"step": 6418
},
{
"epoch": 1.7071808510638298,
"grad_norm": 4.639225006103516,
"learning_rate": 7.852171410405684e-06,
"loss": 0.9118,
"step": 6419
},
{
"epoch": 1.7074468085106385,
"grad_norm": 3.67490816116333,
"learning_rate": 7.851448990609476e-06,
"loss": 0.8046,
"step": 6420
},
{
"epoch": 1.7077127659574467,
"grad_norm": 3.879056692123413,
"learning_rate": 7.850726482586078e-06,
"loss": 0.6831,
"step": 6421
},
{
"epoch": 1.7079787234042554,
"grad_norm": 3.963789463043213,
"learning_rate": 7.850003886357847e-06,
"loss": 0.7881,
"step": 6422
},
{
"epoch": 1.7082446808510638,
"grad_norm": 4.229506015777588,
"learning_rate": 7.849281201947142e-06,
"loss": 0.8157,
"step": 6423
},
{
"epoch": 1.7085106382978723,
"grad_norm": 4.29874849319458,
"learning_rate": 7.84855842937632e-06,
"loss": 0.9049,
"step": 6424
},
{
"epoch": 1.708776595744681,
"grad_norm": 3.8917417526245117,
"learning_rate": 7.847835568667746e-06,
"loss": 0.7922,
"step": 6425
},
{
"epoch": 1.7090425531914892,
"grad_norm": 3.8562116622924805,
"learning_rate": 7.847112619843789e-06,
"loss": 0.7363,
"step": 6426
},
{
"epoch": 1.709308510638298,
"grad_norm": 4.495066165924072,
"learning_rate": 7.846389582926814e-06,
"loss": 0.977,
"step": 6427
},
{
"epoch": 1.7095744680851064,
"grad_norm": 3.899489164352417,
"learning_rate": 7.845666457939193e-06,
"loss": 0.7289,
"step": 6428
},
{
"epoch": 1.7098404255319148,
"grad_norm": 3.9472427368164062,
"learning_rate": 7.844943244903303e-06,
"loss": 0.8273,
"step": 6429
},
{
"epoch": 1.7101063829787235,
"grad_norm": 4.187959671020508,
"learning_rate": 7.84421994384152e-06,
"loss": 0.8658,
"step": 6430
},
{
"epoch": 1.7103723404255318,
"grad_norm": 4.103062152862549,
"learning_rate": 7.843496554776222e-06,
"loss": 0.8097,
"step": 6431
},
{
"epoch": 1.7106382978723405,
"grad_norm": 3.977741241455078,
"learning_rate": 7.842773077729793e-06,
"loss": 0.799,
"step": 6432
},
{
"epoch": 1.710904255319149,
"grad_norm": 3.8812167644500732,
"learning_rate": 7.842049512724618e-06,
"loss": 0.6743,
"step": 6433
},
{
"epoch": 1.7111702127659574,
"grad_norm": 4.060866832733154,
"learning_rate": 7.841325859783086e-06,
"loss": 0.7479,
"step": 6434
},
{
"epoch": 1.711436170212766,
"grad_norm": 4.428943634033203,
"learning_rate": 7.840602118927584e-06,
"loss": 0.9101,
"step": 6435
},
{
"epoch": 1.7117021276595743,
"grad_norm": 3.989323139190674,
"learning_rate": 7.83987829018051e-06,
"loss": 0.8308,
"step": 6436
},
{
"epoch": 1.711968085106383,
"grad_norm": 4.173738479614258,
"learning_rate": 7.83915437356426e-06,
"loss": 0.8025,
"step": 6437
},
{
"epoch": 1.7122340425531914,
"grad_norm": 3.7683372497558594,
"learning_rate": 7.838430369101227e-06,
"loss": 0.8168,
"step": 6438
},
{
"epoch": 1.7125,
"grad_norm": 3.9382693767547607,
"learning_rate": 7.837706276813819e-06,
"loss": 0.8469,
"step": 6439
},
{
"epoch": 1.7127659574468086,
"grad_norm": 4.1283278465271,
"learning_rate": 7.836982096724438e-06,
"loss": 0.7938,
"step": 6440
},
{
"epoch": 1.713031914893617,
"grad_norm": 4.033618927001953,
"learning_rate": 7.836257828855489e-06,
"loss": 0.8479,
"step": 6441
},
{
"epoch": 1.7132978723404255,
"grad_norm": 4.25187349319458,
"learning_rate": 7.835533473229385e-06,
"loss": 0.8507,
"step": 6442
},
{
"epoch": 1.7135638297872342,
"grad_norm": 4.031279563903809,
"learning_rate": 7.834809029868538e-06,
"loss": 0.8444,
"step": 6443
},
{
"epoch": 1.7138297872340424,
"grad_norm": 3.5434410572052,
"learning_rate": 7.834084498795361e-06,
"loss": 0.6862,
"step": 6444
},
{
"epoch": 1.7140957446808511,
"grad_norm": 4.158623218536377,
"learning_rate": 7.833359880032272e-06,
"loss": 0.8362,
"step": 6445
},
{
"epoch": 1.7143617021276596,
"grad_norm": 4.039031982421875,
"learning_rate": 7.832635173601692e-06,
"loss": 0.8806,
"step": 6446
},
{
"epoch": 1.714627659574468,
"grad_norm": 4.09163236618042,
"learning_rate": 7.831910379526047e-06,
"loss": 0.9957,
"step": 6447
},
{
"epoch": 1.7148936170212767,
"grad_norm": 3.4675064086914062,
"learning_rate": 7.831185497827758e-06,
"loss": 0.7451,
"step": 6448
},
{
"epoch": 1.715159574468085,
"grad_norm": 3.6473426818847656,
"learning_rate": 7.830460528529258e-06,
"loss": 0.7436,
"step": 6449
},
{
"epoch": 1.7154255319148937,
"grad_norm": 3.779623508453369,
"learning_rate": 7.829735471652978e-06,
"loss": 0.7522,
"step": 6450
},
{
"epoch": 1.7156914893617021,
"grad_norm": 3.759127616882324,
"learning_rate": 7.829010327221348e-06,
"loss": 0.8186,
"step": 6451
},
{
"epoch": 1.7159574468085106,
"grad_norm": 3.606985330581665,
"learning_rate": 7.828285095256808e-06,
"loss": 0.8916,
"step": 6452
},
{
"epoch": 1.7162234042553193,
"grad_norm": 3.6981024742126465,
"learning_rate": 7.8275597757818e-06,
"loss": 0.7967,
"step": 6453
},
{
"epoch": 1.7164893617021275,
"grad_norm": 3.8665547370910645,
"learning_rate": 7.826834368818761e-06,
"loss": 0.731,
"step": 6454
},
{
"epoch": 1.7167553191489362,
"grad_norm": 3.547314167022705,
"learning_rate": 7.826108874390141e-06,
"loss": 0.7793,
"step": 6455
},
{
"epoch": 1.7170212765957447,
"grad_norm": 3.823787212371826,
"learning_rate": 7.825383292518383e-06,
"loss": 0.7854,
"step": 6456
},
{
"epoch": 1.7172872340425531,
"grad_norm": 4.252329349517822,
"learning_rate": 7.82465762322594e-06,
"loss": 0.9033,
"step": 6457
},
{
"epoch": 1.7175531914893618,
"grad_norm": 3.9819960594177246,
"learning_rate": 7.823931866535264e-06,
"loss": 0.9616,
"step": 6458
},
{
"epoch": 1.71781914893617,
"grad_norm": 4.099963665008545,
"learning_rate": 7.823206022468812e-06,
"loss": 0.8145,
"step": 6459
},
{
"epoch": 1.7180851063829787,
"grad_norm": 4.146093368530273,
"learning_rate": 7.82248009104904e-06,
"loss": 0.7693,
"step": 6460
},
{
"epoch": 1.7183510638297872,
"grad_norm": 3.9053497314453125,
"learning_rate": 7.821754072298414e-06,
"loss": 0.8287,
"step": 6461
},
{
"epoch": 1.7186170212765957,
"grad_norm": 4.186066150665283,
"learning_rate": 7.821027966239393e-06,
"loss": 0.7655,
"step": 6462
},
{
"epoch": 1.7188829787234043,
"grad_norm": 4.364232540130615,
"learning_rate": 7.820301772894445e-06,
"loss": 0.7746,
"step": 6463
},
{
"epoch": 1.7191489361702128,
"grad_norm": 3.838639736175537,
"learning_rate": 7.81957549228604e-06,
"loss": 0.8342,
"step": 6464
},
{
"epoch": 1.7194148936170213,
"grad_norm": 4.181699752807617,
"learning_rate": 7.818849124436651e-06,
"loss": 0.8181,
"step": 6465
},
{
"epoch": 1.71968085106383,
"grad_norm": 4.069806098937988,
"learning_rate": 7.818122669368751e-06,
"loss": 0.7486,
"step": 6466
},
{
"epoch": 1.7199468085106382,
"grad_norm": 3.9210989475250244,
"learning_rate": 7.817396127104815e-06,
"loss": 0.8064,
"step": 6467
},
{
"epoch": 1.7202127659574469,
"grad_norm": 3.3825418949127197,
"learning_rate": 7.816669497667328e-06,
"loss": 0.7276,
"step": 6468
},
{
"epoch": 1.7204787234042553,
"grad_norm": 4.07489013671875,
"learning_rate": 7.815942781078772e-06,
"loss": 0.7628,
"step": 6469
},
{
"epoch": 1.7207446808510638,
"grad_norm": 4.20849084854126,
"learning_rate": 7.815215977361628e-06,
"loss": 0.822,
"step": 6470
},
{
"epoch": 1.7210106382978725,
"grad_norm": 4.13023567199707,
"learning_rate": 7.814489086538388e-06,
"loss": 0.8117,
"step": 6471
},
{
"epoch": 1.7212765957446807,
"grad_norm": 4.143436431884766,
"learning_rate": 7.813762108631544e-06,
"loss": 0.8769,
"step": 6472
},
{
"epoch": 1.7215425531914894,
"grad_norm": 3.954219102859497,
"learning_rate": 7.813035043663585e-06,
"loss": 0.7836,
"step": 6473
},
{
"epoch": 1.7218085106382979,
"grad_norm": 3.688133478164673,
"learning_rate": 7.81230789165701e-06,
"loss": 0.8905,
"step": 6474
},
{
"epoch": 1.7220744680851063,
"grad_norm": 4.443986892700195,
"learning_rate": 7.811580652634319e-06,
"loss": 0.8933,
"step": 6475
},
{
"epoch": 1.722340425531915,
"grad_norm": 3.791365146636963,
"learning_rate": 7.810853326618012e-06,
"loss": 0.8278,
"step": 6476
},
{
"epoch": 1.7226063829787233,
"grad_norm": 4.167088031768799,
"learning_rate": 7.810125913630593e-06,
"loss": 0.7669,
"step": 6477
},
{
"epoch": 1.722872340425532,
"grad_norm": 3.4958133697509766,
"learning_rate": 7.80939841369457e-06,
"loss": 0.7095,
"step": 6478
},
{
"epoch": 1.7231382978723404,
"grad_norm": 4.2002339363098145,
"learning_rate": 7.808670826832455e-06,
"loss": 0.7463,
"step": 6479
},
{
"epoch": 1.7234042553191489,
"grad_norm": 3.795557737350464,
"learning_rate": 7.807943153066754e-06,
"loss": 0.6731,
"step": 6480
},
{
"epoch": 1.7236702127659576,
"grad_norm": 3.272183895111084,
"learning_rate": 7.807215392419988e-06,
"loss": 0.6116,
"step": 6481
},
{
"epoch": 1.7239361702127658,
"grad_norm": 4.027061462402344,
"learning_rate": 7.806487544914672e-06,
"loss": 0.8122,
"step": 6482
},
{
"epoch": 1.7242021276595745,
"grad_norm": 3.5909063816070557,
"learning_rate": 7.805759610573327e-06,
"loss": 0.7915,
"step": 6483
},
{
"epoch": 1.724468085106383,
"grad_norm": 4.0041961669921875,
"learning_rate": 7.805031589418477e-06,
"loss": 0.6859,
"step": 6484
},
{
"epoch": 1.7247340425531914,
"grad_norm": 3.9270341396331787,
"learning_rate": 7.804303481472645e-06,
"loss": 0.7585,
"step": 6485
},
{
"epoch": 1.725,
"grad_norm": 4.444969654083252,
"learning_rate": 7.803575286758365e-06,
"loss": 0.8409,
"step": 6486
},
{
"epoch": 1.7252659574468086,
"grad_norm": 4.4063262939453125,
"learning_rate": 7.802847005298162e-06,
"loss": 1.0173,
"step": 6487
},
{
"epoch": 1.725531914893617,
"grad_norm": 4.078791618347168,
"learning_rate": 7.802118637114575e-06,
"loss": 0.8106,
"step": 6488
},
{
"epoch": 1.7257978723404257,
"grad_norm": 3.8760604858398438,
"learning_rate": 7.801390182230137e-06,
"loss": 0.7751,
"step": 6489
},
{
"epoch": 1.726063829787234,
"grad_norm": 4.180771350860596,
"learning_rate": 7.800661640667388e-06,
"loss": 0.8671,
"step": 6490
},
{
"epoch": 1.7263297872340426,
"grad_norm": 3.921558380126953,
"learning_rate": 7.799933012448872e-06,
"loss": 0.8414,
"step": 6491
},
{
"epoch": 1.726595744680851,
"grad_norm": 3.8960835933685303,
"learning_rate": 7.799204297597129e-06,
"loss": 0.7135,
"step": 6492
},
{
"epoch": 1.7268617021276595,
"grad_norm": 3.834841251373291,
"learning_rate": 7.798475496134714e-06,
"loss": 0.7374,
"step": 6493
},
{
"epoch": 1.7271276595744682,
"grad_norm": 3.5948872566223145,
"learning_rate": 7.79774660808417e-06,
"loss": 0.7354,
"step": 6494
},
{
"epoch": 1.7273936170212765,
"grad_norm": 3.763976573944092,
"learning_rate": 7.797017633468052e-06,
"loss": 0.9162,
"step": 6495
},
{
"epoch": 1.7276595744680852,
"grad_norm": 3.8534562587738037,
"learning_rate": 7.796288572308914e-06,
"loss": 0.8713,
"step": 6496
},
{
"epoch": 1.7279255319148936,
"grad_norm": 4.049807071685791,
"learning_rate": 7.795559424629317e-06,
"loss": 0.8404,
"step": 6497
},
{
"epoch": 1.728191489361702,
"grad_norm": 3.8596930503845215,
"learning_rate": 7.79483019045182e-06,
"loss": 0.7868,
"step": 6498
},
{
"epoch": 1.7284574468085108,
"grad_norm": 4.452897071838379,
"learning_rate": 7.794100869798986e-06,
"loss": 0.9168,
"step": 6499
},
{
"epoch": 1.728723404255319,
"grad_norm": 3.7102370262145996,
"learning_rate": 7.79337146269338e-06,
"loss": 0.9201,
"step": 6500
},
{
"epoch": 1.728723404255319,
"eval_loss": 1.2800854444503784,
"eval_runtime": 13.8491,
"eval_samples_per_second": 28.883,
"eval_steps_per_second": 3.61,
"step": 6500
},
{
"epoch": 1.7289893617021277,
"grad_norm": 4.088536262512207,
"learning_rate": 7.792641969157574e-06,
"loss": 0.8304,
"step": 6501
},
{
"epoch": 1.7292553191489362,
"grad_norm": 3.8640379905700684,
"learning_rate": 7.791912389214138e-06,
"loss": 0.77,
"step": 6502
},
{
"epoch": 1.7295212765957446,
"grad_norm": 3.927625894546509,
"learning_rate": 7.791182722885644e-06,
"loss": 0.7303,
"step": 6503
},
{
"epoch": 1.7297872340425533,
"grad_norm": 3.960904598236084,
"learning_rate": 7.790452970194673e-06,
"loss": 0.8346,
"step": 6504
},
{
"epoch": 1.7300531914893615,
"grad_norm": 3.953512191772461,
"learning_rate": 7.7897231311638e-06,
"loss": 0.6958,
"step": 6505
},
{
"epoch": 1.7303191489361702,
"grad_norm": 3.7672922611236572,
"learning_rate": 7.788993205815606e-06,
"loss": 0.7887,
"step": 6506
},
{
"epoch": 1.7305851063829787,
"grad_norm": 4.269046783447266,
"learning_rate": 7.788263194172684e-06,
"loss": 0.9836,
"step": 6507
},
{
"epoch": 1.7308510638297872,
"grad_norm": 3.96058988571167,
"learning_rate": 7.787533096257613e-06,
"loss": 0.9103,
"step": 6508
},
{
"epoch": 1.7311170212765958,
"grad_norm": 3.9208950996398926,
"learning_rate": 7.786802912092986e-06,
"loss": 0.819,
"step": 6509
},
{
"epoch": 1.7313829787234043,
"grad_norm": 3.600135326385498,
"learning_rate": 7.786072641701397e-06,
"loss": 0.8122,
"step": 6510
},
{
"epoch": 1.7316489361702128,
"grad_norm": 3.9716193675994873,
"learning_rate": 7.78534228510544e-06,
"loss": 0.7281,
"step": 6511
},
{
"epoch": 1.7319148936170212,
"grad_norm": 4.222037315368652,
"learning_rate": 7.784611842327711e-06,
"loss": 0.8926,
"step": 6512
},
{
"epoch": 1.7321808510638297,
"grad_norm": 3.3642852306365967,
"learning_rate": 7.783881313390816e-06,
"loss": 0.7014,
"step": 6513
},
{
"epoch": 1.7324468085106384,
"grad_norm": 4.051825046539307,
"learning_rate": 7.783150698317354e-06,
"loss": 0.7602,
"step": 6514
},
{
"epoch": 1.7327127659574468,
"grad_norm": 4.036343574523926,
"learning_rate": 7.782419997129934e-06,
"loss": 0.8381,
"step": 6515
},
{
"epoch": 1.7329787234042553,
"grad_norm": 3.722576856613159,
"learning_rate": 7.781689209851163e-06,
"loss": 0.8737,
"step": 6516
},
{
"epoch": 1.733244680851064,
"grad_norm": 4.037721157073975,
"learning_rate": 7.780958336503653e-06,
"loss": 0.8382,
"step": 6517
},
{
"epoch": 1.7335106382978722,
"grad_norm": 4.075493812561035,
"learning_rate": 7.780227377110016e-06,
"loss": 0.8215,
"step": 6518
},
{
"epoch": 1.733776595744681,
"grad_norm": 3.9683899879455566,
"learning_rate": 7.779496331692872e-06,
"loss": 0.8797,
"step": 6519
},
{
"epoch": 1.7340425531914894,
"grad_norm": 3.871469259262085,
"learning_rate": 7.77876520027484e-06,
"loss": 0.7388,
"step": 6520
},
{
"epoch": 1.7343085106382978,
"grad_norm": 3.950624465942383,
"learning_rate": 7.778033982878539e-06,
"loss": 0.7502,
"step": 6521
},
{
"epoch": 1.7345744680851065,
"grad_norm": 4.015387058258057,
"learning_rate": 7.777302679526596e-06,
"loss": 0.9874,
"step": 6522
},
{
"epoch": 1.7348404255319148,
"grad_norm": 4.03596830368042,
"learning_rate": 7.776571290241642e-06,
"loss": 0.7633,
"step": 6523
},
{
"epoch": 1.7351063829787234,
"grad_norm": 4.029125213623047,
"learning_rate": 7.775839815046299e-06,
"loss": 0.7994,
"step": 6524
},
{
"epoch": 1.735372340425532,
"grad_norm": 4.058604717254639,
"learning_rate": 7.775108253963207e-06,
"loss": 0.7391,
"step": 6525
},
{
"epoch": 1.7356382978723404,
"grad_norm": 3.862391948699951,
"learning_rate": 7.774376607014995e-06,
"loss": 0.9032,
"step": 6526
},
{
"epoch": 1.735904255319149,
"grad_norm": 3.903395414352417,
"learning_rate": 7.773644874224306e-06,
"loss": 0.8429,
"step": 6527
},
{
"epoch": 1.7361702127659573,
"grad_norm": 3.8711469173431396,
"learning_rate": 7.77291305561378e-06,
"loss": 0.807,
"step": 6528
},
{
"epoch": 1.736436170212766,
"grad_norm": 3.977463483810425,
"learning_rate": 7.77218115120606e-06,
"loss": 0.7929,
"step": 6529
},
{
"epoch": 1.7367021276595744,
"grad_norm": 3.7397544384002686,
"learning_rate": 7.77144916102379e-06,
"loss": 0.8478,
"step": 6530
},
{
"epoch": 1.736968085106383,
"grad_norm": 3.6703922748565674,
"learning_rate": 7.770717085089618e-06,
"loss": 0.6432,
"step": 6531
},
{
"epoch": 1.7372340425531916,
"grad_norm": 4.170365333557129,
"learning_rate": 7.7699849234262e-06,
"loss": 0.7565,
"step": 6532
},
{
"epoch": 1.7375,
"grad_norm": 3.6264007091522217,
"learning_rate": 7.769252676056186e-06,
"loss": 0.7635,
"step": 6533
},
{
"epoch": 1.7377659574468085,
"grad_norm": 3.9042675495147705,
"learning_rate": 7.768520343002235e-06,
"loss": 0.9037,
"step": 6534
},
{
"epoch": 1.738031914893617,
"grad_norm": 4.19412899017334,
"learning_rate": 7.767787924287005e-06,
"loss": 0.8516,
"step": 6535
},
{
"epoch": 1.7382978723404254,
"grad_norm": 3.869814157485962,
"learning_rate": 7.767055419933157e-06,
"loss": 0.7815,
"step": 6536
},
{
"epoch": 1.7385638297872341,
"grad_norm": 3.712411642074585,
"learning_rate": 7.766322829963357e-06,
"loss": 0.6676,
"step": 6537
},
{
"epoch": 1.7388297872340426,
"grad_norm": 4.046865463256836,
"learning_rate": 7.76559015440027e-06,
"loss": 0.8799,
"step": 6538
},
{
"epoch": 1.739095744680851,
"grad_norm": 3.908235549926758,
"learning_rate": 7.76485739326657e-06,
"loss": 0.7999,
"step": 6539
},
{
"epoch": 1.7393617021276597,
"grad_norm": 4.396571159362793,
"learning_rate": 7.764124546584926e-06,
"loss": 0.8813,
"step": 6540
},
{
"epoch": 1.739627659574468,
"grad_norm": 3.7259883880615234,
"learning_rate": 7.763391614378014e-06,
"loss": 0.8519,
"step": 6541
},
{
"epoch": 1.7398936170212767,
"grad_norm": 3.7457261085510254,
"learning_rate": 7.762658596668514e-06,
"loss": 0.7913,
"step": 6542
},
{
"epoch": 1.7401595744680851,
"grad_norm": 3.66605544090271,
"learning_rate": 7.7619254934791e-06,
"loss": 0.8122,
"step": 6543
},
{
"epoch": 1.7404255319148936,
"grad_norm": 3.8894519805908203,
"learning_rate": 7.761192304832463e-06,
"loss": 0.6829,
"step": 6544
},
{
"epoch": 1.7406914893617023,
"grad_norm": 3.4376041889190674,
"learning_rate": 7.760459030751285e-06,
"loss": 0.6903,
"step": 6545
},
{
"epoch": 1.7409574468085105,
"grad_norm": 4.00453519821167,
"learning_rate": 7.759725671258254e-06,
"loss": 0.8714,
"step": 6546
},
{
"epoch": 1.7412234042553192,
"grad_norm": 3.9484405517578125,
"learning_rate": 7.758992226376062e-06,
"loss": 0.9567,
"step": 6547
},
{
"epoch": 1.7414893617021276,
"grad_norm": 3.885755777359009,
"learning_rate": 7.7582586961274e-06,
"loss": 0.7928,
"step": 6548
},
{
"epoch": 1.741755319148936,
"grad_norm": 3.8768088817596436,
"learning_rate": 7.757525080534968e-06,
"loss": 0.7554,
"step": 6549
},
{
"epoch": 1.7420212765957448,
"grad_norm": 3.7053639888763428,
"learning_rate": 7.756791379621461e-06,
"loss": 0.8122,
"step": 6550
},
{
"epoch": 1.742287234042553,
"grad_norm": 3.9800238609313965,
"learning_rate": 7.756057593409588e-06,
"loss": 0.8505,
"step": 6551
},
{
"epoch": 1.7425531914893617,
"grad_norm": 3.586451768875122,
"learning_rate": 7.755323721922045e-06,
"loss": 0.7435,
"step": 6552
},
{
"epoch": 1.7428191489361702,
"grad_norm": 4.315957069396973,
"learning_rate": 7.754589765181543e-06,
"loss": 0.8308,
"step": 6553
},
{
"epoch": 1.7430851063829786,
"grad_norm": 3.764915704727173,
"learning_rate": 7.75385572321079e-06,
"loss": 0.7939,
"step": 6554
},
{
"epoch": 1.7433510638297873,
"grad_norm": 3.9177279472351074,
"learning_rate": 7.7531215960325e-06,
"loss": 0.8557,
"step": 6555
},
{
"epoch": 1.7436170212765958,
"grad_norm": 3.802114248275757,
"learning_rate": 7.752387383669384e-06,
"loss": 0.7933,
"step": 6556
},
{
"epoch": 1.7438829787234043,
"grad_norm": 4.129657745361328,
"learning_rate": 7.751653086144164e-06,
"loss": 0.8744,
"step": 6557
},
{
"epoch": 1.7441489361702127,
"grad_norm": 4.201019763946533,
"learning_rate": 7.750918703479558e-06,
"loss": 0.7875,
"step": 6558
},
{
"epoch": 1.7444148936170212,
"grad_norm": 4.305670261383057,
"learning_rate": 7.750184235698285e-06,
"loss": 0.8137,
"step": 6559
},
{
"epoch": 1.7446808510638299,
"grad_norm": 3.571631908416748,
"learning_rate": 7.749449682823077e-06,
"loss": 0.7308,
"step": 6560
},
{
"epoch": 1.7449468085106383,
"grad_norm": 4.124020576477051,
"learning_rate": 7.74871504487666e-06,
"loss": 0.9546,
"step": 6561
},
{
"epoch": 1.7452127659574468,
"grad_norm": 4.1722588539123535,
"learning_rate": 7.74798032188176e-06,
"loss": 0.787,
"step": 6562
},
{
"epoch": 1.7454787234042555,
"grad_norm": 4.017617225646973,
"learning_rate": 7.747245513861115e-06,
"loss": 0.8655,
"step": 6563
},
{
"epoch": 1.7457446808510637,
"grad_norm": 4.122082233428955,
"learning_rate": 7.74651062083746e-06,
"loss": 0.9471,
"step": 6564
},
{
"epoch": 1.7460106382978724,
"grad_norm": 4.254493713378906,
"learning_rate": 7.745775642833532e-06,
"loss": 0.8313,
"step": 6565
},
{
"epoch": 1.7462765957446809,
"grad_norm": 3.856379985809326,
"learning_rate": 7.745040579872073e-06,
"loss": 0.9207,
"step": 6566
},
{
"epoch": 1.7465425531914893,
"grad_norm": 4.020528316497803,
"learning_rate": 7.744305431975827e-06,
"loss": 0.7029,
"step": 6567
},
{
"epoch": 1.746808510638298,
"grad_norm": 4.091069221496582,
"learning_rate": 7.743570199167539e-06,
"loss": 0.8682,
"step": 6568
},
{
"epoch": 1.7470744680851062,
"grad_norm": 3.8805131912231445,
"learning_rate": 7.742834881469959e-06,
"loss": 0.8366,
"step": 6569
},
{
"epoch": 1.747340425531915,
"grad_norm": 3.5972797870635986,
"learning_rate": 7.742099478905837e-06,
"loss": 0.784,
"step": 6570
},
{
"epoch": 1.7476063829787234,
"grad_norm": 3.655684232711792,
"learning_rate": 7.741363991497932e-06,
"loss": 0.7849,
"step": 6571
},
{
"epoch": 1.7478723404255319,
"grad_norm": 3.854562520980835,
"learning_rate": 7.740628419268996e-06,
"loss": 0.7961,
"step": 6572
},
{
"epoch": 1.7481382978723405,
"grad_norm": 3.5972256660461426,
"learning_rate": 7.73989276224179e-06,
"loss": 0.8045,
"step": 6573
},
{
"epoch": 1.7484042553191488,
"grad_norm": 4.087411880493164,
"learning_rate": 7.739157020439077e-06,
"loss": 0.8889,
"step": 6574
},
{
"epoch": 1.7486702127659575,
"grad_norm": 4.145167350769043,
"learning_rate": 7.738421193883618e-06,
"loss": 0.8542,
"step": 6575
},
{
"epoch": 1.748936170212766,
"grad_norm": 4.064332008361816,
"learning_rate": 7.737685282598187e-06,
"loss": 0.8523,
"step": 6576
},
{
"epoch": 1.7492021276595744,
"grad_norm": 4.075108051300049,
"learning_rate": 7.736949286605549e-06,
"loss": 0.8839,
"step": 6577
},
{
"epoch": 1.749468085106383,
"grad_norm": 4.157843112945557,
"learning_rate": 7.736213205928476e-06,
"loss": 0.9253,
"step": 6578
},
{
"epoch": 1.7497340425531915,
"grad_norm": 3.978928327560425,
"learning_rate": 7.735477040589745e-06,
"loss": 0.8454,
"step": 6579
},
{
"epoch": 1.75,
"grad_norm": 3.7294394969940186,
"learning_rate": 7.734740790612137e-06,
"loss": 0.7877,
"step": 6580
},
{
"epoch": 1.7502659574468085,
"grad_norm": 4.367574214935303,
"learning_rate": 7.734004456018424e-06,
"loss": 0.7477,
"step": 6581
},
{
"epoch": 1.750531914893617,
"grad_norm": 3.952146291732788,
"learning_rate": 7.733268036831398e-06,
"loss": 0.7725,
"step": 6582
},
{
"epoch": 1.7507978723404256,
"grad_norm": 4.400146961212158,
"learning_rate": 7.73253153307384e-06,
"loss": 0.8059,
"step": 6583
},
{
"epoch": 1.751063829787234,
"grad_norm": 4.003587245941162,
"learning_rate": 7.73179494476854e-06,
"loss": 0.8549,
"step": 6584
},
{
"epoch": 1.7513297872340425,
"grad_norm": 3.898470640182495,
"learning_rate": 7.731058271938286e-06,
"loss": 0.7925,
"step": 6585
},
{
"epoch": 1.7515957446808512,
"grad_norm": 3.6899170875549316,
"learning_rate": 7.730321514605877e-06,
"loss": 0.7535,
"step": 6586
},
{
"epoch": 1.7518617021276595,
"grad_norm": 3.996615171432495,
"learning_rate": 7.729584672794102e-06,
"loss": 0.8278,
"step": 6587
},
{
"epoch": 1.7521276595744681,
"grad_norm": 4.020608901977539,
"learning_rate": 7.728847746525764e-06,
"loss": 0.7233,
"step": 6588
},
{
"epoch": 1.7523936170212766,
"grad_norm": 4.504430294036865,
"learning_rate": 7.728110735823666e-06,
"loss": 0.8254,
"step": 6589
},
{
"epoch": 1.752659574468085,
"grad_norm": 3.7418766021728516,
"learning_rate": 7.72737364071061e-06,
"loss": 0.8151,
"step": 6590
},
{
"epoch": 1.7529255319148938,
"grad_norm": 4.577789783477783,
"learning_rate": 7.7266364612094e-06,
"loss": 0.9276,
"step": 6591
},
{
"epoch": 1.753191489361702,
"grad_norm": 4.067131042480469,
"learning_rate": 7.72589919734285e-06,
"loss": 0.8282,
"step": 6592
},
{
"epoch": 1.7534574468085107,
"grad_norm": 4.11132287979126,
"learning_rate": 7.725161849133769e-06,
"loss": 0.8663,
"step": 6593
},
{
"epoch": 1.7537234042553191,
"grad_norm": 3.8996002674102783,
"learning_rate": 7.724424416604972e-06,
"loss": 0.9631,
"step": 6594
},
{
"epoch": 1.7539893617021276,
"grad_norm": 3.911623954772949,
"learning_rate": 7.723686899779277e-06,
"loss": 0.8082,
"step": 6595
},
{
"epoch": 1.7542553191489363,
"grad_norm": 4.957215785980225,
"learning_rate": 7.7229492986795e-06,
"loss": 0.8758,
"step": 6596
},
{
"epoch": 1.7545212765957445,
"grad_norm": 4.114643573760986,
"learning_rate": 7.722211613328467e-06,
"loss": 0.7665,
"step": 6597
},
{
"epoch": 1.7547872340425532,
"grad_norm": 3.4866108894348145,
"learning_rate": 7.721473843749e-06,
"loss": 0.7636,
"step": 6598
},
{
"epoch": 1.7550531914893617,
"grad_norm": 3.798917055130005,
"learning_rate": 7.72073598996393e-06,
"loss": 0.7645,
"step": 6599
},
{
"epoch": 1.7553191489361701,
"grad_norm": 4.327617168426514,
"learning_rate": 7.719998051996087e-06,
"loss": 0.8174,
"step": 6600
},
{
"epoch": 1.7555851063829788,
"grad_norm": 3.7455971240997314,
"learning_rate": 7.719260029868299e-06,
"loss": 0.7484,
"step": 6601
},
{
"epoch": 1.7558510638297873,
"grad_norm": 3.4463014602661133,
"learning_rate": 7.718521923603404e-06,
"loss": 0.692,
"step": 6602
},
{
"epoch": 1.7561170212765957,
"grad_norm": 3.920140027999878,
"learning_rate": 7.717783733224243e-06,
"loss": 0.9122,
"step": 6603
},
{
"epoch": 1.7563829787234042,
"grad_norm": 4.227574825286865,
"learning_rate": 7.717045458753651e-06,
"loss": 0.7812,
"step": 6604
},
{
"epoch": 1.7566489361702127,
"grad_norm": 4.23086404800415,
"learning_rate": 7.716307100214472e-06,
"loss": 0.829,
"step": 6605
},
{
"epoch": 1.7569148936170214,
"grad_norm": 3.5714340209960938,
"learning_rate": 7.715568657629557e-06,
"loss": 0.8676,
"step": 6606
},
{
"epoch": 1.7571808510638298,
"grad_norm": 4.220118045806885,
"learning_rate": 7.71483013102175e-06,
"loss": 0.7351,
"step": 6607
},
{
"epoch": 1.7574468085106383,
"grad_norm": 3.8862133026123047,
"learning_rate": 7.7140915204139e-06,
"loss": 0.7836,
"step": 6608
},
{
"epoch": 1.757712765957447,
"grad_norm": 3.9056966304779053,
"learning_rate": 7.713352825828865e-06,
"loss": 0.7439,
"step": 6609
},
{
"epoch": 1.7579787234042552,
"grad_norm": 4.519630432128906,
"learning_rate": 7.712614047289498e-06,
"loss": 0.9618,
"step": 6610
},
{
"epoch": 1.758244680851064,
"grad_norm": 3.756225109100342,
"learning_rate": 7.711875184818659e-06,
"loss": 0.7612,
"step": 6611
},
{
"epoch": 1.7585106382978724,
"grad_norm": 4.109426498413086,
"learning_rate": 7.71113623843921e-06,
"loss": 0.8828,
"step": 6612
},
{
"epoch": 1.7587765957446808,
"grad_norm": 4.274012565612793,
"learning_rate": 7.710397208174012e-06,
"loss": 0.8212,
"step": 6613
},
{
"epoch": 1.7590425531914895,
"grad_norm": 4.489198207855225,
"learning_rate": 7.709658094045933e-06,
"loss": 0.9358,
"step": 6614
},
{
"epoch": 1.7593085106382977,
"grad_norm": 3.796844005584717,
"learning_rate": 7.708918896077843e-06,
"loss": 0.8092,
"step": 6615
},
{
"epoch": 1.7595744680851064,
"grad_norm": 4.139426231384277,
"learning_rate": 7.708179614292614e-06,
"loss": 0.7859,
"step": 6616
},
{
"epoch": 1.7598404255319149,
"grad_norm": 4.109641075134277,
"learning_rate": 7.707440248713118e-06,
"loss": 0.7763,
"step": 6617
},
{
"epoch": 1.7601063829787233,
"grad_norm": 4.1055521965026855,
"learning_rate": 7.706700799362235e-06,
"loss": 0.7225,
"step": 6618
},
{
"epoch": 1.760372340425532,
"grad_norm": 4.071004390716553,
"learning_rate": 7.70596126626284e-06,
"loss": 0.7714,
"step": 6619
},
{
"epoch": 1.7606382978723403,
"grad_norm": 4.117389678955078,
"learning_rate": 7.705221649437819e-06,
"loss": 0.8,
"step": 6620
},
{
"epoch": 1.760904255319149,
"grad_norm": 3.617248058319092,
"learning_rate": 7.704481948910057e-06,
"loss": 0.8286,
"step": 6621
},
{
"epoch": 1.7611702127659574,
"grad_norm": 3.6249337196350098,
"learning_rate": 7.703742164702436e-06,
"loss": 0.732,
"step": 6622
},
{
"epoch": 1.7614361702127659,
"grad_norm": 3.584951400756836,
"learning_rate": 7.703002296837849e-06,
"loss": 0.859,
"step": 6623
},
{
"epoch": 1.7617021276595746,
"grad_norm": 3.908857822418213,
"learning_rate": 7.70226234533919e-06,
"loss": 0.8112,
"step": 6624
},
{
"epoch": 1.761968085106383,
"grad_norm": 4.350627422332764,
"learning_rate": 7.701522310229353e-06,
"loss": 0.9676,
"step": 6625
},
{
"epoch": 1.7622340425531915,
"grad_norm": 3.7733817100524902,
"learning_rate": 7.700782191531236e-06,
"loss": 0.7312,
"step": 6626
},
{
"epoch": 1.7625,
"grad_norm": 3.822552442550659,
"learning_rate": 7.700041989267738e-06,
"loss": 0.6901,
"step": 6627
},
{
"epoch": 1.7627659574468084,
"grad_norm": 3.9083547592163086,
"learning_rate": 7.69930170346176e-06,
"loss": 0.7498,
"step": 6628
},
{
"epoch": 1.763031914893617,
"grad_norm": 4.126950263977051,
"learning_rate": 7.69856133413621e-06,
"loss": 0.7975,
"step": 6629
},
{
"epoch": 1.7632978723404256,
"grad_norm": 4.27503776550293,
"learning_rate": 7.697820881313994e-06,
"loss": 0.7927,
"step": 6630
},
{
"epoch": 1.763563829787234,
"grad_norm": 4.2161407470703125,
"learning_rate": 7.697080345018024e-06,
"loss": 0.8779,
"step": 6631
},
{
"epoch": 1.7638297872340427,
"grad_norm": 4.142273426055908,
"learning_rate": 7.696339725271215e-06,
"loss": 0.8069,
"step": 6632
},
{
"epoch": 1.764095744680851,
"grad_norm": 4.17659330368042,
"learning_rate": 7.695599022096478e-06,
"loss": 0.7439,
"step": 6633
},
{
"epoch": 1.7643617021276596,
"grad_norm": 4.072018623352051,
"learning_rate": 7.694858235516735e-06,
"loss": 0.8364,
"step": 6634
},
{
"epoch": 1.764627659574468,
"grad_norm": 3.6811084747314453,
"learning_rate": 7.694117365554905e-06,
"loss": 0.8986,
"step": 6635
},
{
"epoch": 1.7648936170212766,
"grad_norm": 3.924104928970337,
"learning_rate": 7.693376412233913e-06,
"loss": 0.7906,
"step": 6636
},
{
"epoch": 1.7651595744680852,
"grad_norm": 4.180627822875977,
"learning_rate": 7.69263537557668e-06,
"loss": 0.814,
"step": 6637
},
{
"epoch": 1.7654255319148935,
"grad_norm": 3.74808931350708,
"learning_rate": 7.691894255606143e-06,
"loss": 0.8623,
"step": 6638
},
{
"epoch": 1.7656914893617022,
"grad_norm": 3.8845086097717285,
"learning_rate": 7.691153052345227e-06,
"loss": 0.8279,
"step": 6639
},
{
"epoch": 1.7659574468085106,
"grad_norm": 3.6786465644836426,
"learning_rate": 7.690411765816864e-06,
"loss": 0.8579,
"step": 6640
},
{
"epoch": 1.766223404255319,
"grad_norm": 4.260414123535156,
"learning_rate": 7.689670396043997e-06,
"loss": 0.8473,
"step": 6641
},
{
"epoch": 1.7664893617021278,
"grad_norm": 3.757199287414551,
"learning_rate": 7.688928943049558e-06,
"loss": 0.8065,
"step": 6642
},
{
"epoch": 1.766755319148936,
"grad_norm": 4.010439872741699,
"learning_rate": 7.688187406856494e-06,
"loss": 0.8412,
"step": 6643
},
{
"epoch": 1.7670212765957447,
"grad_norm": 4.193131923675537,
"learning_rate": 7.687445787487746e-06,
"loss": 0.7638,
"step": 6644
},
{
"epoch": 1.7672872340425532,
"grad_norm": 3.7920022010803223,
"learning_rate": 7.686704084966263e-06,
"loss": 0.7628,
"step": 6645
},
{
"epoch": 1.7675531914893616,
"grad_norm": 3.6464099884033203,
"learning_rate": 7.68596229931499e-06,
"loss": 0.7547,
"step": 6646
},
{
"epoch": 1.7678191489361703,
"grad_norm": 3.7222912311553955,
"learning_rate": 7.685220430556883e-06,
"loss": 0.6741,
"step": 6647
},
{
"epoch": 1.7680851063829788,
"grad_norm": 3.48502254486084,
"learning_rate": 7.684478478714892e-06,
"loss": 0.6893,
"step": 6648
},
{
"epoch": 1.7683510638297872,
"grad_norm": 4.072755813598633,
"learning_rate": 7.683736443811978e-06,
"loss": 0.8487,
"step": 6649
},
{
"epoch": 1.7686170212765957,
"grad_norm": 3.5753612518310547,
"learning_rate": 7.682994325871098e-06,
"loss": 0.8314,
"step": 6650
},
{
"epoch": 1.7688829787234042,
"grad_norm": 4.951267242431641,
"learning_rate": 7.682252124915216e-06,
"loss": 0.9956,
"step": 6651
},
{
"epoch": 1.7691489361702128,
"grad_norm": 4.200650691986084,
"learning_rate": 7.681509840967294e-06,
"loss": 0.7119,
"step": 6652
},
{
"epoch": 1.7694148936170213,
"grad_norm": 3.4650633335113525,
"learning_rate": 7.6807674740503e-06,
"loss": 0.843,
"step": 6653
},
{
"epoch": 1.7696808510638298,
"grad_norm": 4.049907207489014,
"learning_rate": 7.680025024187206e-06,
"loss": 0.7776,
"step": 6654
},
{
"epoch": 1.7699468085106385,
"grad_norm": 3.934799909591675,
"learning_rate": 7.67928249140098e-06,
"loss": 0.7957,
"step": 6655
},
{
"epoch": 1.7702127659574467,
"grad_norm": 4.14153528213501,
"learning_rate": 7.678539875714604e-06,
"loss": 0.7445,
"step": 6656
},
{
"epoch": 1.7704787234042554,
"grad_norm": 3.816898822784424,
"learning_rate": 7.677797177151047e-06,
"loss": 0.8869,
"step": 6657
},
{
"epoch": 1.7707446808510638,
"grad_norm": 4.405877113342285,
"learning_rate": 7.677054395733292e-06,
"loss": 0.9004,
"step": 6658
},
{
"epoch": 1.7710106382978723,
"grad_norm": 4.069585800170898,
"learning_rate": 7.676311531484324e-06,
"loss": 0.7907,
"step": 6659
},
{
"epoch": 1.771276595744681,
"grad_norm": 3.9655072689056396,
"learning_rate": 7.675568584427125e-06,
"loss": 0.8069,
"step": 6660
},
{
"epoch": 1.7715425531914892,
"grad_norm": 3.8515357971191406,
"learning_rate": 7.674825554584686e-06,
"loss": 0.8013,
"step": 6661
},
{
"epoch": 1.771808510638298,
"grad_norm": 4.2742438316345215,
"learning_rate": 7.674082441979993e-06,
"loss": 0.9655,
"step": 6662
},
{
"epoch": 1.7720744680851064,
"grad_norm": 4.425269603729248,
"learning_rate": 7.67333924663604e-06,
"loss": 0.872,
"step": 6663
},
{
"epoch": 1.7723404255319148,
"grad_norm": 4.043865203857422,
"learning_rate": 7.672595968575827e-06,
"loss": 0.8425,
"step": 6664
},
{
"epoch": 1.7726063829787235,
"grad_norm": 3.77255916595459,
"learning_rate": 7.671852607822346e-06,
"loss": 0.6711,
"step": 6665
},
{
"epoch": 1.7728723404255318,
"grad_norm": 3.8917951583862305,
"learning_rate": 7.671109164398598e-06,
"loss": 0.7429,
"step": 6666
},
{
"epoch": 1.7731382978723405,
"grad_norm": 4.034469127655029,
"learning_rate": 7.67036563832759e-06,
"loss": 0.884,
"step": 6667
},
{
"epoch": 1.773404255319149,
"grad_norm": 4.177572727203369,
"learning_rate": 7.669622029632323e-06,
"loss": 0.7823,
"step": 6668
},
{
"epoch": 1.7736702127659574,
"grad_norm": 3.816012382507324,
"learning_rate": 7.668878338335808e-06,
"loss": 0.8012,
"step": 6669
},
{
"epoch": 1.773936170212766,
"grad_norm": 3.6478235721588135,
"learning_rate": 7.668134564461057e-06,
"loss": 0.8071,
"step": 6670
},
{
"epoch": 1.7742021276595743,
"grad_norm": 4.1651177406311035,
"learning_rate": 7.66739070803108e-06,
"loss": 0.882,
"step": 6671
},
{
"epoch": 1.774468085106383,
"grad_norm": 4.032572269439697,
"learning_rate": 7.666646769068894e-06,
"loss": 0.7804,
"step": 6672
},
{
"epoch": 1.7747340425531914,
"grad_norm": 4.481500148773193,
"learning_rate": 7.665902747597516e-06,
"loss": 0.8824,
"step": 6673
},
{
"epoch": 1.775,
"grad_norm": 3.6887848377227783,
"learning_rate": 7.66515864363997e-06,
"loss": 0.8179,
"step": 6674
},
{
"epoch": 1.7752659574468086,
"grad_norm": 3.5154476165771484,
"learning_rate": 7.664414457219277e-06,
"loss": 0.8015,
"step": 6675
},
{
"epoch": 1.775531914893617,
"grad_norm": 3.9713804721832275,
"learning_rate": 7.663670188358464e-06,
"loss": 0.8426,
"step": 6676
},
{
"epoch": 1.7757978723404255,
"grad_norm": 4.082159996032715,
"learning_rate": 7.66292583708056e-06,
"loss": 0.81,
"step": 6677
},
{
"epoch": 1.7760638297872342,
"grad_norm": 3.8582613468170166,
"learning_rate": 7.662181403408593e-06,
"loss": 0.7965,
"step": 6678
},
{
"epoch": 1.7763297872340424,
"grad_norm": 4.068000793457031,
"learning_rate": 7.661436887365603e-06,
"loss": 0.8332,
"step": 6679
},
{
"epoch": 1.7765957446808511,
"grad_norm": 4.067226409912109,
"learning_rate": 7.660692288974618e-06,
"loss": 0.8399,
"step": 6680
},
{
"epoch": 1.7768617021276596,
"grad_norm": 3.885331392288208,
"learning_rate": 7.659947608258684e-06,
"loss": 0.8701,
"step": 6681
},
{
"epoch": 1.777127659574468,
"grad_norm": 3.792872905731201,
"learning_rate": 7.659202845240839e-06,
"loss": 0.8379,
"step": 6682
},
{
"epoch": 1.7773936170212767,
"grad_norm": 3.553959369659424,
"learning_rate": 7.658457999944124e-06,
"loss": 0.6874,
"step": 6683
},
{
"epoch": 1.777659574468085,
"grad_norm": 4.169983386993408,
"learning_rate": 7.657713072391591e-06,
"loss": 0.7569,
"step": 6684
},
{
"epoch": 1.7779255319148937,
"grad_norm": 4.05847692489624,
"learning_rate": 7.656968062606288e-06,
"loss": 0.8497,
"step": 6685
},
{
"epoch": 1.7781914893617021,
"grad_norm": 4.117887496948242,
"learning_rate": 7.656222970611263e-06,
"loss": 0.708,
"step": 6686
},
{
"epoch": 1.7784574468085106,
"grad_norm": 3.683126211166382,
"learning_rate": 7.655477796429571e-06,
"loss": 0.7568,
"step": 6687
},
{
"epoch": 1.7787234042553193,
"grad_norm": 3.6990060806274414,
"learning_rate": 7.654732540084273e-06,
"loss": 0.7721,
"step": 6688
},
{
"epoch": 1.7789893617021275,
"grad_norm": 3.917276620864868,
"learning_rate": 7.653987201598422e-06,
"loss": 0.8214,
"step": 6689
},
{
"epoch": 1.7792553191489362,
"grad_norm": 4.091401100158691,
"learning_rate": 7.653241780995083e-06,
"loss": 0.7312,
"step": 6690
},
{
"epoch": 1.7795212765957447,
"grad_norm": 4.167940139770508,
"learning_rate": 7.652496278297319e-06,
"loss": 0.9115,
"step": 6691
},
{
"epoch": 1.7797872340425531,
"grad_norm": 3.9726510047912598,
"learning_rate": 7.651750693528197e-06,
"loss": 0.7857,
"step": 6692
},
{
"epoch": 1.7800531914893618,
"grad_norm": 3.7973427772521973,
"learning_rate": 7.651005026710786e-06,
"loss": 0.8594,
"step": 6693
},
{
"epoch": 1.78031914893617,
"grad_norm": 3.932386875152588,
"learning_rate": 7.65025927786816e-06,
"loss": 0.7873,
"step": 6694
},
{
"epoch": 1.7805851063829787,
"grad_norm": 3.6921486854553223,
"learning_rate": 7.64951344702339e-06,
"loss": 0.7569,
"step": 6695
},
{
"epoch": 1.7808510638297872,
"grad_norm": 4.060511589050293,
"learning_rate": 7.648767534199556e-06,
"loss": 0.7533,
"step": 6696
},
{
"epoch": 1.7811170212765957,
"grad_norm": 4.142321586608887,
"learning_rate": 7.648021539419737e-06,
"loss": 0.7836,
"step": 6697
},
{
"epoch": 1.7813829787234043,
"grad_norm": 4.071194648742676,
"learning_rate": 7.647275462707011e-06,
"loss": 0.7489,
"step": 6698
},
{
"epoch": 1.7816489361702128,
"grad_norm": 4.006459712982178,
"learning_rate": 7.646529304084469e-06,
"loss": 0.812,
"step": 6699
},
{
"epoch": 1.7819148936170213,
"grad_norm": 3.6437671184539795,
"learning_rate": 7.64578306357519e-06,
"loss": 0.7105,
"step": 6700
},
{
"epoch": 1.78218085106383,
"grad_norm": 4.094074249267578,
"learning_rate": 7.645036741202271e-06,
"loss": 0.9633,
"step": 6701
},
{
"epoch": 1.7824468085106382,
"grad_norm": 4.029351711273193,
"learning_rate": 7.6442903369888e-06,
"loss": 0.8999,
"step": 6702
},
{
"epoch": 1.7827127659574469,
"grad_norm": 3.8068792819976807,
"learning_rate": 7.643543850957872e-06,
"loss": 0.7305,
"step": 6703
},
{
"epoch": 1.7829787234042553,
"grad_norm": 4.074723243713379,
"learning_rate": 7.642797283132586e-06,
"loss": 0.8502,
"step": 6704
},
{
"epoch": 1.7832446808510638,
"grad_norm": 3.3582799434661865,
"learning_rate": 7.642050633536042e-06,
"loss": 0.7219,
"step": 6705
},
{
"epoch": 1.7835106382978725,
"grad_norm": 3.6337673664093018,
"learning_rate": 7.641303902191339e-06,
"loss": 0.7843,
"step": 6706
},
{
"epoch": 1.7837765957446807,
"grad_norm": 4.376511573791504,
"learning_rate": 7.640557089121583e-06,
"loss": 0.9737,
"step": 6707
},
{
"epoch": 1.7840425531914894,
"grad_norm": 3.6106109619140625,
"learning_rate": 7.639810194349884e-06,
"loss": 0.7549,
"step": 6708
},
{
"epoch": 1.7843085106382979,
"grad_norm": 3.9676499366760254,
"learning_rate": 7.639063217899348e-06,
"loss": 0.8951,
"step": 6709
},
{
"epoch": 1.7845744680851063,
"grad_norm": 3.7763378620147705,
"learning_rate": 7.638316159793089e-06,
"loss": 0.8431,
"step": 6710
},
{
"epoch": 1.784840425531915,
"grad_norm": 3.744365930557251,
"learning_rate": 7.637569020054221e-06,
"loss": 0.8697,
"step": 6711
},
{
"epoch": 1.7851063829787233,
"grad_norm": 3.4194390773773193,
"learning_rate": 7.636821798705864e-06,
"loss": 0.8979,
"step": 6712
},
{
"epoch": 1.785372340425532,
"grad_norm": 3.804483413696289,
"learning_rate": 7.636074495771134e-06,
"loss": 0.8484,
"step": 6713
},
{
"epoch": 1.7856382978723404,
"grad_norm": 4.089145660400391,
"learning_rate": 7.635327111273158e-06,
"loss": 0.892,
"step": 6714
},
{
"epoch": 1.7859042553191489,
"grad_norm": 4.051761150360107,
"learning_rate": 7.634579645235056e-06,
"loss": 0.8972,
"step": 6715
},
{
"epoch": 1.7861702127659576,
"grad_norm": 4.0280961990356445,
"learning_rate": 7.633832097679959e-06,
"loss": 0.8125,
"step": 6716
},
{
"epoch": 1.7864361702127658,
"grad_norm": 4.206244468688965,
"learning_rate": 7.633084468630996e-06,
"loss": 0.7675,
"step": 6717
},
{
"epoch": 1.7867021276595745,
"grad_norm": 3.4746177196502686,
"learning_rate": 7.6323367581113e-06,
"loss": 0.7079,
"step": 6718
},
{
"epoch": 1.786968085106383,
"grad_norm": 3.8518667221069336,
"learning_rate": 7.631588966144003e-06,
"loss": 0.965,
"step": 6719
},
{
"epoch": 1.7872340425531914,
"grad_norm": 3.605275869369507,
"learning_rate": 7.630841092752248e-06,
"loss": 0.7733,
"step": 6720
},
{
"epoch": 1.7875,
"grad_norm": 4.255527019500732,
"learning_rate": 7.63009313795917e-06,
"loss": 0.8645,
"step": 6721
},
{
"epoch": 1.7877659574468086,
"grad_norm": 3.93906307220459,
"learning_rate": 7.629345101787917e-06,
"loss": 0.8449,
"step": 6722
},
{
"epoch": 1.788031914893617,
"grad_norm": 4.351909160614014,
"learning_rate": 7.628596984261629e-06,
"loss": 0.8644,
"step": 6723
},
{
"epoch": 1.7882978723404257,
"grad_norm": 3.7165818214416504,
"learning_rate": 7.627848785403456e-06,
"loss": 0.7284,
"step": 6724
},
{
"epoch": 1.788563829787234,
"grad_norm": 3.9665300846099854,
"learning_rate": 7.6271005052365465e-06,
"loss": 0.8396,
"step": 6725
},
{
"epoch": 1.7888297872340426,
"grad_norm": 3.951260566711426,
"learning_rate": 7.6263521437840544e-06,
"loss": 0.9464,
"step": 6726
},
{
"epoch": 1.789095744680851,
"grad_norm": 4.499269008636475,
"learning_rate": 7.625603701069135e-06,
"loss": 0.9031,
"step": 6727
},
{
"epoch": 1.7893617021276595,
"grad_norm": 3.931673526763916,
"learning_rate": 7.6248551771149474e-06,
"loss": 0.823,
"step": 6728
},
{
"epoch": 1.7896276595744682,
"grad_norm": 4.128811836242676,
"learning_rate": 7.624106571944648e-06,
"loss": 0.7497,
"step": 6729
},
{
"epoch": 1.7898936170212765,
"grad_norm": 3.873683452606201,
"learning_rate": 7.623357885581403e-06,
"loss": 0.8247,
"step": 6730
},
{
"epoch": 1.7901595744680852,
"grad_norm": 3.7852728366851807,
"learning_rate": 7.6226091180483765e-06,
"loss": 0.8774,
"step": 6731
},
{
"epoch": 1.7904255319148936,
"grad_norm": 3.885965585708618,
"learning_rate": 7.621860269368735e-06,
"loss": 0.7561,
"step": 6732
},
{
"epoch": 1.790691489361702,
"grad_norm": 4.435214519500732,
"learning_rate": 7.6211113395656515e-06,
"loss": 0.9338,
"step": 6733
},
{
"epoch": 1.7909574468085108,
"grad_norm": 4.548224449157715,
"learning_rate": 7.6203623286622955e-06,
"loss": 0.8323,
"step": 6734
},
{
"epoch": 1.791223404255319,
"grad_norm": 3.8655712604522705,
"learning_rate": 7.619613236681845e-06,
"loss": 0.8654,
"step": 6735
},
{
"epoch": 1.7914893617021277,
"grad_norm": 3.7102363109588623,
"learning_rate": 7.618864063647477e-06,
"loss": 0.8015,
"step": 6736
},
{
"epoch": 1.7917553191489362,
"grad_norm": 4.260025978088379,
"learning_rate": 7.6181148095823705e-06,
"loss": 0.7977,
"step": 6737
},
{
"epoch": 1.7920212765957446,
"grad_norm": 4.112497806549072,
"learning_rate": 7.6173654745097106e-06,
"loss": 0.7763,
"step": 6738
},
{
"epoch": 1.7922872340425533,
"grad_norm": 3.998528003692627,
"learning_rate": 7.6166160584526795e-06,
"loss": 0.8215,
"step": 6739
},
{
"epoch": 1.7925531914893615,
"grad_norm": 3.6492180824279785,
"learning_rate": 7.615866561434468e-06,
"loss": 0.7239,
"step": 6740
},
{
"epoch": 1.7928191489361702,
"grad_norm": 3.8486714363098145,
"learning_rate": 7.615116983478266e-06,
"loss": 0.8435,
"step": 6741
},
{
"epoch": 1.7930851063829787,
"grad_norm": 3.863814353942871,
"learning_rate": 7.614367324607263e-06,
"loss": 0.8033,
"step": 6742
},
{
"epoch": 1.7933510638297872,
"grad_norm": 3.88749098777771,
"learning_rate": 7.613617584844662e-06,
"loss": 0.8072,
"step": 6743
},
{
"epoch": 1.7936170212765958,
"grad_norm": 3.9917871952056885,
"learning_rate": 7.612867764213651e-06,
"loss": 0.8138,
"step": 6744
},
{
"epoch": 1.7938829787234043,
"grad_norm": 4.009222507476807,
"learning_rate": 7.612117862737437e-06,
"loss": 0.7131,
"step": 6745
},
{
"epoch": 1.7941489361702128,
"grad_norm": 4.001763343811035,
"learning_rate": 7.611367880439221e-06,
"loss": 0.9487,
"step": 6746
},
{
"epoch": 1.7944148936170212,
"grad_norm": 4.2233805656433105,
"learning_rate": 7.610617817342207e-06,
"loss": 0.7244,
"step": 6747
},
{
"epoch": 1.7946808510638297,
"grad_norm": 3.7131550312042236,
"learning_rate": 7.609867673469607e-06,
"loss": 0.8303,
"step": 6748
},
{
"epoch": 1.7949468085106384,
"grad_norm": 4.046380519866943,
"learning_rate": 7.609117448844626e-06,
"loss": 0.8372,
"step": 6749
},
{
"epoch": 1.7952127659574468,
"grad_norm": 4.070696830749512,
"learning_rate": 7.60836714349048e-06,
"loss": 0.8259,
"step": 6750
},
{
"epoch": 1.7954787234042553,
"grad_norm": 3.893247604370117,
"learning_rate": 7.607616757430383e-06,
"loss": 0.8598,
"step": 6751
},
{
"epoch": 1.795744680851064,
"grad_norm": 3.7077648639678955,
"learning_rate": 7.606866290687555e-06,
"loss": 0.8036,
"step": 6752
},
{
"epoch": 1.7960106382978722,
"grad_norm": 4.3204450607299805,
"learning_rate": 7.606115743285213e-06,
"loss": 0.8424,
"step": 6753
},
{
"epoch": 1.796276595744681,
"grad_norm": 3.3555731773376465,
"learning_rate": 7.605365115246581e-06,
"loss": 0.8369,
"step": 6754
},
{
"epoch": 1.7965425531914894,
"grad_norm": 3.561962842941284,
"learning_rate": 7.604614406594888e-06,
"loss": 0.7841,
"step": 6755
},
{
"epoch": 1.7968085106382978,
"grad_norm": 4.0263166427612305,
"learning_rate": 7.6038636173533565e-06,
"loss": 0.7135,
"step": 6756
},
{
"epoch": 1.7970744680851065,
"grad_norm": 3.8524928092956543,
"learning_rate": 7.603112747545218e-06,
"loss": 0.8327,
"step": 6757
},
{
"epoch": 1.7973404255319148,
"grad_norm": 3.5046606063842773,
"learning_rate": 7.602361797193709e-06,
"loss": 0.8162,
"step": 6758
},
{
"epoch": 1.7976063829787234,
"grad_norm": 4.547070503234863,
"learning_rate": 7.60161076632206e-06,
"loss": 0.8014,
"step": 6759
},
{
"epoch": 1.797872340425532,
"grad_norm": 4.453802585601807,
"learning_rate": 7.600859654953513e-06,
"loss": 0.9287,
"step": 6760
},
{
"epoch": 1.7981382978723404,
"grad_norm": 4.324093818664551,
"learning_rate": 7.6001084631113046e-06,
"loss": 0.848,
"step": 6761
},
{
"epoch": 1.798404255319149,
"grad_norm": 4.146725177764893,
"learning_rate": 7.599357190818679e-06,
"loss": 0.8875,
"step": 6762
},
{
"epoch": 1.7986702127659573,
"grad_norm": 4.132041931152344,
"learning_rate": 7.598605838098882e-06,
"loss": 0.8413,
"step": 6763
},
{
"epoch": 1.798936170212766,
"grad_norm": 3.829908847808838,
"learning_rate": 7.59785440497516e-06,
"loss": 0.843,
"step": 6764
},
{
"epoch": 1.7992021276595744,
"grad_norm": 4.308759689331055,
"learning_rate": 7.597102891470766e-06,
"loss": 0.7839,
"step": 6765
},
{
"epoch": 1.799468085106383,
"grad_norm": 3.6383216381073,
"learning_rate": 7.59635129760895e-06,
"loss": 0.608,
"step": 6766
},
{
"epoch": 1.7997340425531916,
"grad_norm": 3.6101510524749756,
"learning_rate": 7.595599623412968e-06,
"loss": 0.7246,
"step": 6767
},
{
"epoch": 1.8,
"grad_norm": 3.51635479927063,
"learning_rate": 7.594847868906076e-06,
"loss": 0.798,
"step": 6768
},
{
"epoch": 1.8002659574468085,
"grad_norm": 3.927917718887329,
"learning_rate": 7.594096034111538e-06,
"loss": 0.8229,
"step": 6769
},
{
"epoch": 1.800531914893617,
"grad_norm": 4.29150390625,
"learning_rate": 7.5933441190526146e-06,
"loss": 0.922,
"step": 6770
},
{
"epoch": 1.8007978723404254,
"grad_norm": 3.8685336112976074,
"learning_rate": 7.592592123752569e-06,
"loss": 0.7242,
"step": 6771
},
{
"epoch": 1.8010638297872341,
"grad_norm": 3.9335358142852783,
"learning_rate": 7.591840048234673e-06,
"loss": 0.8717,
"step": 6772
},
{
"epoch": 1.8013297872340426,
"grad_norm": 4.033020496368408,
"learning_rate": 7.591087892522193e-06,
"loss": 0.8129,
"step": 6773
},
{
"epoch": 1.801595744680851,
"grad_norm": 4.348812580108643,
"learning_rate": 7.590335656638403e-06,
"loss": 0.8352,
"step": 6774
},
{
"epoch": 1.8018617021276597,
"grad_norm": 3.683743476867676,
"learning_rate": 7.589583340606579e-06,
"loss": 0.8427,
"step": 6775
},
{
"epoch": 1.802127659574468,
"grad_norm": 3.782118797302246,
"learning_rate": 7.588830944449996e-06,
"loss": 0.8659,
"step": 6776
},
{
"epoch": 1.8023936170212767,
"grad_norm": 4.097870826721191,
"learning_rate": 7.5880784681919365e-06,
"loss": 0.7472,
"step": 6777
},
{
"epoch": 1.8026595744680851,
"grad_norm": 3.921733856201172,
"learning_rate": 7.587325911855681e-06,
"loss": 0.8388,
"step": 6778
},
{
"epoch": 1.8029255319148936,
"grad_norm": 4.305613994598389,
"learning_rate": 7.586573275464517e-06,
"loss": 1.0133,
"step": 6779
},
{
"epoch": 1.8031914893617023,
"grad_norm": 4.13943338394165,
"learning_rate": 7.58582055904173e-06,
"loss": 0.7861,
"step": 6780
},
{
"epoch": 1.8034574468085105,
"grad_norm": 4.047939777374268,
"learning_rate": 7.585067762610612e-06,
"loss": 0.8422,
"step": 6781
},
{
"epoch": 1.8037234042553192,
"grad_norm": 3.8695991039276123,
"learning_rate": 7.584314886194451e-06,
"loss": 0.8365,
"step": 6782
},
{
"epoch": 1.8039893617021276,
"grad_norm": 3.7691190242767334,
"learning_rate": 7.583561929816547e-06,
"loss": 0.8293,
"step": 6783
},
{
"epoch": 1.804255319148936,
"grad_norm": 4.062473773956299,
"learning_rate": 7.5828088935001954e-06,
"loss": 0.8118,
"step": 6784
},
{
"epoch": 1.8045212765957448,
"grad_norm": 4.588931560516357,
"learning_rate": 7.582055777268693e-06,
"loss": 0.8835,
"step": 6785
},
{
"epoch": 1.804787234042553,
"grad_norm": 3.1973307132720947,
"learning_rate": 7.581302581145346e-06,
"loss": 0.6728,
"step": 6786
},
{
"epoch": 1.8050531914893617,
"grad_norm": 4.123830318450928,
"learning_rate": 7.5805493051534605e-06,
"loss": 0.9315,
"step": 6787
},
{
"epoch": 1.8053191489361702,
"grad_norm": 3.992337942123413,
"learning_rate": 7.57979594931634e-06,
"loss": 0.7951,
"step": 6788
},
{
"epoch": 1.8055851063829786,
"grad_norm": 3.456594467163086,
"learning_rate": 7.579042513657294e-06,
"loss": 0.8114,
"step": 6789
},
{
"epoch": 1.8058510638297873,
"grad_norm": 4.029353618621826,
"learning_rate": 7.578288998199638e-06,
"loss": 0.895,
"step": 6790
},
{
"epoch": 1.8061170212765958,
"grad_norm": 4.027595520019531,
"learning_rate": 7.577535402966683e-06,
"loss": 0.8416,
"step": 6791
},
{
"epoch": 1.8063829787234043,
"grad_norm": 3.8989861011505127,
"learning_rate": 7.5767817279817505e-06,
"loss": 0.8275,
"step": 6792
},
{
"epoch": 1.8066489361702127,
"grad_norm": 4.1814961433410645,
"learning_rate": 7.576027973268155e-06,
"loss": 0.7388,
"step": 6793
},
{
"epoch": 1.8069148936170212,
"grad_norm": 3.8830153942108154,
"learning_rate": 7.575274138849223e-06,
"loss": 0.7622,
"step": 6794
},
{
"epoch": 1.8071808510638299,
"grad_norm": 3.6945488452911377,
"learning_rate": 7.574520224748276e-06,
"loss": 0.6767,
"step": 6795
},
{
"epoch": 1.8074468085106383,
"grad_norm": 3.8499093055725098,
"learning_rate": 7.5737662309886415e-06,
"loss": 0.8128,
"step": 6796
},
{
"epoch": 1.8077127659574468,
"grad_norm": 4.120965480804443,
"learning_rate": 7.573012157593651e-06,
"loss": 0.8356,
"step": 6797
},
{
"epoch": 1.8079787234042555,
"grad_norm": 3.9702072143554688,
"learning_rate": 7.572258004586635e-06,
"loss": 0.773,
"step": 6798
},
{
"epoch": 1.8082446808510637,
"grad_norm": 3.910039186477661,
"learning_rate": 7.5715037719909266e-06,
"loss": 0.7577,
"step": 6799
},
{
"epoch": 1.8085106382978724,
"grad_norm": 3.9392266273498535,
"learning_rate": 7.570749459829865e-06,
"loss": 0.9043,
"step": 6800
},
{
"epoch": 1.8087765957446809,
"grad_norm": 3.9405999183654785,
"learning_rate": 7.56999506812679e-06,
"loss": 0.8526,
"step": 6801
},
{
"epoch": 1.8090425531914893,
"grad_norm": 3.701950788497925,
"learning_rate": 7.569240596905038e-06,
"loss": 0.7136,
"step": 6802
},
{
"epoch": 1.809308510638298,
"grad_norm": 3.7333173751831055,
"learning_rate": 7.568486046187959e-06,
"loss": 0.8191,
"step": 6803
},
{
"epoch": 1.8095744680851062,
"grad_norm": 3.9274251461029053,
"learning_rate": 7.567731415998898e-06,
"loss": 0.8371,
"step": 6804
},
{
"epoch": 1.809840425531915,
"grad_norm": 4.320472240447998,
"learning_rate": 7.566976706361204e-06,
"loss": 0.8743,
"step": 6805
},
{
"epoch": 1.8101063829787234,
"grad_norm": 4.124827861785889,
"learning_rate": 7.566221917298228e-06,
"loss": 0.8599,
"step": 6806
},
{
"epoch": 1.8103723404255319,
"grad_norm": 4.09792947769165,
"learning_rate": 7.565467048833325e-06,
"loss": 0.782,
"step": 6807
},
{
"epoch": 1.8106382978723405,
"grad_norm": 4.003774166107178,
"learning_rate": 7.56471210098985e-06,
"loss": 0.7946,
"step": 6808
},
{
"epoch": 1.8109042553191488,
"grad_norm": 4.259424686431885,
"learning_rate": 7.563957073791164e-06,
"loss": 0.8328,
"step": 6809
},
{
"epoch": 1.8111702127659575,
"grad_norm": 3.9565248489379883,
"learning_rate": 7.563201967260627e-06,
"loss": 0.8544,
"step": 6810
},
{
"epoch": 1.811436170212766,
"grad_norm": 3.88087797164917,
"learning_rate": 7.562446781421604e-06,
"loss": 0.7987,
"step": 6811
},
{
"epoch": 1.8117021276595744,
"grad_norm": 3.9190945625305176,
"learning_rate": 7.5616915162974594e-06,
"loss": 0.8162,
"step": 6812
},
{
"epoch": 1.811968085106383,
"grad_norm": 3.700688600540161,
"learning_rate": 7.560936171911564e-06,
"loss": 0.7738,
"step": 6813
},
{
"epoch": 1.8122340425531915,
"grad_norm": 4.023971080780029,
"learning_rate": 7.560180748287289e-06,
"loss": 0.8266,
"step": 6814
},
{
"epoch": 1.8125,
"grad_norm": 4.754519462585449,
"learning_rate": 7.559425245448006e-06,
"loss": 1.0779,
"step": 6815
},
{
"epoch": 1.8127659574468085,
"grad_norm": 4.043941497802734,
"learning_rate": 7.558669663417093e-06,
"loss": 0.7789,
"step": 6816
},
{
"epoch": 1.813031914893617,
"grad_norm": 4.064941883087158,
"learning_rate": 7.557914002217929e-06,
"loss": 0.8235,
"step": 6817
},
{
"epoch": 1.8132978723404256,
"grad_norm": 4.2770562171936035,
"learning_rate": 7.5571582618738936e-06,
"loss": 0.8647,
"step": 6818
},
{
"epoch": 1.813563829787234,
"grad_norm": 3.758079767227173,
"learning_rate": 7.55640244240837e-06,
"loss": 0.765,
"step": 6819
},
{
"epoch": 1.8138297872340425,
"grad_norm": 4.024742603302002,
"learning_rate": 7.555646543844747e-06,
"loss": 0.9143,
"step": 6820
},
{
"epoch": 1.8140957446808512,
"grad_norm": 4.142058372497559,
"learning_rate": 7.55489056620641e-06,
"loss": 0.8872,
"step": 6821
},
{
"epoch": 1.8143617021276595,
"grad_norm": 4.0311455726623535,
"learning_rate": 7.554134509516751e-06,
"loss": 0.7628,
"step": 6822
},
{
"epoch": 1.8146276595744681,
"grad_norm": 3.73848032951355,
"learning_rate": 7.553378373799163e-06,
"loss": 0.807,
"step": 6823
},
{
"epoch": 1.8148936170212766,
"grad_norm": 3.553116798400879,
"learning_rate": 7.552622159077041e-06,
"loss": 0.8166,
"step": 6824
},
{
"epoch": 1.815159574468085,
"grad_norm": 3.678316116333008,
"learning_rate": 7.5518658653737844e-06,
"loss": 0.8462,
"step": 6825
},
{
"epoch": 1.8154255319148938,
"grad_norm": 4.440575122833252,
"learning_rate": 7.551109492712795e-06,
"loss": 0.8861,
"step": 6826
},
{
"epoch": 1.815691489361702,
"grad_norm": 4.359316825866699,
"learning_rate": 7.550353041117473e-06,
"loss": 0.8025,
"step": 6827
},
{
"epoch": 1.8159574468085107,
"grad_norm": 3.976832389831543,
"learning_rate": 7.549596510611226e-06,
"loss": 0.8486,
"step": 6828
},
{
"epoch": 1.8162234042553191,
"grad_norm": 3.64974308013916,
"learning_rate": 7.54883990121746e-06,
"loss": 0.6982,
"step": 6829
},
{
"epoch": 1.8164893617021276,
"grad_norm": 4.051089286804199,
"learning_rate": 7.548083212959588e-06,
"loss": 0.8417,
"step": 6830
},
{
"epoch": 1.8167553191489363,
"grad_norm": 3.949113130569458,
"learning_rate": 7.547326445861021e-06,
"loss": 0.7382,
"step": 6831
},
{
"epoch": 1.8170212765957445,
"grad_norm": 3.896155834197998,
"learning_rate": 7.546569599945174e-06,
"loss": 0.9312,
"step": 6832
},
{
"epoch": 1.8172872340425532,
"grad_norm": 4.127990245819092,
"learning_rate": 7.545812675235467e-06,
"loss": 0.9422,
"step": 6833
},
{
"epoch": 1.8175531914893617,
"grad_norm": 3.8345584869384766,
"learning_rate": 7.545055671755316e-06,
"loss": 0.8672,
"step": 6834
},
{
"epoch": 1.8178191489361701,
"grad_norm": 3.544022560119629,
"learning_rate": 7.544298589528148e-06,
"loss": 0.8378,
"step": 6835
},
{
"epoch": 1.8180851063829788,
"grad_norm": 3.773446798324585,
"learning_rate": 7.543541428577386e-06,
"loss": 0.7617,
"step": 6836
},
{
"epoch": 1.8183510638297873,
"grad_norm": 4.245392322540283,
"learning_rate": 7.542784188926456e-06,
"loss": 0.7689,
"step": 6837
},
{
"epoch": 1.8186170212765957,
"grad_norm": 4.0154924392700195,
"learning_rate": 7.542026870598791e-06,
"loss": 0.7467,
"step": 6838
},
{
"epoch": 1.8188829787234042,
"grad_norm": 4.492767810821533,
"learning_rate": 7.5412694736178206e-06,
"loss": 0.9573,
"step": 6839
},
{
"epoch": 1.8191489361702127,
"grad_norm": 3.7740705013275146,
"learning_rate": 7.540511998006982e-06,
"loss": 0.6853,
"step": 6840
},
{
"epoch": 1.8194148936170214,
"grad_norm": 4.6515655517578125,
"learning_rate": 7.539754443789709e-06,
"loss": 0.9875,
"step": 6841
},
{
"epoch": 1.8196808510638298,
"grad_norm": 4.019815921783447,
"learning_rate": 7.5389968109894465e-06,
"loss": 0.7956,
"step": 6842
},
{
"epoch": 1.8199468085106383,
"grad_norm": 3.8876473903656006,
"learning_rate": 7.5382390996296315e-06,
"loss": 0.8368,
"step": 6843
},
{
"epoch": 1.820212765957447,
"grad_norm": 4.036003112792969,
"learning_rate": 7.537481309733709e-06,
"loss": 0.7615,
"step": 6844
},
{
"epoch": 1.8204787234042552,
"grad_norm": 3.9731733798980713,
"learning_rate": 7.53672344132513e-06,
"loss": 0.8408,
"step": 6845
},
{
"epoch": 1.820744680851064,
"grad_norm": 4.149892807006836,
"learning_rate": 7.53596549442734e-06,
"loss": 0.7553,
"step": 6846
},
{
"epoch": 1.8210106382978724,
"grad_norm": 3.9756197929382324,
"learning_rate": 7.535207469063791e-06,
"loss": 0.8429,
"step": 6847
},
{
"epoch": 1.8212765957446808,
"grad_norm": 4.044477939605713,
"learning_rate": 7.53444936525794e-06,
"loss": 0.7761,
"step": 6848
},
{
"epoch": 1.8215425531914895,
"grad_norm": 3.613596200942993,
"learning_rate": 7.53369118303324e-06,
"loss": 0.808,
"step": 6849
},
{
"epoch": 1.8218085106382977,
"grad_norm": 4.789092540740967,
"learning_rate": 7.532932922413152e-06,
"loss": 0.8992,
"step": 6850
},
{
"epoch": 1.8220744680851064,
"grad_norm": 3.8128976821899414,
"learning_rate": 7.532174583421138e-06,
"loss": 0.7259,
"step": 6851
},
{
"epoch": 1.8223404255319149,
"grad_norm": 3.685126781463623,
"learning_rate": 7.53141616608066e-06,
"loss": 0.7971,
"step": 6852
},
{
"epoch": 1.8226063829787233,
"grad_norm": 3.8787617683410645,
"learning_rate": 7.5306576704151865e-06,
"loss": 0.7447,
"step": 6853
},
{
"epoch": 1.822872340425532,
"grad_norm": 4.506245136260986,
"learning_rate": 7.529899096448185e-06,
"loss": 0.8898,
"step": 6854
},
{
"epoch": 1.8231382978723403,
"grad_norm": 4.238636016845703,
"learning_rate": 7.529140444203127e-06,
"loss": 0.8057,
"step": 6855
},
{
"epoch": 1.823404255319149,
"grad_norm": 4.039521217346191,
"learning_rate": 7.528381713703485e-06,
"loss": 0.772,
"step": 6856
},
{
"epoch": 1.8236702127659574,
"grad_norm": 3.6089868545532227,
"learning_rate": 7.5276229049727375e-06,
"loss": 0.8194,
"step": 6857
},
{
"epoch": 1.8239361702127659,
"grad_norm": 3.4110054969787598,
"learning_rate": 7.52686401803436e-06,
"loss": 0.6902,
"step": 6858
},
{
"epoch": 1.8242021276595746,
"grad_norm": 3.6139302253723145,
"learning_rate": 7.526105052911836e-06,
"loss": 0.8318,
"step": 6859
},
{
"epoch": 1.824468085106383,
"grad_norm": 4.215152740478516,
"learning_rate": 7.525346009628647e-06,
"loss": 0.8303,
"step": 6860
},
{
"epoch": 1.8247340425531915,
"grad_norm": 3.8578953742980957,
"learning_rate": 7.524586888208278e-06,
"loss": 0.8625,
"step": 6861
},
{
"epoch": 1.825,
"grad_norm": 3.8874824047088623,
"learning_rate": 7.52382768867422e-06,
"loss": 0.7106,
"step": 6862
},
{
"epoch": 1.8252659574468084,
"grad_norm": 3.746168851852417,
"learning_rate": 7.5230684110499604e-06,
"loss": 0.8753,
"step": 6863
},
{
"epoch": 1.825531914893617,
"grad_norm": 3.70993971824646,
"learning_rate": 7.522309055358995e-06,
"loss": 0.7393,
"step": 6864
},
{
"epoch": 1.8257978723404256,
"grad_norm": 3.599679470062256,
"learning_rate": 7.5215496216248175e-06,
"loss": 0.893,
"step": 6865
},
{
"epoch": 1.826063829787234,
"grad_norm": 3.7604589462280273,
"learning_rate": 7.520790109870926e-06,
"loss": 0.7966,
"step": 6866
},
{
"epoch": 1.8263297872340427,
"grad_norm": 3.9113166332244873,
"learning_rate": 7.5200305201208205e-06,
"loss": 0.8071,
"step": 6867
},
{
"epoch": 1.826595744680851,
"grad_norm": 4.262864112854004,
"learning_rate": 7.519270852398002e-06,
"loss": 0.7942,
"step": 6868
},
{
"epoch": 1.8268617021276596,
"grad_norm": 4.096951007843018,
"learning_rate": 7.5185111067259804e-06,
"loss": 0.717,
"step": 6869
},
{
"epoch": 1.827127659574468,
"grad_norm": 4.112506866455078,
"learning_rate": 7.517751283128258e-06,
"loss": 0.8871,
"step": 6870
},
{
"epoch": 1.8273936170212766,
"grad_norm": 3.5203890800476074,
"learning_rate": 7.516991381628347e-06,
"loss": 0.796,
"step": 6871
},
{
"epoch": 1.8276595744680852,
"grad_norm": 3.556929588317871,
"learning_rate": 7.516231402249758e-06,
"loss": 0.8346,
"step": 6872
},
{
"epoch": 1.8279255319148935,
"grad_norm": 3.3509085178375244,
"learning_rate": 7.51547134501601e-06,
"loss": 0.7763,
"step": 6873
},
{
"epoch": 1.8281914893617022,
"grad_norm": 4.3177103996276855,
"learning_rate": 7.514711209950615e-06,
"loss": 0.7943,
"step": 6874
},
{
"epoch": 1.8284574468085106,
"grad_norm": 3.8919661045074463,
"learning_rate": 7.513950997077094e-06,
"loss": 0.7541,
"step": 6875
},
{
"epoch": 1.828723404255319,
"grad_norm": 3.506849765777588,
"learning_rate": 7.513190706418969e-06,
"loss": 0.8451,
"step": 6876
},
{
"epoch": 1.8289893617021278,
"grad_norm": 4.711544513702393,
"learning_rate": 7.512430337999768e-06,
"loss": 0.9569,
"step": 6877
},
{
"epoch": 1.829255319148936,
"grad_norm": 4.111194610595703,
"learning_rate": 7.511669891843011e-06,
"loss": 0.9289,
"step": 6878
},
{
"epoch": 1.8295212765957447,
"grad_norm": 3.4928982257843018,
"learning_rate": 7.510909367972231e-06,
"loss": 0.7627,
"step": 6879
},
{
"epoch": 1.8297872340425532,
"grad_norm": 3.737337827682495,
"learning_rate": 7.5101487664109605e-06,
"loss": 0.7463,
"step": 6880
},
{
"epoch": 1.8300531914893616,
"grad_norm": 3.4611358642578125,
"learning_rate": 7.50938808718273e-06,
"loss": 0.7764,
"step": 6881
},
{
"epoch": 1.8303191489361703,
"grad_norm": 3.901796817779541,
"learning_rate": 7.508627330311078e-06,
"loss": 0.9079,
"step": 6882
},
{
"epoch": 1.8305851063829788,
"grad_norm": 3.8375611305236816,
"learning_rate": 7.507866495819543e-06,
"loss": 0.7861,
"step": 6883
},
{
"epoch": 1.8308510638297872,
"grad_norm": 3.7982888221740723,
"learning_rate": 7.507105583731666e-06,
"loss": 0.8905,
"step": 6884
},
{
"epoch": 1.8311170212765957,
"grad_norm": 3.70542573928833,
"learning_rate": 7.506344594070991e-06,
"loss": 0.7173,
"step": 6885
},
{
"epoch": 1.8313829787234042,
"grad_norm": 3.7828474044799805,
"learning_rate": 7.505583526861064e-06,
"loss": 0.8687,
"step": 6886
},
{
"epoch": 1.8316489361702128,
"grad_norm": 4.376963138580322,
"learning_rate": 7.504822382125432e-06,
"loss": 0.982,
"step": 6887
},
{
"epoch": 1.8319148936170213,
"grad_norm": 3.9631431102752686,
"learning_rate": 7.504061159887646e-06,
"loss": 0.8186,
"step": 6888
},
{
"epoch": 1.8321808510638298,
"grad_norm": 4.296795845031738,
"learning_rate": 7.5032998601712605e-06,
"loss": 0.8346,
"step": 6889
},
{
"epoch": 1.8324468085106385,
"grad_norm": 3.889289617538452,
"learning_rate": 7.502538482999829e-06,
"loss": 0.8344,
"step": 6890
},
{
"epoch": 1.8327127659574467,
"grad_norm": 4.060772895812988,
"learning_rate": 7.50177702839691e-06,
"loss": 0.7625,
"step": 6891
},
{
"epoch": 1.8329787234042554,
"grad_norm": 3.6209208965301514,
"learning_rate": 7.501015496386066e-06,
"loss": 0.779,
"step": 6892
},
{
"epoch": 1.8332446808510638,
"grad_norm": 3.7519564628601074,
"learning_rate": 7.5002538869908556e-06,
"loss": 0.7245,
"step": 6893
},
{
"epoch": 1.8335106382978723,
"grad_norm": 3.842135190963745,
"learning_rate": 7.499492200234849e-06,
"loss": 0.7977,
"step": 6894
},
{
"epoch": 1.833776595744681,
"grad_norm": 4.067161560058594,
"learning_rate": 7.498730436141609e-06,
"loss": 0.8287,
"step": 6895
},
{
"epoch": 1.8340425531914892,
"grad_norm": 3.8573522567749023,
"learning_rate": 7.497968594734708e-06,
"loss": 0.7012,
"step": 6896
},
{
"epoch": 1.834308510638298,
"grad_norm": 3.792734146118164,
"learning_rate": 7.4972066760377184e-06,
"loss": 0.7986,
"step": 6897
},
{
"epoch": 1.8345744680851064,
"grad_norm": 4.287036418914795,
"learning_rate": 7.496444680074213e-06,
"loss": 0.8091,
"step": 6898
},
{
"epoch": 1.8348404255319148,
"grad_norm": 3.9161949157714844,
"learning_rate": 7.49568260686777e-06,
"loss": 0.8796,
"step": 6899
},
{
"epoch": 1.8351063829787235,
"grad_norm": 3.8841638565063477,
"learning_rate": 7.49492045644197e-06,
"loss": 0.8827,
"step": 6900
},
{
"epoch": 1.8353723404255318,
"grad_norm": 3.770533323287964,
"learning_rate": 7.494158228820393e-06,
"loss": 0.7671,
"step": 6901
},
{
"epoch": 1.8356382978723405,
"grad_norm": 4.155034065246582,
"learning_rate": 7.493395924026623e-06,
"loss": 0.8533,
"step": 6902
},
{
"epoch": 1.835904255319149,
"grad_norm": 3.911745071411133,
"learning_rate": 7.492633542084249e-06,
"loss": 0.82,
"step": 6903
},
{
"epoch": 1.8361702127659574,
"grad_norm": 3.444728136062622,
"learning_rate": 7.491871083016858e-06,
"loss": 0.7717,
"step": 6904
},
{
"epoch": 1.836436170212766,
"grad_norm": 4.003023147583008,
"learning_rate": 7.491108546848041e-06,
"loss": 0.7351,
"step": 6905
},
{
"epoch": 1.8367021276595743,
"grad_norm": 3.9087607860565186,
"learning_rate": 7.490345933601395e-06,
"loss": 0.8509,
"step": 6906
},
{
"epoch": 1.836968085106383,
"grad_norm": 4.098905086517334,
"learning_rate": 7.489583243300511e-06,
"loss": 0.9289,
"step": 6907
},
{
"epoch": 1.8372340425531914,
"grad_norm": 4.120253562927246,
"learning_rate": 7.488820475968992e-06,
"loss": 0.8707,
"step": 6908
},
{
"epoch": 1.8375,
"grad_norm": 4.324950218200684,
"learning_rate": 7.488057631630438e-06,
"loss": 0.7811,
"step": 6909
},
{
"epoch": 1.8377659574468086,
"grad_norm": 4.5706634521484375,
"learning_rate": 7.4872947103084495e-06,
"loss": 0.8641,
"step": 6910
},
{
"epoch": 1.838031914893617,
"grad_norm": 4.22561502456665,
"learning_rate": 7.486531712026634e-06,
"loss": 0.794,
"step": 6911
},
{
"epoch": 1.8382978723404255,
"grad_norm": 4.015974521636963,
"learning_rate": 7.485768636808603e-06,
"loss": 0.8757,
"step": 6912
},
{
"epoch": 1.8385638297872342,
"grad_norm": 3.7457127571105957,
"learning_rate": 7.48500548467796e-06,
"loss": 0.8682,
"step": 6913
},
{
"epoch": 1.8388297872340424,
"grad_norm": 3.964571714401245,
"learning_rate": 7.484242255658322e-06,
"loss": 0.7431,
"step": 6914
},
{
"epoch": 1.8390957446808511,
"grad_norm": 3.838426351547241,
"learning_rate": 7.4834789497733065e-06,
"loss": 0.7413,
"step": 6915
},
{
"epoch": 1.8393617021276596,
"grad_norm": 3.7367520332336426,
"learning_rate": 7.4827155670465264e-06,
"loss": 0.8366,
"step": 6916
},
{
"epoch": 1.839627659574468,
"grad_norm": 3.9056553840637207,
"learning_rate": 7.481952107501604e-06,
"loss": 0.7134,
"step": 6917
},
{
"epoch": 1.8398936170212767,
"grad_norm": 4.098144054412842,
"learning_rate": 7.481188571162161e-06,
"loss": 0.7744,
"step": 6918
},
{
"epoch": 1.840159574468085,
"grad_norm": 4.067973613739014,
"learning_rate": 7.480424958051823e-06,
"loss": 0.8143,
"step": 6919
},
{
"epoch": 1.8404255319148937,
"grad_norm": 3.9194462299346924,
"learning_rate": 7.479661268194217e-06,
"loss": 0.8335,
"step": 6920
},
{
"epoch": 1.8406914893617021,
"grad_norm": 4.130805492401123,
"learning_rate": 7.4788975016129704e-06,
"loss": 0.769,
"step": 6921
},
{
"epoch": 1.8409574468085106,
"grad_norm": 3.580792188644409,
"learning_rate": 7.478133658331716e-06,
"loss": 0.7743,
"step": 6922
},
{
"epoch": 1.8412234042553193,
"grad_norm": 3.78035569190979,
"learning_rate": 7.477369738374092e-06,
"loss": 0.8619,
"step": 6923
},
{
"epoch": 1.8414893617021275,
"grad_norm": 3.8400089740753174,
"learning_rate": 7.476605741763729e-06,
"loss": 0.8161,
"step": 6924
},
{
"epoch": 1.8417553191489362,
"grad_norm": 3.7448103427886963,
"learning_rate": 7.475841668524268e-06,
"loss": 0.8305,
"step": 6925
},
{
"epoch": 1.8420212765957447,
"grad_norm": 3.828014850616455,
"learning_rate": 7.475077518679352e-06,
"loss": 0.8424,
"step": 6926
},
{
"epoch": 1.8422872340425531,
"grad_norm": 3.776527166366577,
"learning_rate": 7.474313292252624e-06,
"loss": 0.9811,
"step": 6927
},
{
"epoch": 1.8425531914893618,
"grad_norm": 4.294341564178467,
"learning_rate": 7.473548989267728e-06,
"loss": 0.8375,
"step": 6928
},
{
"epoch": 1.84281914893617,
"grad_norm": 4.230419158935547,
"learning_rate": 7.472784609748316e-06,
"loss": 0.7886,
"step": 6929
},
{
"epoch": 1.8430851063829787,
"grad_norm": 4.243613243103027,
"learning_rate": 7.472020153718036e-06,
"loss": 0.8787,
"step": 6930
},
{
"epoch": 1.8433510638297872,
"grad_norm": 4.046195983886719,
"learning_rate": 7.471255621200541e-06,
"loss": 0.7344,
"step": 6931
},
{
"epoch": 1.8436170212765957,
"grad_norm": 3.4666972160339355,
"learning_rate": 7.470491012219488e-06,
"loss": 0.8123,
"step": 6932
},
{
"epoch": 1.8438829787234043,
"grad_norm": 4.226772785186768,
"learning_rate": 7.469726326798535e-06,
"loss": 0.7765,
"step": 6933
},
{
"epoch": 1.8441489361702128,
"grad_norm": 4.348804950714111,
"learning_rate": 7.468961564961341e-06,
"loss": 0.8481,
"step": 6934
},
{
"epoch": 1.8444148936170213,
"grad_norm": 3.7085683345794678,
"learning_rate": 7.4681967267315715e-06,
"loss": 0.7717,
"step": 6935
},
{
"epoch": 1.84468085106383,
"grad_norm": 3.670295238494873,
"learning_rate": 7.4674318121328856e-06,
"loss": 0.7074,
"step": 6936
},
{
"epoch": 1.8449468085106382,
"grad_norm": 4.235050678253174,
"learning_rate": 7.466666821188957e-06,
"loss": 0.9085,
"step": 6937
},
{
"epoch": 1.8452127659574469,
"grad_norm": 4.282822132110596,
"learning_rate": 7.465901753923452e-06,
"loss": 0.8641,
"step": 6938
},
{
"epoch": 1.8454787234042553,
"grad_norm": 3.9703402519226074,
"learning_rate": 7.465136610360044e-06,
"loss": 0.7331,
"step": 6939
},
{
"epoch": 1.8457446808510638,
"grad_norm": 3.793503522872925,
"learning_rate": 7.4643713905224065e-06,
"loss": 0.8122,
"step": 6940
},
{
"epoch": 1.8460106382978725,
"grad_norm": 4.120753288269043,
"learning_rate": 7.463606094434218e-06,
"loss": 0.8822,
"step": 6941
},
{
"epoch": 1.8462765957446807,
"grad_norm": 4.266670227050781,
"learning_rate": 7.462840722119155e-06,
"loss": 0.8363,
"step": 6942
},
{
"epoch": 1.8465425531914894,
"grad_norm": 3.998488664627075,
"learning_rate": 7.462075273600901e-06,
"loss": 0.895,
"step": 6943
},
{
"epoch": 1.8468085106382979,
"grad_norm": 3.923610210418701,
"learning_rate": 7.461309748903138e-06,
"loss": 0.8406,
"step": 6944
},
{
"epoch": 1.8470744680851063,
"grad_norm": 4.076598644256592,
"learning_rate": 7.460544148049555e-06,
"loss": 0.7919,
"step": 6945
},
{
"epoch": 1.847340425531915,
"grad_norm": 4.171792507171631,
"learning_rate": 7.459778471063839e-06,
"loss": 0.9616,
"step": 6946
},
{
"epoch": 1.8476063829787233,
"grad_norm": 4.327701091766357,
"learning_rate": 7.45901271796968e-06,
"loss": 0.8918,
"step": 6947
},
{
"epoch": 1.847872340425532,
"grad_norm": 4.035894393920898,
"learning_rate": 7.4582468887907746e-06,
"loss": 0.7007,
"step": 6948
},
{
"epoch": 1.8481382978723404,
"grad_norm": 3.9794068336486816,
"learning_rate": 7.457480983550813e-06,
"loss": 0.8622,
"step": 6949
},
{
"epoch": 1.8484042553191489,
"grad_norm": 3.988560914993286,
"learning_rate": 7.4567150022735e-06,
"loss": 0.7892,
"step": 6950
},
{
"epoch": 1.8486702127659576,
"grad_norm": 3.761817216873169,
"learning_rate": 7.455948944982529e-06,
"loss": 0.7549,
"step": 6951
},
{
"epoch": 1.8489361702127658,
"grad_norm": 3.962528944015503,
"learning_rate": 7.455182811701609e-06,
"loss": 0.7874,
"step": 6952
},
{
"epoch": 1.8492021276595745,
"grad_norm": 4.180268287658691,
"learning_rate": 7.454416602454441e-06,
"loss": 0.8401,
"step": 6953
},
{
"epoch": 1.849468085106383,
"grad_norm": 3.7611262798309326,
"learning_rate": 7.453650317264734e-06,
"loss": 0.8463,
"step": 6954
},
{
"epoch": 1.8497340425531914,
"grad_norm": 3.7269387245178223,
"learning_rate": 7.452883956156197e-06,
"loss": 0.7884,
"step": 6955
},
{
"epoch": 1.85,
"grad_norm": 4.998419284820557,
"learning_rate": 7.452117519152542e-06,
"loss": 0.861,
"step": 6956
},
{
"epoch": 1.8502659574468086,
"grad_norm": 4.210315704345703,
"learning_rate": 7.4513510062774845e-06,
"loss": 0.8083,
"step": 6957
},
{
"epoch": 1.850531914893617,
"grad_norm": 4.184957027435303,
"learning_rate": 7.4505844175547405e-06,
"loss": 0.7648,
"step": 6958
},
{
"epoch": 1.8507978723404257,
"grad_norm": 3.883157730102539,
"learning_rate": 7.44981775300803e-06,
"loss": 0.789,
"step": 6959
},
{
"epoch": 1.851063829787234,
"grad_norm": 3.930384397506714,
"learning_rate": 7.449051012661073e-06,
"loss": 0.7467,
"step": 6960
},
{
"epoch": 1.8513297872340426,
"grad_norm": 4.148220062255859,
"learning_rate": 7.448284196537594e-06,
"loss": 0.8692,
"step": 6961
},
{
"epoch": 1.851595744680851,
"grad_norm": 4.141353607177734,
"learning_rate": 7.4475173046613205e-06,
"loss": 0.8553,
"step": 6962
},
{
"epoch": 1.8518617021276595,
"grad_norm": 3.8646962642669678,
"learning_rate": 7.4467503370559806e-06,
"loss": 0.7953,
"step": 6963
},
{
"epoch": 1.8521276595744682,
"grad_norm": 3.765763759613037,
"learning_rate": 7.445983293745302e-06,
"loss": 0.7173,
"step": 6964
},
{
"epoch": 1.8523936170212765,
"grad_norm": 3.5731546878814697,
"learning_rate": 7.445216174753022e-06,
"loss": 0.7643,
"step": 6965
},
{
"epoch": 1.8526595744680852,
"grad_norm": 3.3962113857269287,
"learning_rate": 7.444448980102875e-06,
"loss": 0.7694,
"step": 6966
},
{
"epoch": 1.8529255319148936,
"grad_norm": 4.201429843902588,
"learning_rate": 7.4436817098186e-06,
"loss": 0.9388,
"step": 6967
},
{
"epoch": 1.853191489361702,
"grad_norm": 4.063852787017822,
"learning_rate": 7.442914363923933e-06,
"loss": 0.8472,
"step": 6968
},
{
"epoch": 1.8534574468085108,
"grad_norm": 4.6696696281433105,
"learning_rate": 7.442146942442621e-06,
"loss": 0.8739,
"step": 6969
},
{
"epoch": 1.853723404255319,
"grad_norm": 3.5337836742401123,
"learning_rate": 7.4413794453984065e-06,
"loss": 0.7506,
"step": 6970
},
{
"epoch": 1.8539893617021277,
"grad_norm": 4.372726917266846,
"learning_rate": 7.440611872815038e-06,
"loss": 0.824,
"step": 6971
},
{
"epoch": 1.8542553191489362,
"grad_norm": 4.04209566116333,
"learning_rate": 7.439844224716265e-06,
"loss": 0.8098,
"step": 6972
},
{
"epoch": 1.8545212765957446,
"grad_norm": 3.8578147888183594,
"learning_rate": 7.439076501125839e-06,
"loss": 0.7585,
"step": 6973
},
{
"epoch": 1.8547872340425533,
"grad_norm": 4.210418701171875,
"learning_rate": 7.4383087020675145e-06,
"loss": 0.7915,
"step": 6974
},
{
"epoch": 1.8550531914893615,
"grad_norm": 3.4614603519439697,
"learning_rate": 7.4375408275650475e-06,
"loss": 0.7506,
"step": 6975
},
{
"epoch": 1.8553191489361702,
"grad_norm": 4.220035076141357,
"learning_rate": 7.436772877642199e-06,
"loss": 0.8875,
"step": 6976
},
{
"epoch": 1.8555851063829787,
"grad_norm": 4.095662593841553,
"learning_rate": 7.436004852322727e-06,
"loss": 0.8973,
"step": 6977
},
{
"epoch": 1.8558510638297872,
"grad_norm": 4.23422908782959,
"learning_rate": 7.435236751630397e-06,
"loss": 0.699,
"step": 6978
},
{
"epoch": 1.8561170212765958,
"grad_norm": 3.976768970489502,
"learning_rate": 7.434468575588976e-06,
"loss": 0.781,
"step": 6979
},
{
"epoch": 1.8563829787234043,
"grad_norm": 4.405401229858398,
"learning_rate": 7.43370032422223e-06,
"loss": 0.7388,
"step": 6980
},
{
"epoch": 1.8566489361702128,
"grad_norm": 4.096654891967773,
"learning_rate": 7.432931997553929e-06,
"loss": 0.8305,
"step": 6981
},
{
"epoch": 1.8569148936170212,
"grad_norm": 3.9386327266693115,
"learning_rate": 7.432163595607851e-06,
"loss": 0.775,
"step": 6982
},
{
"epoch": 1.8571808510638297,
"grad_norm": 4.111544609069824,
"learning_rate": 7.431395118407766e-06,
"loss": 0.9179,
"step": 6983
},
{
"epoch": 1.8574468085106384,
"grad_norm": 3.3650224208831787,
"learning_rate": 7.4306265659774525e-06,
"loss": 0.8286,
"step": 6984
},
{
"epoch": 1.8577127659574468,
"grad_norm": 4.099471569061279,
"learning_rate": 7.429857938340693e-06,
"loss": 0.8789,
"step": 6985
},
{
"epoch": 1.8579787234042553,
"grad_norm": 4.082056999206543,
"learning_rate": 7.429089235521267e-06,
"loss": 0.8938,
"step": 6986
},
{
"epoch": 1.858244680851064,
"grad_norm": 4.1304545402526855,
"learning_rate": 7.428320457542962e-06,
"loss": 0.8639,
"step": 6987
},
{
"epoch": 1.8585106382978722,
"grad_norm": 3.941922426223755,
"learning_rate": 7.427551604429562e-06,
"loss": 0.7966,
"step": 6988
},
{
"epoch": 1.858776595744681,
"grad_norm": 3.8861730098724365,
"learning_rate": 7.426782676204857e-06,
"loss": 0.8282,
"step": 6989
},
{
"epoch": 1.8590425531914894,
"grad_norm": 3.8917558193206787,
"learning_rate": 7.426013672892639e-06,
"loss": 0.7213,
"step": 6990
},
{
"epoch": 1.8593085106382978,
"grad_norm": 4.324743747711182,
"learning_rate": 7.4252445945167005e-06,
"loss": 0.9627,
"step": 6991
},
{
"epoch": 1.8595744680851065,
"grad_norm": 3.6545021533966064,
"learning_rate": 7.42447544110084e-06,
"loss": 0.742,
"step": 6992
},
{
"epoch": 1.8598404255319148,
"grad_norm": 4.201162338256836,
"learning_rate": 7.423706212668855e-06,
"loss": 0.8343,
"step": 6993
},
{
"epoch": 1.8601063829787234,
"grad_norm": 3.67588472366333,
"learning_rate": 7.4229369092445465e-06,
"loss": 0.7863,
"step": 6994
},
{
"epoch": 1.860372340425532,
"grad_norm": 3.3527588844299316,
"learning_rate": 7.422167530851716e-06,
"loss": 0.7513,
"step": 6995
},
{
"epoch": 1.8606382978723404,
"grad_norm": 3.977691888809204,
"learning_rate": 7.421398077514172e-06,
"loss": 0.7507,
"step": 6996
},
{
"epoch": 1.860904255319149,
"grad_norm": 4.172175407409668,
"learning_rate": 7.420628549255719e-06,
"loss": 0.8395,
"step": 6997
},
{
"epoch": 1.8611702127659573,
"grad_norm": 3.738621473312378,
"learning_rate": 7.41985894610017e-06,
"loss": 0.8366,
"step": 6998
},
{
"epoch": 1.861436170212766,
"grad_norm": 4.003189563751221,
"learning_rate": 7.4190892680713366e-06,
"loss": 0.9032,
"step": 6999
},
{
"epoch": 1.8617021276595744,
"grad_norm": 3.872437000274658,
"learning_rate": 7.418319515193032e-06,
"loss": 0.8052,
"step": 7000
},
{
"epoch": 1.8617021276595744,
"eval_loss": 1.269985556602478,
"eval_runtime": 14.1914,
"eval_samples_per_second": 28.186,
"eval_steps_per_second": 3.523,
"step": 7000
},
{
"epoch": 1.861968085106383,
"grad_norm": 4.005687713623047,
"learning_rate": 7.417549687489074e-06,
"loss": 0.7515,
"step": 7001
},
{
"epoch": 1.8622340425531916,
"grad_norm": 3.833047866821289,
"learning_rate": 7.416779784983284e-06,
"loss": 0.8487,
"step": 7002
},
{
"epoch": 1.8625,
"grad_norm": 3.902536392211914,
"learning_rate": 7.416009807699481e-06,
"loss": 0.7448,
"step": 7003
},
{
"epoch": 1.8627659574468085,
"grad_norm": 4.018909931182861,
"learning_rate": 7.41523975566149e-06,
"loss": 0.8619,
"step": 7004
},
{
"epoch": 1.863031914893617,
"grad_norm": 3.7916078567504883,
"learning_rate": 7.414469628893137e-06,
"loss": 0.7254,
"step": 7005
},
{
"epoch": 1.8632978723404254,
"grad_norm": 3.662709951400757,
"learning_rate": 7.413699427418253e-06,
"loss": 0.8801,
"step": 7006
},
{
"epoch": 1.8635638297872341,
"grad_norm": 3.8417561054229736,
"learning_rate": 7.412929151260665e-06,
"loss": 0.9611,
"step": 7007
},
{
"epoch": 1.8638297872340426,
"grad_norm": 3.8474161624908447,
"learning_rate": 7.412158800444208e-06,
"loss": 0.7215,
"step": 7008
},
{
"epoch": 1.864095744680851,
"grad_norm": 3.4360055923461914,
"learning_rate": 7.411388374992719e-06,
"loss": 0.7885,
"step": 7009
},
{
"epoch": 1.8643617021276597,
"grad_norm": 3.902475357055664,
"learning_rate": 7.410617874930034e-06,
"loss": 0.8199,
"step": 7010
},
{
"epoch": 1.864627659574468,
"grad_norm": 4.08276891708374,
"learning_rate": 7.409847300279993e-06,
"loss": 0.793,
"step": 7011
},
{
"epoch": 1.8648936170212767,
"grad_norm": 4.242387294769287,
"learning_rate": 7.4090766510664405e-06,
"loss": 0.9345,
"step": 7012
},
{
"epoch": 1.8651595744680851,
"grad_norm": 3.8312370777130127,
"learning_rate": 7.40830592731322e-06,
"loss": 0.8151,
"step": 7013
},
{
"epoch": 1.8654255319148936,
"grad_norm": 4.087930679321289,
"learning_rate": 7.407535129044179e-06,
"loss": 0.936,
"step": 7014
},
{
"epoch": 1.8656914893617023,
"grad_norm": 4.200309753417969,
"learning_rate": 7.4067642562831656e-06,
"loss": 0.8345,
"step": 7015
},
{
"epoch": 1.8659574468085105,
"grad_norm": 3.7283883094787598,
"learning_rate": 7.4059933090540315e-06,
"loss": 0.7398,
"step": 7016
},
{
"epoch": 1.8662234042553192,
"grad_norm": 4.288913249969482,
"learning_rate": 7.4052222873806345e-06,
"loss": 0.9314,
"step": 7017
},
{
"epoch": 1.8664893617021276,
"grad_norm": 4.077908515930176,
"learning_rate": 7.404451191286825e-06,
"loss": 0.8331,
"step": 7018
},
{
"epoch": 1.866755319148936,
"grad_norm": 4.040445804595947,
"learning_rate": 7.403680020796468e-06,
"loss": 0.8054,
"step": 7019
},
{
"epoch": 1.8670212765957448,
"grad_norm": 4.416097164154053,
"learning_rate": 7.402908775933419e-06,
"loss": 0.7164,
"step": 7020
},
{
"epoch": 1.867287234042553,
"grad_norm": 3.8552403450012207,
"learning_rate": 7.402137456721544e-06,
"loss": 0.8274,
"step": 7021
},
{
"epoch": 1.8675531914893617,
"grad_norm": 4.477870941162109,
"learning_rate": 7.401366063184709e-06,
"loss": 0.9087,
"step": 7022
},
{
"epoch": 1.8678191489361702,
"grad_norm": 4.315149784088135,
"learning_rate": 7.4005945953467794e-06,
"loss": 0.8275,
"step": 7023
},
{
"epoch": 1.8680851063829786,
"grad_norm": 4.013988971710205,
"learning_rate": 7.3998230532316275e-06,
"loss": 0.7935,
"step": 7024
},
{
"epoch": 1.8683510638297873,
"grad_norm": 4.538480281829834,
"learning_rate": 7.399051436863125e-06,
"loss": 0.7913,
"step": 7025
},
{
"epoch": 1.8686170212765958,
"grad_norm": 3.814431667327881,
"learning_rate": 7.398279746265144e-06,
"loss": 0.8819,
"step": 7026
},
{
"epoch": 1.8688829787234043,
"grad_norm": 4.128929615020752,
"learning_rate": 7.397507981461567e-06,
"loss": 0.7733,
"step": 7027
},
{
"epoch": 1.8691489361702127,
"grad_norm": 4.266568660736084,
"learning_rate": 7.3967361424762696e-06,
"loss": 0.8756,
"step": 7028
},
{
"epoch": 1.8694148936170212,
"grad_norm": 3.817857265472412,
"learning_rate": 7.3959642293331336e-06,
"loss": 0.8247,
"step": 7029
},
{
"epoch": 1.8696808510638299,
"grad_norm": 4.07396125793457,
"learning_rate": 7.395192242056044e-06,
"loss": 0.7925,
"step": 7030
},
{
"epoch": 1.8699468085106383,
"grad_norm": 3.3347582817077637,
"learning_rate": 7.3944201806688865e-06,
"loss": 0.647,
"step": 7031
},
{
"epoch": 1.8702127659574468,
"grad_norm": 3.7496252059936523,
"learning_rate": 7.393648045195548e-06,
"loss": 0.884,
"step": 7032
},
{
"epoch": 1.8704787234042555,
"grad_norm": 3.871969223022461,
"learning_rate": 7.392875835659923e-06,
"loss": 0.7962,
"step": 7033
},
{
"epoch": 1.8707446808510637,
"grad_norm": 4.357855796813965,
"learning_rate": 7.392103552085901e-06,
"loss": 0.8063,
"step": 7034
},
{
"epoch": 1.8710106382978724,
"grad_norm": 3.7552926540374756,
"learning_rate": 7.391331194497379e-06,
"loss": 0.7611,
"step": 7035
},
{
"epoch": 1.8712765957446809,
"grad_norm": 4.20325231552124,
"learning_rate": 7.390558762918254e-06,
"loss": 0.8825,
"step": 7036
},
{
"epoch": 1.8715425531914893,
"grad_norm": 3.433969020843506,
"learning_rate": 7.389786257372428e-06,
"loss": 0.6822,
"step": 7037
},
{
"epoch": 1.871808510638298,
"grad_norm": 3.9316911697387695,
"learning_rate": 7.3890136778837995e-06,
"loss": 0.8302,
"step": 7038
},
{
"epoch": 1.8720744680851062,
"grad_norm": 3.7068655490875244,
"learning_rate": 7.388241024476276e-06,
"loss": 0.8207,
"step": 7039
},
{
"epoch": 1.872340425531915,
"grad_norm": 3.7558844089508057,
"learning_rate": 7.387468297173764e-06,
"loss": 0.8916,
"step": 7040
},
{
"epoch": 1.8726063829787234,
"grad_norm": 3.663325786590576,
"learning_rate": 7.386695496000172e-06,
"loss": 0.8461,
"step": 7041
},
{
"epoch": 1.8728723404255319,
"grad_norm": 3.7792584896087646,
"learning_rate": 7.38592262097941e-06,
"loss": 0.775,
"step": 7042
},
{
"epoch": 1.8731382978723405,
"grad_norm": 3.6168766021728516,
"learning_rate": 7.385149672135394e-06,
"loss": 0.7552,
"step": 7043
},
{
"epoch": 1.8734042553191488,
"grad_norm": 3.5428271293640137,
"learning_rate": 7.384376649492039e-06,
"loss": 0.8633,
"step": 7044
},
{
"epoch": 1.8736702127659575,
"grad_norm": 4.00286340713501,
"learning_rate": 7.383603553073262e-06,
"loss": 0.7895,
"step": 7045
},
{
"epoch": 1.873936170212766,
"grad_norm": 4.0529890060424805,
"learning_rate": 7.382830382902986e-06,
"loss": 0.7161,
"step": 7046
},
{
"epoch": 1.8742021276595744,
"grad_norm": 4.5928425788879395,
"learning_rate": 7.382057139005132e-06,
"loss": 0.8454,
"step": 7047
},
{
"epoch": 1.874468085106383,
"grad_norm": 3.7979865074157715,
"learning_rate": 7.381283821403626e-06,
"loss": 0.8475,
"step": 7048
},
{
"epoch": 1.8747340425531915,
"grad_norm": 3.9232993125915527,
"learning_rate": 7.380510430122396e-06,
"loss": 0.8079,
"step": 7049
},
{
"epoch": 1.875,
"grad_norm": 4.084567546844482,
"learning_rate": 7.379736965185369e-06,
"loss": 0.8926,
"step": 7050
},
{
"epoch": 1.8752659574468085,
"grad_norm": 3.967013359069824,
"learning_rate": 7.378963426616479e-06,
"loss": 0.8136,
"step": 7051
},
{
"epoch": 1.875531914893617,
"grad_norm": 4.18993616104126,
"learning_rate": 7.378189814439659e-06,
"loss": 0.663,
"step": 7052
},
{
"epoch": 1.8757978723404256,
"grad_norm": 3.4214327335357666,
"learning_rate": 7.377416128678847e-06,
"loss": 0.7142,
"step": 7053
},
{
"epoch": 1.876063829787234,
"grad_norm": 4.111138343811035,
"learning_rate": 7.37664236935798e-06,
"loss": 0.8517,
"step": 7054
},
{
"epoch": 1.8763297872340425,
"grad_norm": 4.020641326904297,
"learning_rate": 7.375868536501001e-06,
"loss": 0.7649,
"step": 7055
},
{
"epoch": 1.8765957446808512,
"grad_norm": 3.6159451007843018,
"learning_rate": 7.375094630131852e-06,
"loss": 0.7219,
"step": 7056
},
{
"epoch": 1.8768617021276595,
"grad_norm": 4.138524532318115,
"learning_rate": 7.374320650274479e-06,
"loss": 0.7374,
"step": 7057
},
{
"epoch": 1.8771276595744681,
"grad_norm": 4.114788055419922,
"learning_rate": 7.373546596952829e-06,
"loss": 0.9118,
"step": 7058
},
{
"epoch": 1.8773936170212766,
"grad_norm": 3.8229057788848877,
"learning_rate": 7.372772470190852e-06,
"loss": 0.7109,
"step": 7059
},
{
"epoch": 1.877659574468085,
"grad_norm": 3.9543075561523438,
"learning_rate": 7.371998270012504e-06,
"loss": 0.7616,
"step": 7060
},
{
"epoch": 1.8779255319148938,
"grad_norm": 3.862529754638672,
"learning_rate": 7.3712239964417345e-06,
"loss": 0.8719,
"step": 7061
},
{
"epoch": 1.878191489361702,
"grad_norm": 3.855138063430786,
"learning_rate": 7.370449649502504e-06,
"loss": 0.7093,
"step": 7062
},
{
"epoch": 1.8784574468085107,
"grad_norm": 4.169119358062744,
"learning_rate": 7.36967522921877e-06,
"loss": 0.8817,
"step": 7063
},
{
"epoch": 1.8787234042553191,
"grad_norm": 3.8987720012664795,
"learning_rate": 7.368900735614494e-06,
"loss": 0.7522,
"step": 7064
},
{
"epoch": 1.8789893617021276,
"grad_norm": 3.938058853149414,
"learning_rate": 7.36812616871364e-06,
"loss": 0.7694,
"step": 7065
},
{
"epoch": 1.8792553191489363,
"grad_norm": 3.7450876235961914,
"learning_rate": 7.367351528540176e-06,
"loss": 0.7283,
"step": 7066
},
{
"epoch": 1.8795212765957445,
"grad_norm": 3.9045193195343018,
"learning_rate": 7.366576815118067e-06,
"loss": 0.735,
"step": 7067
},
{
"epoch": 1.8797872340425532,
"grad_norm": 3.4928138256073,
"learning_rate": 7.365802028471285e-06,
"loss": 0.7537,
"step": 7068
},
{
"epoch": 1.8800531914893617,
"grad_norm": 3.8254666328430176,
"learning_rate": 7.365027168623804e-06,
"loss": 0.8252,
"step": 7069
},
{
"epoch": 1.8803191489361701,
"grad_norm": 4.039599418640137,
"learning_rate": 7.364252235599596e-06,
"loss": 0.78,
"step": 7070
},
{
"epoch": 1.8805851063829788,
"grad_norm": 4.29962158203125,
"learning_rate": 7.363477229422642e-06,
"loss": 0.8651,
"step": 7071
},
{
"epoch": 1.8808510638297873,
"grad_norm": 3.891298294067383,
"learning_rate": 7.3627021501169196e-06,
"loss": 0.7887,
"step": 7072
},
{
"epoch": 1.8811170212765957,
"grad_norm": 3.8227875232696533,
"learning_rate": 7.36192699770641e-06,
"loss": 0.8563,
"step": 7073
},
{
"epoch": 1.8813829787234042,
"grad_norm": 3.881826639175415,
"learning_rate": 7.3611517722151e-06,
"loss": 0.7518,
"step": 7074
},
{
"epoch": 1.8816489361702127,
"grad_norm": 3.529783248901367,
"learning_rate": 7.360376473666973e-06,
"loss": 0.7086,
"step": 7075
},
{
"epoch": 1.8819148936170214,
"grad_norm": 3.710423231124878,
"learning_rate": 7.359601102086018e-06,
"loss": 0.8141,
"step": 7076
},
{
"epoch": 1.8821808510638298,
"grad_norm": 4.26459264755249,
"learning_rate": 7.358825657496228e-06,
"loss": 0.8523,
"step": 7077
},
{
"epoch": 1.8824468085106383,
"grad_norm": 3.9186158180236816,
"learning_rate": 7.358050139921595e-06,
"loss": 0.806,
"step": 7078
},
{
"epoch": 1.882712765957447,
"grad_norm": 3.5147833824157715,
"learning_rate": 7.3572745493861155e-06,
"loss": 0.742,
"step": 7079
},
{
"epoch": 1.8829787234042552,
"grad_norm": 3.834606885910034,
"learning_rate": 7.356498885913784e-06,
"loss": 0.9308,
"step": 7080
},
{
"epoch": 1.883244680851064,
"grad_norm": 3.989713191986084,
"learning_rate": 7.355723149528604e-06,
"loss": 0.8085,
"step": 7081
},
{
"epoch": 1.8835106382978724,
"grad_norm": 4.148540019989014,
"learning_rate": 7.354947340254576e-06,
"loss": 0.7697,
"step": 7082
},
{
"epoch": 1.8837765957446808,
"grad_norm": 3.6128063201904297,
"learning_rate": 7.354171458115704e-06,
"loss": 0.7755,
"step": 7083
},
{
"epoch": 1.8840425531914895,
"grad_norm": 4.31196928024292,
"learning_rate": 7.353395503135996e-06,
"loss": 0.7754,
"step": 7084
},
{
"epoch": 1.8843085106382977,
"grad_norm": 3.750534772872925,
"learning_rate": 7.35261947533946e-06,
"loss": 0.8237,
"step": 7085
},
{
"epoch": 1.8845744680851064,
"grad_norm": 3.8344967365264893,
"learning_rate": 7.351843374750108e-06,
"loss": 0.832,
"step": 7086
},
{
"epoch": 1.8848404255319149,
"grad_norm": 3.5898144245147705,
"learning_rate": 7.351067201391952e-06,
"loss": 0.737,
"step": 7087
},
{
"epoch": 1.8851063829787233,
"grad_norm": 3.8664729595184326,
"learning_rate": 7.35029095528901e-06,
"loss": 0.8636,
"step": 7088
},
{
"epoch": 1.885372340425532,
"grad_norm": 4.382975101470947,
"learning_rate": 7.349514636465298e-06,
"loss": 0.8923,
"step": 7089
},
{
"epoch": 1.8856382978723403,
"grad_norm": 4.070766448974609,
"learning_rate": 7.348738244944837e-06,
"loss": 0.8651,
"step": 7090
},
{
"epoch": 1.885904255319149,
"grad_norm": 4.187519073486328,
"learning_rate": 7.347961780751649e-06,
"loss": 0.8492,
"step": 7091
},
{
"epoch": 1.8861702127659574,
"grad_norm": 3.7398457527160645,
"learning_rate": 7.347185243909761e-06,
"loss": 0.7936,
"step": 7092
},
{
"epoch": 1.8864361702127659,
"grad_norm": 3.758314609527588,
"learning_rate": 7.346408634443196e-06,
"loss": 0.9086,
"step": 7093
},
{
"epoch": 1.8867021276595746,
"grad_norm": 3.800701856613159,
"learning_rate": 7.345631952375986e-06,
"loss": 0.8418,
"step": 7094
},
{
"epoch": 1.886968085106383,
"grad_norm": 4.155978202819824,
"learning_rate": 7.3448551977321615e-06,
"loss": 0.9388,
"step": 7095
},
{
"epoch": 1.8872340425531915,
"grad_norm": 3.9163780212402344,
"learning_rate": 7.344078370535757e-06,
"loss": 0.7108,
"step": 7096
},
{
"epoch": 1.8875,
"grad_norm": 3.312629222869873,
"learning_rate": 7.343301470810809e-06,
"loss": 0.6591,
"step": 7097
},
{
"epoch": 1.8877659574468084,
"grad_norm": 4.259210586547852,
"learning_rate": 7.342524498581352e-06,
"loss": 0.9209,
"step": 7098
},
{
"epoch": 1.888031914893617,
"grad_norm": 4.158624649047852,
"learning_rate": 7.34174745387143e-06,
"loss": 0.8084,
"step": 7099
},
{
"epoch": 1.8882978723404256,
"grad_norm": 4.25371789932251,
"learning_rate": 7.340970336705084e-06,
"loss": 0.8624,
"step": 7100
},
{
"epoch": 1.888563829787234,
"grad_norm": 3.780513286590576,
"learning_rate": 7.340193147106362e-06,
"loss": 0.7879,
"step": 7101
},
{
"epoch": 1.8888297872340427,
"grad_norm": 4.191688537597656,
"learning_rate": 7.339415885099307e-06,
"loss": 0.7785,
"step": 7102
},
{
"epoch": 1.889095744680851,
"grad_norm": 4.398171901702881,
"learning_rate": 7.33863855070797e-06,
"loss": 0.8883,
"step": 7103
},
{
"epoch": 1.8893617021276596,
"grad_norm": 3.6488990783691406,
"learning_rate": 7.337861143956404e-06,
"loss": 0.8097,
"step": 7104
},
{
"epoch": 1.889627659574468,
"grad_norm": 4.0780487060546875,
"learning_rate": 7.3370836648686616e-06,
"loss": 0.7897,
"step": 7105
},
{
"epoch": 1.8898936170212766,
"grad_norm": 4.089003562927246,
"learning_rate": 7.336306113468799e-06,
"loss": 0.9653,
"step": 7106
},
{
"epoch": 1.8901595744680852,
"grad_norm": 4.446435928344727,
"learning_rate": 7.335528489780874e-06,
"loss": 0.8947,
"step": 7107
},
{
"epoch": 1.8904255319148935,
"grad_norm": 3.880557060241699,
"learning_rate": 7.334750793828947e-06,
"loss": 0.9184,
"step": 7108
},
{
"epoch": 1.8906914893617022,
"grad_norm": 4.0276899337768555,
"learning_rate": 7.3339730256370834e-06,
"loss": 0.7444,
"step": 7109
},
{
"epoch": 1.8909574468085106,
"grad_norm": 4.381673336029053,
"learning_rate": 7.333195185229346e-06,
"loss": 0.7789,
"step": 7110
},
{
"epoch": 1.891223404255319,
"grad_norm": 4.908472537994385,
"learning_rate": 7.3324172726298015e-06,
"loss": 0.8258,
"step": 7111
},
{
"epoch": 1.8914893617021278,
"grad_norm": 4.257655143737793,
"learning_rate": 7.331639287862522e-06,
"loss": 0.8343,
"step": 7112
},
{
"epoch": 1.891755319148936,
"grad_norm": 3.902233600616455,
"learning_rate": 7.330861230951577e-06,
"loss": 0.7672,
"step": 7113
},
{
"epoch": 1.8920212765957447,
"grad_norm": 4.111093044281006,
"learning_rate": 7.3300831019210415e-06,
"loss": 0.9128,
"step": 7114
},
{
"epoch": 1.8922872340425532,
"grad_norm": 4.477164268493652,
"learning_rate": 7.329304900794991e-06,
"loss": 0.9389,
"step": 7115
},
{
"epoch": 1.8925531914893616,
"grad_norm": 4.585188388824463,
"learning_rate": 7.328526627597505e-06,
"loss": 0.8127,
"step": 7116
},
{
"epoch": 1.8928191489361703,
"grad_norm": 3.906665086746216,
"learning_rate": 7.327748282352664e-06,
"loss": 0.7996,
"step": 7117
},
{
"epoch": 1.8930851063829788,
"grad_norm": 4.213885307312012,
"learning_rate": 7.32696986508455e-06,
"loss": 0.8334,
"step": 7118
},
{
"epoch": 1.8933510638297872,
"grad_norm": 4.066798686981201,
"learning_rate": 7.326191375817249e-06,
"loss": 0.8217,
"step": 7119
},
{
"epoch": 1.8936170212765957,
"grad_norm": 3.510889768600464,
"learning_rate": 7.325412814574847e-06,
"loss": 0.7864,
"step": 7120
},
{
"epoch": 1.8938829787234042,
"grad_norm": 3.888808250427246,
"learning_rate": 7.324634181381436e-06,
"loss": 0.7519,
"step": 7121
},
{
"epoch": 1.8941489361702128,
"grad_norm": 3.9174201488494873,
"learning_rate": 7.323855476261106e-06,
"loss": 0.6913,
"step": 7122
},
{
"epoch": 1.8944148936170213,
"grad_norm": 4.041181564331055,
"learning_rate": 7.323076699237951e-06,
"loss": 0.6076,
"step": 7123
},
{
"epoch": 1.8946808510638298,
"grad_norm": 3.841498851776123,
"learning_rate": 7.322297850336069e-06,
"loss": 0.8645,
"step": 7124
},
{
"epoch": 1.8949468085106385,
"grad_norm": 3.5201406478881836,
"learning_rate": 7.3215189295795565e-06,
"loss": 0.7253,
"step": 7125
},
{
"epoch": 1.8952127659574467,
"grad_norm": 3.9525210857391357,
"learning_rate": 7.320739936992514e-06,
"loss": 0.8073,
"step": 7126
},
{
"epoch": 1.8954787234042554,
"grad_norm": 3.8624043464660645,
"learning_rate": 7.319960872599048e-06,
"loss": 0.8157,
"step": 7127
},
{
"epoch": 1.8957446808510638,
"grad_norm": 4.123876571655273,
"learning_rate": 7.31918173642326e-06,
"loss": 0.8038,
"step": 7128
},
{
"epoch": 1.8960106382978723,
"grad_norm": 3.812316417694092,
"learning_rate": 7.318402528489258e-06,
"loss": 0.7421,
"step": 7129
},
{
"epoch": 1.896276595744681,
"grad_norm": 4.009311199188232,
"learning_rate": 7.317623248821153e-06,
"loss": 0.835,
"step": 7130
},
{
"epoch": 1.8965425531914892,
"grad_norm": 4.297110557556152,
"learning_rate": 7.316843897443055e-06,
"loss": 0.7093,
"step": 7131
},
{
"epoch": 1.896808510638298,
"grad_norm": 4.034492015838623,
"learning_rate": 7.316064474379081e-06,
"loss": 0.7682,
"step": 7132
},
{
"epoch": 1.8970744680851064,
"grad_norm": 4.544641494750977,
"learning_rate": 7.315284979653344e-06,
"loss": 0.8832,
"step": 7133
},
{
"epoch": 1.8973404255319148,
"grad_norm": 4.383004188537598,
"learning_rate": 7.314505413289964e-06,
"loss": 0.892,
"step": 7134
},
{
"epoch": 1.8976063829787235,
"grad_norm": 3.52055025100708,
"learning_rate": 7.313725775313061e-06,
"loss": 0.7965,
"step": 7135
},
{
"epoch": 1.8978723404255318,
"grad_norm": 3.933687925338745,
"learning_rate": 7.31294606574676e-06,
"loss": 0.7829,
"step": 7136
},
{
"epoch": 1.8981382978723405,
"grad_norm": 4.500588417053223,
"learning_rate": 7.312166284615183e-06,
"loss": 0.8802,
"step": 7137
},
{
"epoch": 1.898404255319149,
"grad_norm": 3.9210360050201416,
"learning_rate": 7.31138643194246e-06,
"loss": 0.7418,
"step": 7138
},
{
"epoch": 1.8986702127659574,
"grad_norm": 4.024209022521973,
"learning_rate": 7.3106065077527175e-06,
"loss": 0.8769,
"step": 7139
},
{
"epoch": 1.898936170212766,
"grad_norm": 4.242138862609863,
"learning_rate": 7.3098265120700915e-06,
"loss": 0.8789,
"step": 7140
},
{
"epoch": 1.8992021276595743,
"grad_norm": 3.6798341274261475,
"learning_rate": 7.309046444918712e-06,
"loss": 0.7971,
"step": 7141
},
{
"epoch": 1.899468085106383,
"grad_norm": 4.092346668243408,
"learning_rate": 7.308266306322719e-06,
"loss": 0.7864,
"step": 7142
},
{
"epoch": 1.8997340425531914,
"grad_norm": 4.132681846618652,
"learning_rate": 7.307486096306247e-06,
"loss": 0.8868,
"step": 7143
},
{
"epoch": 1.9,
"grad_norm": 3.893075942993164,
"learning_rate": 7.30670581489344e-06,
"loss": 0.9096,
"step": 7144
},
{
"epoch": 1.9002659574468086,
"grad_norm": 3.807593822479248,
"learning_rate": 7.305925462108439e-06,
"loss": 0.7444,
"step": 7145
},
{
"epoch": 1.900531914893617,
"grad_norm": 3.6460392475128174,
"learning_rate": 7.305145037975388e-06,
"loss": 0.74,
"step": 7146
},
{
"epoch": 1.9007978723404255,
"grad_norm": 3.5041310787200928,
"learning_rate": 7.304364542518435e-06,
"loss": 0.8561,
"step": 7147
},
{
"epoch": 1.9010638297872342,
"grad_norm": 4.359119892120361,
"learning_rate": 7.303583975761732e-06,
"loss": 0.735,
"step": 7148
},
{
"epoch": 1.9013297872340424,
"grad_norm": 4.176085948944092,
"learning_rate": 7.302803337729429e-06,
"loss": 0.8723,
"step": 7149
},
{
"epoch": 1.9015957446808511,
"grad_norm": 3.764272689819336,
"learning_rate": 7.302022628445678e-06,
"loss": 0.8359,
"step": 7150
},
{
"epoch": 1.9018617021276596,
"grad_norm": 3.8661603927612305,
"learning_rate": 7.301241847934637e-06,
"loss": 0.9286,
"step": 7151
},
{
"epoch": 1.902127659574468,
"grad_norm": 3.493070363998413,
"learning_rate": 7.300460996220464e-06,
"loss": 0.7439,
"step": 7152
},
{
"epoch": 1.9023936170212767,
"grad_norm": 3.425701379776001,
"learning_rate": 7.2996800733273196e-06,
"loss": 0.8468,
"step": 7153
},
{
"epoch": 1.902659574468085,
"grad_norm": 3.9553513526916504,
"learning_rate": 7.298899079279365e-06,
"loss": 0.8075,
"step": 7154
},
{
"epoch": 1.9029255319148937,
"grad_norm": 3.900907516479492,
"learning_rate": 7.298118014100766e-06,
"loss": 0.8969,
"step": 7155
},
{
"epoch": 1.9031914893617021,
"grad_norm": 3.8822121620178223,
"learning_rate": 7.297336877815693e-06,
"loss": 0.8685,
"step": 7156
},
{
"epoch": 1.9034574468085106,
"grad_norm": 3.847317695617676,
"learning_rate": 7.29655567044831e-06,
"loss": 0.7251,
"step": 7157
},
{
"epoch": 1.9037234042553193,
"grad_norm": 3.5498738288879395,
"learning_rate": 7.295774392022791e-06,
"loss": 0.7035,
"step": 7158
},
{
"epoch": 1.9039893617021275,
"grad_norm": 3.658343553543091,
"learning_rate": 7.2949930425633095e-06,
"loss": 0.7414,
"step": 7159
},
{
"epoch": 1.9042553191489362,
"grad_norm": 3.804388999938965,
"learning_rate": 7.2942116220940406e-06,
"loss": 0.8057,
"step": 7160
},
{
"epoch": 1.9045212765957447,
"grad_norm": 3.876521348953247,
"learning_rate": 7.293430130639163e-06,
"loss": 0.886,
"step": 7161
},
{
"epoch": 1.9047872340425531,
"grad_norm": 3.969161033630371,
"learning_rate": 7.292648568222859e-06,
"loss": 0.9049,
"step": 7162
},
{
"epoch": 1.9050531914893618,
"grad_norm": 4.049928188323975,
"learning_rate": 7.2918669348693075e-06,
"loss": 0.8954,
"step": 7163
},
{
"epoch": 1.90531914893617,
"grad_norm": 3.997854232788086,
"learning_rate": 7.291085230602694e-06,
"loss": 0.9063,
"step": 7164
},
{
"epoch": 1.9055851063829787,
"grad_norm": 4.090554237365723,
"learning_rate": 7.290303455447208e-06,
"loss": 0.8132,
"step": 7165
},
{
"epoch": 1.9058510638297872,
"grad_norm": 3.8804330825805664,
"learning_rate": 7.289521609427035e-06,
"loss": 0.8245,
"step": 7166
},
{
"epoch": 1.9061170212765957,
"grad_norm": 3.7036948204040527,
"learning_rate": 7.288739692566367e-06,
"loss": 0.891,
"step": 7167
},
{
"epoch": 1.9063829787234043,
"grad_norm": 3.8350512981414795,
"learning_rate": 7.2879577048894e-06,
"loss": 0.7912,
"step": 7168
},
{
"epoch": 1.9066489361702128,
"grad_norm": 3.3897817134857178,
"learning_rate": 7.287175646420327e-06,
"loss": 0.8327,
"step": 7169
},
{
"epoch": 1.9069148936170213,
"grad_norm": 4.037939548492432,
"learning_rate": 7.2863935171833465e-06,
"loss": 0.8793,
"step": 7170
},
{
"epoch": 1.90718085106383,
"grad_norm": 3.7813265323638916,
"learning_rate": 7.285611317202661e-06,
"loss": 0.8551,
"step": 7171
},
{
"epoch": 1.9074468085106382,
"grad_norm": 3.916761636734009,
"learning_rate": 7.284829046502467e-06,
"loss": 0.7564,
"step": 7172
},
{
"epoch": 1.9077127659574469,
"grad_norm": 3.843834400177002,
"learning_rate": 7.284046705106974e-06,
"loss": 0.8456,
"step": 7173
},
{
"epoch": 1.9079787234042553,
"grad_norm": 3.752497434616089,
"learning_rate": 7.2832642930403876e-06,
"loss": 0.8221,
"step": 7174
},
{
"epoch": 1.9082446808510638,
"grad_norm": 4.00820779800415,
"learning_rate": 7.282481810326915e-06,
"loss": 0.9672,
"step": 7175
},
{
"epoch": 1.9085106382978725,
"grad_norm": 4.226334571838379,
"learning_rate": 7.281699256990766e-06,
"loss": 0.8973,
"step": 7176
},
{
"epoch": 1.9087765957446807,
"grad_norm": 3.871880531311035,
"learning_rate": 7.280916633056159e-06,
"loss": 0.8204,
"step": 7177
},
{
"epoch": 1.9090425531914894,
"grad_norm": 4.339875221252441,
"learning_rate": 7.280133938547304e-06,
"loss": 0.8958,
"step": 7178
},
{
"epoch": 1.9093085106382979,
"grad_norm": 3.7419753074645996,
"learning_rate": 7.27935117348842e-06,
"loss": 0.789,
"step": 7179
},
{
"epoch": 1.9095744680851063,
"grad_norm": 4.0317888259887695,
"learning_rate": 7.278568337903729e-06,
"loss": 0.7995,
"step": 7180
},
{
"epoch": 1.909840425531915,
"grad_norm": 3.9452288150787354,
"learning_rate": 7.277785431817449e-06,
"loss": 0.8576,
"step": 7181
},
{
"epoch": 1.9101063829787233,
"grad_norm": 3.957437753677368,
"learning_rate": 7.277002455253807e-06,
"loss": 0.8532,
"step": 7182
},
{
"epoch": 1.910372340425532,
"grad_norm": 3.9327943325042725,
"learning_rate": 7.276219408237029e-06,
"loss": 0.8155,
"step": 7183
},
{
"epoch": 1.9106382978723404,
"grad_norm": 4.20408296585083,
"learning_rate": 7.27543629079134e-06,
"loss": 0.8285,
"step": 7184
},
{
"epoch": 1.9109042553191489,
"grad_norm": 4.2042341232299805,
"learning_rate": 7.274653102940974e-06,
"loss": 0.8624,
"step": 7185
},
{
"epoch": 1.9111702127659576,
"grad_norm": 4.000115871429443,
"learning_rate": 7.2738698447101645e-06,
"loss": 0.8343,
"step": 7186
},
{
"epoch": 1.9114361702127658,
"grad_norm": 4.323785305023193,
"learning_rate": 7.273086516123145e-06,
"loss": 0.7525,
"step": 7187
},
{
"epoch": 1.9117021276595745,
"grad_norm": 3.9202396869659424,
"learning_rate": 7.27230311720415e-06,
"loss": 0.9014,
"step": 7188
},
{
"epoch": 1.911968085106383,
"grad_norm": 3.924821615219116,
"learning_rate": 7.271519647977422e-06,
"loss": 0.8206,
"step": 7189
},
{
"epoch": 1.9122340425531914,
"grad_norm": 3.9752979278564453,
"learning_rate": 7.270736108467202e-06,
"loss": 0.9627,
"step": 7190
},
{
"epoch": 1.9125,
"grad_norm": 3.7932825088500977,
"learning_rate": 7.269952498697734e-06,
"loss": 0.8227,
"step": 7191
},
{
"epoch": 1.9127659574468086,
"grad_norm": 4.589715480804443,
"learning_rate": 7.2691688186932626e-06,
"loss": 0.9176,
"step": 7192
},
{
"epoch": 1.913031914893617,
"grad_norm": 4.00385856628418,
"learning_rate": 7.268385068478037e-06,
"loss": 0.7602,
"step": 7193
},
{
"epoch": 1.9132978723404257,
"grad_norm": 4.291144847869873,
"learning_rate": 7.267601248076307e-06,
"loss": 1.0254,
"step": 7194
},
{
"epoch": 1.913563829787234,
"grad_norm": 3.699037790298462,
"learning_rate": 7.2668173575123234e-06,
"loss": 0.8528,
"step": 7195
},
{
"epoch": 1.9138297872340426,
"grad_norm": 3.936768054962158,
"learning_rate": 7.266033396810343e-06,
"loss": 0.7172,
"step": 7196
},
{
"epoch": 1.914095744680851,
"grad_norm": 3.23809814453125,
"learning_rate": 7.265249365994621e-06,
"loss": 0.6519,
"step": 7197
},
{
"epoch": 1.9143617021276595,
"grad_norm": 4.3691020011901855,
"learning_rate": 7.2644652650894155e-06,
"loss": 0.8097,
"step": 7198
},
{
"epoch": 1.9146276595744682,
"grad_norm": 4.070173263549805,
"learning_rate": 7.263681094118989e-06,
"loss": 1.0137,
"step": 7199
},
{
"epoch": 1.9148936170212765,
"grad_norm": 3.9889721870422363,
"learning_rate": 7.262896853107606e-06,
"loss": 0.8935,
"step": 7200
},
{
"epoch": 1.9151595744680852,
"grad_norm": 3.6993491649627686,
"learning_rate": 7.262112542079529e-06,
"loss": 0.7445,
"step": 7201
},
{
"epoch": 1.9154255319148936,
"grad_norm": 4.081962585449219,
"learning_rate": 7.261328161059026e-06,
"loss": 1.0239,
"step": 7202
},
{
"epoch": 1.915691489361702,
"grad_norm": 4.065913677215576,
"learning_rate": 7.260543710070369e-06,
"loss": 0.9063,
"step": 7203
},
{
"epoch": 1.9159574468085108,
"grad_norm": 3.7012364864349365,
"learning_rate": 7.259759189137827e-06,
"loss": 0.9102,
"step": 7204
},
{
"epoch": 1.916223404255319,
"grad_norm": 4.341013431549072,
"learning_rate": 7.258974598285674e-06,
"loss": 0.8309,
"step": 7205
},
{
"epoch": 1.9164893617021277,
"grad_norm": 3.8948628902435303,
"learning_rate": 7.258189937538189e-06,
"loss": 0.786,
"step": 7206
},
{
"epoch": 1.9167553191489362,
"grad_norm": 4.040065288543701,
"learning_rate": 7.257405206919649e-06,
"loss": 0.7283,
"step": 7207
},
{
"epoch": 1.9170212765957446,
"grad_norm": 3.775395631790161,
"learning_rate": 7.256620406454333e-06,
"loss": 0.7441,
"step": 7208
},
{
"epoch": 1.9172872340425533,
"grad_norm": 4.277199745178223,
"learning_rate": 7.255835536166525e-06,
"loss": 0.8784,
"step": 7209
},
{
"epoch": 1.9175531914893615,
"grad_norm": 4.311332702636719,
"learning_rate": 7.25505059608051e-06,
"loss": 0.911,
"step": 7210
},
{
"epoch": 1.9178191489361702,
"grad_norm": 3.843778371810913,
"learning_rate": 7.254265586220574e-06,
"loss": 0.7906,
"step": 7211
},
{
"epoch": 1.9180851063829787,
"grad_norm": 4.064030647277832,
"learning_rate": 7.253480506611008e-06,
"loss": 0.8904,
"step": 7212
},
{
"epoch": 1.9183510638297872,
"grad_norm": 3.85115385055542,
"learning_rate": 7.252695357276101e-06,
"loss": 0.7148,
"step": 7213
},
{
"epoch": 1.9186170212765958,
"grad_norm": 3.716801643371582,
"learning_rate": 7.251910138240147e-06,
"loss": 0.7956,
"step": 7214
},
{
"epoch": 1.9188829787234043,
"grad_norm": 3.7296745777130127,
"learning_rate": 7.251124849527442e-06,
"loss": 0.8143,
"step": 7215
},
{
"epoch": 1.9191489361702128,
"grad_norm": 3.9987385272979736,
"learning_rate": 7.250339491162284e-06,
"loss": 0.8333,
"step": 7216
},
{
"epoch": 1.9194148936170212,
"grad_norm": 3.8190033435821533,
"learning_rate": 7.2495540631689745e-06,
"loss": 0.8476,
"step": 7217
},
{
"epoch": 1.9196808510638297,
"grad_norm": 4.055121898651123,
"learning_rate": 7.248768565571811e-06,
"loss": 0.8605,
"step": 7218
},
{
"epoch": 1.9199468085106384,
"grad_norm": 4.3670525550842285,
"learning_rate": 7.247982998395102e-06,
"loss": 0.8381,
"step": 7219
},
{
"epoch": 1.9202127659574468,
"grad_norm": 4.680405139923096,
"learning_rate": 7.247197361663152e-06,
"loss": 0.9635,
"step": 7220
},
{
"epoch": 1.9204787234042553,
"grad_norm": 4.1340460777282715,
"learning_rate": 7.24641165540027e-06,
"loss": 0.8125,
"step": 7221
},
{
"epoch": 1.920744680851064,
"grad_norm": 4.003271102905273,
"learning_rate": 7.245625879630767e-06,
"loss": 0.8934,
"step": 7222
},
{
"epoch": 1.9210106382978722,
"grad_norm": 4.222568035125732,
"learning_rate": 7.244840034378955e-06,
"loss": 1.0299,
"step": 7223
},
{
"epoch": 1.921276595744681,
"grad_norm": 3.762643337249756,
"learning_rate": 7.244054119669148e-06,
"loss": 0.6798,
"step": 7224
},
{
"epoch": 1.9215425531914894,
"grad_norm": 4.137721538543701,
"learning_rate": 7.243268135525666e-06,
"loss": 0.8147,
"step": 7225
},
{
"epoch": 1.9218085106382978,
"grad_norm": 4.0250139236450195,
"learning_rate": 7.242482081972827e-06,
"loss": 0.8394,
"step": 7226
},
{
"epoch": 1.9220744680851065,
"grad_norm": 3.7539706230163574,
"learning_rate": 7.241695959034951e-06,
"loss": 0.8293,
"step": 7227
},
{
"epoch": 1.9223404255319148,
"grad_norm": 4.054415225982666,
"learning_rate": 7.2409097667363635e-06,
"loss": 0.9107,
"step": 7228
},
{
"epoch": 1.9226063829787234,
"grad_norm": 4.380495548248291,
"learning_rate": 7.2401235051013885e-06,
"loss": 0.8641,
"step": 7229
},
{
"epoch": 1.922872340425532,
"grad_norm": 4.061448097229004,
"learning_rate": 7.239337174154357e-06,
"loss": 0.8332,
"step": 7230
},
{
"epoch": 1.9231382978723404,
"grad_norm": 4.095539093017578,
"learning_rate": 7.2385507739195945e-06,
"loss": 0.828,
"step": 7231
},
{
"epoch": 1.923404255319149,
"grad_norm": 4.271059513092041,
"learning_rate": 7.2377643044214375e-06,
"loss": 0.8365,
"step": 7232
},
{
"epoch": 1.9236702127659573,
"grad_norm": 3.9962894916534424,
"learning_rate": 7.236977765684216e-06,
"loss": 0.6932,
"step": 7233
},
{
"epoch": 1.923936170212766,
"grad_norm": 4.267841339111328,
"learning_rate": 7.236191157732272e-06,
"loss": 0.8561,
"step": 7234
},
{
"epoch": 1.9242021276595744,
"grad_norm": 4.299777030944824,
"learning_rate": 7.2354044805899385e-06,
"loss": 0.864,
"step": 7235
},
{
"epoch": 1.924468085106383,
"grad_norm": 4.053724765777588,
"learning_rate": 7.234617734281558e-06,
"loss": 0.8643,
"step": 7236
},
{
"epoch": 1.9247340425531916,
"grad_norm": 4.541396141052246,
"learning_rate": 7.2338309188314745e-06,
"loss": 0.793,
"step": 7237
},
{
"epoch": 1.925,
"grad_norm": 4.2436676025390625,
"learning_rate": 7.233044034264034e-06,
"loss": 0.7894,
"step": 7238
},
{
"epoch": 1.9252659574468085,
"grad_norm": 4.764181613922119,
"learning_rate": 7.23225708060358e-06,
"loss": 0.7979,
"step": 7239
},
{
"epoch": 1.925531914893617,
"grad_norm": 4.301015377044678,
"learning_rate": 7.2314700578744635e-06,
"loss": 0.8022,
"step": 7240
},
{
"epoch": 1.9257978723404254,
"grad_norm": 3.9735851287841797,
"learning_rate": 7.230682966101038e-06,
"loss": 0.7377,
"step": 7241
},
{
"epoch": 1.9260638297872341,
"grad_norm": 4.120856285095215,
"learning_rate": 7.229895805307654e-06,
"loss": 0.7386,
"step": 7242
},
{
"epoch": 1.9263297872340426,
"grad_norm": 4.618571758270264,
"learning_rate": 7.229108575518668e-06,
"loss": 0.8771,
"step": 7243
},
{
"epoch": 1.926595744680851,
"grad_norm": 3.679917573928833,
"learning_rate": 7.22832127675844e-06,
"loss": 0.8137,
"step": 7244
},
{
"epoch": 1.9268617021276597,
"grad_norm": 4.480624198913574,
"learning_rate": 7.227533909051327e-06,
"loss": 0.8955,
"step": 7245
},
{
"epoch": 1.927127659574468,
"grad_norm": 3.715806722640991,
"learning_rate": 7.226746472421692e-06,
"loss": 0.8023,
"step": 7246
},
{
"epoch": 1.9273936170212767,
"grad_norm": 4.008445739746094,
"learning_rate": 7.2259589668939005e-06,
"loss": 0.8584,
"step": 7247
},
{
"epoch": 1.9276595744680851,
"grad_norm": 4.211793899536133,
"learning_rate": 7.225171392492316e-06,
"loss": 0.8412,
"step": 7248
},
{
"epoch": 1.9279255319148936,
"grad_norm": 4.422094821929932,
"learning_rate": 7.224383749241311e-06,
"loss": 0.811,
"step": 7249
},
{
"epoch": 1.9281914893617023,
"grad_norm": 3.894848108291626,
"learning_rate": 7.223596037165252e-06,
"loss": 0.9126,
"step": 7250
},
{
"epoch": 1.9284574468085105,
"grad_norm": 3.9139139652252197,
"learning_rate": 7.222808256288515e-06,
"loss": 0.7837,
"step": 7251
},
{
"epoch": 1.9287234042553192,
"grad_norm": 4.1469197273254395,
"learning_rate": 7.222020406635474e-06,
"loss": 0.7134,
"step": 7252
},
{
"epoch": 1.9289893617021276,
"grad_norm": 3.5331952571868896,
"learning_rate": 7.2212324882305045e-06,
"loss": 0.7372,
"step": 7253
},
{
"epoch": 1.929255319148936,
"grad_norm": 3.312333822250366,
"learning_rate": 7.220444501097986e-06,
"loss": 0.7583,
"step": 7254
},
{
"epoch": 1.9295212765957448,
"grad_norm": 4.264598846435547,
"learning_rate": 7.2196564452623015e-06,
"loss": 0.8354,
"step": 7255
},
{
"epoch": 1.929787234042553,
"grad_norm": 4.467483997344971,
"learning_rate": 7.2188683207478326e-06,
"loss": 0.8728,
"step": 7256
},
{
"epoch": 1.9300531914893617,
"grad_norm": 3.850327730178833,
"learning_rate": 7.218080127578966e-06,
"loss": 0.8222,
"step": 7257
},
{
"epoch": 1.9303191489361702,
"grad_norm": 3.970350980758667,
"learning_rate": 7.217291865780089e-06,
"loss": 0.8979,
"step": 7258
},
{
"epoch": 1.9305851063829786,
"grad_norm": 3.9415476322174072,
"learning_rate": 7.21650353537559e-06,
"loss": 0.7552,
"step": 7259
},
{
"epoch": 1.9308510638297873,
"grad_norm": 3.566114664077759,
"learning_rate": 7.215715136389862e-06,
"loss": 0.8683,
"step": 7260
},
{
"epoch": 1.9311170212765958,
"grad_norm": 3.991467237472534,
"learning_rate": 7.2149266688473005e-06,
"loss": 0.7815,
"step": 7261
},
{
"epoch": 1.9313829787234043,
"grad_norm": 4.0647406578063965,
"learning_rate": 7.214138132772299e-06,
"loss": 0.7483,
"step": 7262
},
{
"epoch": 1.9316489361702127,
"grad_norm": 4.495807647705078,
"learning_rate": 7.213349528189258e-06,
"loss": 0.9067,
"step": 7263
},
{
"epoch": 1.9319148936170212,
"grad_norm": 4.034248352050781,
"learning_rate": 7.212560855122576e-06,
"loss": 0.7541,
"step": 7264
},
{
"epoch": 1.9321808510638299,
"grad_norm": 3.8755152225494385,
"learning_rate": 7.211772113596656e-06,
"loss": 0.8805,
"step": 7265
},
{
"epoch": 1.9324468085106383,
"grad_norm": 3.655921220779419,
"learning_rate": 7.210983303635901e-06,
"loss": 0.7864,
"step": 7266
},
{
"epoch": 1.9327127659574468,
"grad_norm": 4.281502723693848,
"learning_rate": 7.210194425264723e-06,
"loss": 0.9595,
"step": 7267
},
{
"epoch": 1.9329787234042555,
"grad_norm": 3.8239359855651855,
"learning_rate": 7.209405478507525e-06,
"loss": 0.7896,
"step": 7268
},
{
"epoch": 1.9332446808510637,
"grad_norm": 3.9340760707855225,
"learning_rate": 7.20861646338872e-06,
"loss": 0.855,
"step": 7269
},
{
"epoch": 1.9335106382978724,
"grad_norm": 3.6993649005889893,
"learning_rate": 7.207827379932724e-06,
"loss": 0.774,
"step": 7270
},
{
"epoch": 1.9337765957446809,
"grad_norm": 4.12832498550415,
"learning_rate": 7.2070382281639466e-06,
"loss": 0.8031,
"step": 7271
},
{
"epoch": 1.9340425531914893,
"grad_norm": 3.675234079360962,
"learning_rate": 7.206249008106808e-06,
"loss": 0.7203,
"step": 7272
},
{
"epoch": 1.934308510638298,
"grad_norm": 4.341015338897705,
"learning_rate": 7.20545971978573e-06,
"loss": 0.7099,
"step": 7273
},
{
"epoch": 1.9345744680851062,
"grad_norm": 4.289004802703857,
"learning_rate": 7.2046703632251295e-06,
"loss": 0.8558,
"step": 7274
},
{
"epoch": 1.934840425531915,
"grad_norm": 3.8868236541748047,
"learning_rate": 7.203880938449432e-06,
"loss": 0.8851,
"step": 7275
},
{
"epoch": 1.9351063829787234,
"grad_norm": 4.085642337799072,
"learning_rate": 7.2030914454830645e-06,
"loss": 0.7872,
"step": 7276
},
{
"epoch": 1.9353723404255319,
"grad_norm": 3.6767923831939697,
"learning_rate": 7.202301884350454e-06,
"loss": 0.712,
"step": 7277
},
{
"epoch": 1.9356382978723405,
"grad_norm": 4.32539176940918,
"learning_rate": 7.201512255076031e-06,
"loss": 0.9707,
"step": 7278
},
{
"epoch": 1.9359042553191488,
"grad_norm": 3.729510545730591,
"learning_rate": 7.2007225576842255e-06,
"loss": 0.8447,
"step": 7279
},
{
"epoch": 1.9361702127659575,
"grad_norm": 4.127895832061768,
"learning_rate": 7.1999327921994735e-06,
"loss": 0.8129,
"step": 7280
},
{
"epoch": 1.936436170212766,
"grad_norm": 3.7349631786346436,
"learning_rate": 7.199142958646211e-06,
"loss": 0.6886,
"step": 7281
},
{
"epoch": 1.9367021276595744,
"grad_norm": 3.900869369506836,
"learning_rate": 7.198353057048876e-06,
"loss": 0.7183,
"step": 7282
},
{
"epoch": 1.936968085106383,
"grad_norm": 4.21663761138916,
"learning_rate": 7.197563087431909e-06,
"loss": 0.9005,
"step": 7283
},
{
"epoch": 1.9372340425531915,
"grad_norm": 3.992421865463257,
"learning_rate": 7.196773049819753e-06,
"loss": 0.8604,
"step": 7284
},
{
"epoch": 1.9375,
"grad_norm": 4.140373229980469,
"learning_rate": 7.195982944236853e-06,
"loss": 0.9231,
"step": 7285
},
{
"epoch": 1.9377659574468085,
"grad_norm": 3.9591143131256104,
"learning_rate": 7.1951927707076545e-06,
"loss": 0.9934,
"step": 7286
},
{
"epoch": 1.938031914893617,
"grad_norm": 4.134740352630615,
"learning_rate": 7.194402529256608e-06,
"loss": 0.8869,
"step": 7287
},
{
"epoch": 1.9382978723404256,
"grad_norm": 3.9935176372528076,
"learning_rate": 7.193612219908161e-06,
"loss": 0.7377,
"step": 7288
},
{
"epoch": 1.938563829787234,
"grad_norm": 4.432157039642334,
"learning_rate": 7.192821842686772e-06,
"loss": 0.864,
"step": 7289
},
{
"epoch": 1.9388297872340425,
"grad_norm": 4.096209526062012,
"learning_rate": 7.1920313976168935e-06,
"loss": 0.8539,
"step": 7290
},
{
"epoch": 1.9390957446808512,
"grad_norm": 3.792664051055908,
"learning_rate": 7.191240884722982e-06,
"loss": 0.8195,
"step": 7291
},
{
"epoch": 1.9393617021276595,
"grad_norm": 3.759690046310425,
"learning_rate": 7.190450304029497e-06,
"loss": 0.7395,
"step": 7292
},
{
"epoch": 1.9396276595744681,
"grad_norm": 3.7826247215270996,
"learning_rate": 7.1896596555609025e-06,
"loss": 0.7206,
"step": 7293
},
{
"epoch": 1.9398936170212766,
"grad_norm": 3.8327670097351074,
"learning_rate": 7.1888689393416575e-06,
"loss": 0.9116,
"step": 7294
},
{
"epoch": 1.940159574468085,
"grad_norm": 3.965418815612793,
"learning_rate": 7.188078155396232e-06,
"loss": 0.8134,
"step": 7295
},
{
"epoch": 1.9404255319148938,
"grad_norm": 3.9271137714385986,
"learning_rate": 7.187287303749093e-06,
"loss": 0.705,
"step": 7296
},
{
"epoch": 1.940691489361702,
"grad_norm": 4.100310325622559,
"learning_rate": 7.186496384424708e-06,
"loss": 0.8471,
"step": 7297
},
{
"epoch": 1.9409574468085107,
"grad_norm": 3.9107069969177246,
"learning_rate": 7.185705397447552e-06,
"loss": 0.8495,
"step": 7298
},
{
"epoch": 1.9412234042553191,
"grad_norm": 4.238333225250244,
"learning_rate": 7.1849143428420975e-06,
"loss": 0.7926,
"step": 7299
},
{
"epoch": 1.9414893617021276,
"grad_norm": 4.412265777587891,
"learning_rate": 7.18412322063282e-06,
"loss": 0.947,
"step": 7300
},
{
"epoch": 1.9417553191489363,
"grad_norm": 3.686246156692505,
"learning_rate": 7.183332030844199e-06,
"loss": 0.7733,
"step": 7301
},
{
"epoch": 1.9420212765957445,
"grad_norm": 3.924842596054077,
"learning_rate": 7.182540773500715e-06,
"loss": 0.9132,
"step": 7302
},
{
"epoch": 1.9422872340425532,
"grad_norm": 3.5468335151672363,
"learning_rate": 7.181749448626849e-06,
"loss": 0.8032,
"step": 7303
},
{
"epoch": 1.9425531914893617,
"grad_norm": 3.618908166885376,
"learning_rate": 7.180958056247087e-06,
"loss": 0.8473,
"step": 7304
},
{
"epoch": 1.9428191489361701,
"grad_norm": 3.575326919555664,
"learning_rate": 7.180166596385915e-06,
"loss": 0.7703,
"step": 7305
},
{
"epoch": 1.9430851063829788,
"grad_norm": 4.315759658813477,
"learning_rate": 7.179375069067821e-06,
"loss": 0.823,
"step": 7306
},
{
"epoch": 1.9433510638297873,
"grad_norm": 3.9836225509643555,
"learning_rate": 7.178583474317295e-06,
"loss": 0.6672,
"step": 7307
},
{
"epoch": 1.9436170212765957,
"grad_norm": 4.030239105224609,
"learning_rate": 7.177791812158835e-06,
"loss": 0.806,
"step": 7308
},
{
"epoch": 1.9438829787234042,
"grad_norm": 3.8376708030700684,
"learning_rate": 7.17700008261693e-06,
"loss": 0.7224,
"step": 7309
},
{
"epoch": 1.9441489361702127,
"grad_norm": 4.117557048797607,
"learning_rate": 7.176208285716079e-06,
"loss": 0.8359,
"step": 7310
},
{
"epoch": 1.9444148936170214,
"grad_norm": 4.3215012550354,
"learning_rate": 7.175416421480783e-06,
"loss": 0.7143,
"step": 7311
},
{
"epoch": 1.9446808510638298,
"grad_norm": 3.8996849060058594,
"learning_rate": 7.174624489935541e-06,
"loss": 0.806,
"step": 7312
},
{
"epoch": 1.9449468085106383,
"grad_norm": 3.478804588317871,
"learning_rate": 7.173832491104858e-06,
"loss": 0.7916,
"step": 7313
},
{
"epoch": 1.945212765957447,
"grad_norm": 3.8935012817382812,
"learning_rate": 7.173040425013236e-06,
"loss": 0.719,
"step": 7314
},
{
"epoch": 1.9454787234042552,
"grad_norm": 3.9126412868499756,
"learning_rate": 7.172248291685187e-06,
"loss": 0.6975,
"step": 7315
},
{
"epoch": 1.945744680851064,
"grad_norm": 3.790658712387085,
"learning_rate": 7.171456091145217e-06,
"loss": 0.8119,
"step": 7316
},
{
"epoch": 1.9460106382978724,
"grad_norm": 4.477363109588623,
"learning_rate": 7.170663823417839e-06,
"loss": 0.8697,
"step": 7317
},
{
"epoch": 1.9462765957446808,
"grad_norm": 4.502041816711426,
"learning_rate": 7.1698714885275665e-06,
"loss": 0.9479,
"step": 7318
},
{
"epoch": 1.9465425531914895,
"grad_norm": 3.928950071334839,
"learning_rate": 7.169079086498915e-06,
"loss": 0.7123,
"step": 7319
},
{
"epoch": 1.9468085106382977,
"grad_norm": 3.781550168991089,
"learning_rate": 7.168286617356406e-06,
"loss": 0.7275,
"step": 7320
},
{
"epoch": 1.9470744680851064,
"grad_norm": 4.246979236602783,
"learning_rate": 7.167494081124553e-06,
"loss": 0.885,
"step": 7321
},
{
"epoch": 1.9473404255319149,
"grad_norm": 4.124865531921387,
"learning_rate": 7.166701477827882e-06,
"loss": 0.8088,
"step": 7322
},
{
"epoch": 1.9476063829787233,
"grad_norm": 4.21986198425293,
"learning_rate": 7.165908807490916e-06,
"loss": 0.9175,
"step": 7323
},
{
"epoch": 1.947872340425532,
"grad_norm": 4.153756618499756,
"learning_rate": 7.165116070138183e-06,
"loss": 0.8633,
"step": 7324
},
{
"epoch": 1.9481382978723403,
"grad_norm": 3.5365302562713623,
"learning_rate": 7.164323265794209e-06,
"loss": 0.8274,
"step": 7325
},
{
"epoch": 1.948404255319149,
"grad_norm": 4.312306880950928,
"learning_rate": 7.1635303944835246e-06,
"loss": 0.847,
"step": 7326
},
{
"epoch": 1.9486702127659574,
"grad_norm": 4.010374069213867,
"learning_rate": 7.162737456230662e-06,
"loss": 0.82,
"step": 7327
},
{
"epoch": 1.9489361702127659,
"grad_norm": 5.155407905578613,
"learning_rate": 7.161944451060157e-06,
"loss": 0.9241,
"step": 7328
},
{
"epoch": 1.9492021276595746,
"grad_norm": 3.665374279022217,
"learning_rate": 7.161151378996545e-06,
"loss": 0.8255,
"step": 7329
},
{
"epoch": 1.949468085106383,
"grad_norm": 3.6932079792022705,
"learning_rate": 7.1603582400643646e-06,
"loss": 0.8187,
"step": 7330
},
{
"epoch": 1.9497340425531915,
"grad_norm": 3.555961847305298,
"learning_rate": 7.159565034288157e-06,
"loss": 0.7523,
"step": 7331
},
{
"epoch": 1.95,
"grad_norm": 4.505660533905029,
"learning_rate": 7.158771761692464e-06,
"loss": 0.7903,
"step": 7332
},
{
"epoch": 1.9502659574468084,
"grad_norm": 3.616476058959961,
"learning_rate": 7.157978422301832e-06,
"loss": 0.8853,
"step": 7333
},
{
"epoch": 1.950531914893617,
"grad_norm": 4.25620698928833,
"learning_rate": 7.157185016140809e-06,
"loss": 0.8566,
"step": 7334
},
{
"epoch": 1.9507978723404256,
"grad_norm": 3.9593820571899414,
"learning_rate": 7.156391543233938e-06,
"loss": 0.7797,
"step": 7335
},
{
"epoch": 1.951063829787234,
"grad_norm": 4.379816055297852,
"learning_rate": 7.155598003605776e-06,
"loss": 0.9148,
"step": 7336
},
{
"epoch": 1.9513297872340427,
"grad_norm": 3.731823205947876,
"learning_rate": 7.154804397280873e-06,
"loss": 0.7223,
"step": 7337
},
{
"epoch": 1.951595744680851,
"grad_norm": 3.8849217891693115,
"learning_rate": 7.154010724283786e-06,
"loss": 0.8446,
"step": 7338
},
{
"epoch": 1.9518617021276596,
"grad_norm": 3.7477874755859375,
"learning_rate": 7.15321698463907e-06,
"loss": 0.6922,
"step": 7339
},
{
"epoch": 1.952127659574468,
"grad_norm": 4.323108673095703,
"learning_rate": 7.152423178371286e-06,
"loss": 0.8153,
"step": 7340
},
{
"epoch": 1.9523936170212766,
"grad_norm": 4.16124153137207,
"learning_rate": 7.1516293055049944e-06,
"loss": 0.8003,
"step": 7341
},
{
"epoch": 1.9526595744680852,
"grad_norm": 4.236426830291748,
"learning_rate": 7.150835366064759e-06,
"loss": 0.7843,
"step": 7342
},
{
"epoch": 1.9529255319148935,
"grad_norm": 3.637660026550293,
"learning_rate": 7.1500413600751465e-06,
"loss": 0.7665,
"step": 7343
},
{
"epoch": 1.9531914893617022,
"grad_norm": 3.838202476501465,
"learning_rate": 7.14924728756072e-06,
"loss": 0.7723,
"step": 7344
},
{
"epoch": 1.9534574468085106,
"grad_norm": 4.209107875823975,
"learning_rate": 7.148453148546055e-06,
"loss": 0.8646,
"step": 7345
},
{
"epoch": 1.953723404255319,
"grad_norm": 3.9335439205169678,
"learning_rate": 7.147658943055718e-06,
"loss": 0.6881,
"step": 7346
},
{
"epoch": 1.9539893617021278,
"grad_norm": 3.6025755405426025,
"learning_rate": 7.1468646711142855e-06,
"loss": 0.6567,
"step": 7347
},
{
"epoch": 1.954255319148936,
"grad_norm": 3.8079092502593994,
"learning_rate": 7.146070332746332e-06,
"loss": 0.7122,
"step": 7348
},
{
"epoch": 1.9545212765957447,
"grad_norm": 4.033806800842285,
"learning_rate": 7.145275927976436e-06,
"loss": 0.7522,
"step": 7349
},
{
"epoch": 1.9547872340425532,
"grad_norm": 4.1563310623168945,
"learning_rate": 7.144481456829178e-06,
"loss": 0.7998,
"step": 7350
},
{
"epoch": 1.9550531914893616,
"grad_norm": 4.061034202575684,
"learning_rate": 7.143686919329138e-06,
"loss": 0.9232,
"step": 7351
},
{
"epoch": 1.9553191489361703,
"grad_norm": 4.174419403076172,
"learning_rate": 7.1428923155009e-06,
"loss": 0.6807,
"step": 7352
},
{
"epoch": 1.9555851063829788,
"grad_norm": 3.6197104454040527,
"learning_rate": 7.142097645369052e-06,
"loss": 0.8129,
"step": 7353
},
{
"epoch": 1.9558510638297872,
"grad_norm": 4.288638591766357,
"learning_rate": 7.141302908958181e-06,
"loss": 0.9342,
"step": 7354
},
{
"epoch": 1.9561170212765957,
"grad_norm": 3.9184861183166504,
"learning_rate": 7.140508106292876e-06,
"loss": 0.7052,
"step": 7355
},
{
"epoch": 1.9563829787234042,
"grad_norm": 4.214428901672363,
"learning_rate": 7.1397132373977295e-06,
"loss": 0.8679,
"step": 7356
},
{
"epoch": 1.9566489361702128,
"grad_norm": 4.283886909484863,
"learning_rate": 7.138918302297338e-06,
"loss": 0.8816,
"step": 7357
},
{
"epoch": 1.9569148936170213,
"grad_norm": 3.77843976020813,
"learning_rate": 7.138123301016295e-06,
"loss": 0.7901,
"step": 7358
},
{
"epoch": 1.9571808510638298,
"grad_norm": 3.9347009658813477,
"learning_rate": 7.137328233579201e-06,
"loss": 0.7385,
"step": 7359
},
{
"epoch": 1.9574468085106385,
"grad_norm": 3.9841034412384033,
"learning_rate": 7.136533100010654e-06,
"loss": 0.7738,
"step": 7360
},
{
"epoch": 1.9577127659574467,
"grad_norm": 3.536179780960083,
"learning_rate": 7.1357379003352565e-06,
"loss": 0.8311,
"step": 7361
},
{
"epoch": 1.9579787234042554,
"grad_norm": 4.386892318725586,
"learning_rate": 7.134942634577615e-06,
"loss": 0.9451,
"step": 7362
},
{
"epoch": 1.9582446808510638,
"grad_norm": 3.738041877746582,
"learning_rate": 7.1341473027623355e-06,
"loss": 0.6454,
"step": 7363
},
{
"epoch": 1.9585106382978723,
"grad_norm": 3.718473434448242,
"learning_rate": 7.133351904914024e-06,
"loss": 0.8613,
"step": 7364
},
{
"epoch": 1.958776595744681,
"grad_norm": 4.3047661781311035,
"learning_rate": 7.132556441057294e-06,
"loss": 0.7499,
"step": 7365
},
{
"epoch": 1.9590425531914892,
"grad_norm": 3.821338415145874,
"learning_rate": 7.131760911216756e-06,
"loss": 0.737,
"step": 7366
},
{
"epoch": 1.959308510638298,
"grad_norm": 3.7964980602264404,
"learning_rate": 7.130965315417027e-06,
"loss": 0.8637,
"step": 7367
},
{
"epoch": 1.9595744680851064,
"grad_norm": 3.9412569999694824,
"learning_rate": 7.130169653682721e-06,
"loss": 0.6788,
"step": 7368
},
{
"epoch": 1.9598404255319148,
"grad_norm": 4.125255584716797,
"learning_rate": 7.129373926038459e-06,
"loss": 0.86,
"step": 7369
},
{
"epoch": 1.9601063829787235,
"grad_norm": 3.7982115745544434,
"learning_rate": 7.128578132508859e-06,
"loss": 0.9386,
"step": 7370
},
{
"epoch": 1.9603723404255318,
"grad_norm": 3.9143412113189697,
"learning_rate": 7.1277822731185475e-06,
"loss": 0.911,
"step": 7371
},
{
"epoch": 1.9606382978723405,
"grad_norm": 4.226142883300781,
"learning_rate": 7.126986347892146e-06,
"loss": 0.7375,
"step": 7372
},
{
"epoch": 1.960904255319149,
"grad_norm": 3.8393430709838867,
"learning_rate": 7.126190356854283e-06,
"loss": 0.8341,
"step": 7373
},
{
"epoch": 1.9611702127659574,
"grad_norm": 4.1616926193237305,
"learning_rate": 7.1253943000295865e-06,
"loss": 0.8532,
"step": 7374
},
{
"epoch": 1.961436170212766,
"grad_norm": 3.9134316444396973,
"learning_rate": 7.12459817744269e-06,
"loss": 0.7566,
"step": 7375
},
{
"epoch": 1.9617021276595743,
"grad_norm": 3.930948495864868,
"learning_rate": 7.123801989118223e-06,
"loss": 0.7781,
"step": 7376
},
{
"epoch": 1.961968085106383,
"grad_norm": 3.913886785507202,
"learning_rate": 7.1230057350808234e-06,
"loss": 0.8081,
"step": 7377
},
{
"epoch": 1.9622340425531914,
"grad_norm": 4.381828308105469,
"learning_rate": 7.122209415355125e-06,
"loss": 0.9048,
"step": 7378
},
{
"epoch": 1.9625,
"grad_norm": 3.839282512664795,
"learning_rate": 7.121413029965769e-06,
"loss": 0.7002,
"step": 7379
},
{
"epoch": 1.9627659574468086,
"grad_norm": 4.018161773681641,
"learning_rate": 7.120616578937397e-06,
"loss": 0.803,
"step": 7380
},
{
"epoch": 1.963031914893617,
"grad_norm": 4.220311164855957,
"learning_rate": 7.1198200622946516e-06,
"loss": 0.8337,
"step": 7381
},
{
"epoch": 1.9632978723404255,
"grad_norm": 3.790156841278076,
"learning_rate": 7.119023480062176e-06,
"loss": 0.7224,
"step": 7382
},
{
"epoch": 1.9635638297872342,
"grad_norm": 4.560417652130127,
"learning_rate": 7.1182268322646205e-06,
"loss": 0.8584,
"step": 7383
},
{
"epoch": 1.9638297872340424,
"grad_norm": 4.3043999671936035,
"learning_rate": 7.117430118926633e-06,
"loss": 0.8294,
"step": 7384
},
{
"epoch": 1.9640957446808511,
"grad_norm": 3.781405210494995,
"learning_rate": 7.116633340072863e-06,
"loss": 0.7876,
"step": 7385
},
{
"epoch": 1.9643617021276596,
"grad_norm": 3.986027956008911,
"learning_rate": 7.115836495727968e-06,
"loss": 0.7581,
"step": 7386
},
{
"epoch": 1.964627659574468,
"grad_norm": 3.9813320636749268,
"learning_rate": 7.1150395859165985e-06,
"loss": 0.9021,
"step": 7387
},
{
"epoch": 1.9648936170212767,
"grad_norm": 4.043676376342773,
"learning_rate": 7.114242610663415e-06,
"loss": 0.791,
"step": 7388
},
{
"epoch": 1.965159574468085,
"grad_norm": 4.014968395233154,
"learning_rate": 7.113445569993076e-06,
"loss": 0.7437,
"step": 7389
},
{
"epoch": 1.9654255319148937,
"grad_norm": 3.8244807720184326,
"learning_rate": 7.1126484639302425e-06,
"loss": 0.7376,
"step": 7390
},
{
"epoch": 1.9656914893617021,
"grad_norm": 3.804473400115967,
"learning_rate": 7.111851292499579e-06,
"loss": 0.8358,
"step": 7391
},
{
"epoch": 1.9659574468085106,
"grad_norm": 3.598792552947998,
"learning_rate": 7.111054055725749e-06,
"loss": 0.7728,
"step": 7392
},
{
"epoch": 1.9662234042553193,
"grad_norm": 4.2588677406311035,
"learning_rate": 7.110256753633421e-06,
"loss": 0.884,
"step": 7393
},
{
"epoch": 1.9664893617021275,
"grad_norm": 3.7859714031219482,
"learning_rate": 7.109459386247265e-06,
"loss": 0.6813,
"step": 7394
},
{
"epoch": 1.9667553191489362,
"grad_norm": 4.303823471069336,
"learning_rate": 7.108661953591953e-06,
"loss": 0.9044,
"step": 7395
},
{
"epoch": 1.9670212765957447,
"grad_norm": 3.953003406524658,
"learning_rate": 7.107864455692156e-06,
"loss": 0.7632,
"step": 7396
},
{
"epoch": 1.9672872340425531,
"grad_norm": 4.125672817230225,
"learning_rate": 7.107066892572552e-06,
"loss": 0.7153,
"step": 7397
},
{
"epoch": 1.9675531914893618,
"grad_norm": 4.01138973236084,
"learning_rate": 7.106269264257817e-06,
"loss": 0.8052,
"step": 7398
},
{
"epoch": 1.96781914893617,
"grad_norm": 3.7055439949035645,
"learning_rate": 7.10547157077263e-06,
"loss": 0.7684,
"step": 7399
},
{
"epoch": 1.9680851063829787,
"grad_norm": 4.636490821838379,
"learning_rate": 7.104673812141676e-06,
"loss": 0.7504,
"step": 7400
},
{
"epoch": 1.9683510638297872,
"grad_norm": 3.961894989013672,
"learning_rate": 7.103875988389636e-06,
"loss": 0.9316,
"step": 7401
},
{
"epoch": 1.9686170212765957,
"grad_norm": 3.978306770324707,
"learning_rate": 7.103078099541194e-06,
"loss": 0.8276,
"step": 7402
},
{
"epoch": 1.9688829787234043,
"grad_norm": 3.9166336059570312,
"learning_rate": 7.102280145621041e-06,
"loss": 0.7308,
"step": 7403
},
{
"epoch": 1.9691489361702128,
"grad_norm": 3.680129289627075,
"learning_rate": 7.101482126653865e-06,
"loss": 0.8355,
"step": 7404
},
{
"epoch": 1.9694148936170213,
"grad_norm": 4.1183857917785645,
"learning_rate": 7.1006840426643576e-06,
"loss": 0.7782,
"step": 7405
},
{
"epoch": 1.96968085106383,
"grad_norm": 4.286891460418701,
"learning_rate": 7.099885893677213e-06,
"loss": 0.8094,
"step": 7406
},
{
"epoch": 1.9699468085106382,
"grad_norm": 4.037398338317871,
"learning_rate": 7.099087679717127e-06,
"loss": 0.8141,
"step": 7407
},
{
"epoch": 1.9702127659574469,
"grad_norm": 3.8752505779266357,
"learning_rate": 7.098289400808795e-06,
"loss": 0.7824,
"step": 7408
},
{
"epoch": 1.9704787234042553,
"grad_norm": 3.7574338912963867,
"learning_rate": 7.0974910569769195e-06,
"loss": 0.6398,
"step": 7409
},
{
"epoch": 1.9707446808510638,
"grad_norm": 3.918271064758301,
"learning_rate": 7.096692648246203e-06,
"loss": 0.7949,
"step": 7410
},
{
"epoch": 1.9710106382978725,
"grad_norm": 4.124891757965088,
"learning_rate": 7.095894174641345e-06,
"loss": 0.9578,
"step": 7411
},
{
"epoch": 1.9712765957446807,
"grad_norm": 3.764817953109741,
"learning_rate": 7.0950956361870536e-06,
"loss": 0.8013,
"step": 7412
},
{
"epoch": 1.9715425531914894,
"grad_norm": 4.22829008102417,
"learning_rate": 7.094297032908037e-06,
"loss": 0.7897,
"step": 7413
},
{
"epoch": 1.9718085106382979,
"grad_norm": 4.174428462982178,
"learning_rate": 7.093498364829006e-06,
"loss": 0.8182,
"step": 7414
},
{
"epoch": 1.9720744680851063,
"grad_norm": 4.265493392944336,
"learning_rate": 7.09269963197467e-06,
"loss": 0.7067,
"step": 7415
},
{
"epoch": 1.972340425531915,
"grad_norm": 3.417632579803467,
"learning_rate": 7.091900834369743e-06,
"loss": 0.6767,
"step": 7416
},
{
"epoch": 1.9726063829787233,
"grad_norm": 3.931145429611206,
"learning_rate": 7.09110197203894e-06,
"loss": 0.7581,
"step": 7417
},
{
"epoch": 1.972872340425532,
"grad_norm": 3.808061361312866,
"learning_rate": 7.090303045006983e-06,
"loss": 0.88,
"step": 7418
},
{
"epoch": 1.9731382978723404,
"grad_norm": 4.074621677398682,
"learning_rate": 7.089504053298587e-06,
"loss": 0.8391,
"step": 7419
},
{
"epoch": 1.9734042553191489,
"grad_norm": 3.7446646690368652,
"learning_rate": 7.0887049969384756e-06,
"loss": 0.778,
"step": 7420
},
{
"epoch": 1.9736702127659576,
"grad_norm": 4.311694622039795,
"learning_rate": 7.087905875951373e-06,
"loss": 0.6362,
"step": 7421
},
{
"epoch": 1.9739361702127658,
"grad_norm": 3.7492148876190186,
"learning_rate": 7.087106690362003e-06,
"loss": 0.85,
"step": 7422
},
{
"epoch": 1.9742021276595745,
"grad_norm": 3.8154044151306152,
"learning_rate": 7.086307440195096e-06,
"loss": 0.8229,
"step": 7423
},
{
"epoch": 1.974468085106383,
"grad_norm": 3.8786826133728027,
"learning_rate": 7.085508125475381e-06,
"loss": 0.8001,
"step": 7424
},
{
"epoch": 1.9747340425531914,
"grad_norm": 3.972696304321289,
"learning_rate": 7.084708746227589e-06,
"loss": 0.9101,
"step": 7425
},
{
"epoch": 1.975,
"grad_norm": 4.224587440490723,
"learning_rate": 7.083909302476453e-06,
"loss": 0.7869,
"step": 7426
},
{
"epoch": 1.9752659574468086,
"grad_norm": 3.700507164001465,
"learning_rate": 7.08310979424671e-06,
"loss": 0.7123,
"step": 7427
},
{
"epoch": 1.975531914893617,
"grad_norm": 3.8128812313079834,
"learning_rate": 7.082310221563098e-06,
"loss": 0.7205,
"step": 7428
},
{
"epoch": 1.9757978723404257,
"grad_norm": 4.028718948364258,
"learning_rate": 7.081510584450355e-06,
"loss": 0.9249,
"step": 7429
},
{
"epoch": 1.976063829787234,
"grad_norm": 3.798619270324707,
"learning_rate": 7.080710882933225e-06,
"loss": 0.7412,
"step": 7430
},
{
"epoch": 1.9763297872340426,
"grad_norm": 4.599943161010742,
"learning_rate": 7.07991111703645e-06,
"loss": 0.8713,
"step": 7431
},
{
"epoch": 1.976595744680851,
"grad_norm": 4.6581854820251465,
"learning_rate": 7.079111286784775e-06,
"loss": 0.8165,
"step": 7432
},
{
"epoch": 1.9768617021276595,
"grad_norm": 3.9097495079040527,
"learning_rate": 7.078311392202951e-06,
"loss": 0.7803,
"step": 7433
},
{
"epoch": 1.9771276595744682,
"grad_norm": 4.4464802742004395,
"learning_rate": 7.077511433315725e-06,
"loss": 0.9244,
"step": 7434
},
{
"epoch": 1.9773936170212765,
"grad_norm": 4.222725868225098,
"learning_rate": 7.076711410147849e-06,
"loss": 0.9159,
"step": 7435
},
{
"epoch": 1.9776595744680852,
"grad_norm": 3.8437206745147705,
"learning_rate": 7.075911322724077e-06,
"loss": 0.7657,
"step": 7436
},
{
"epoch": 1.9779255319148936,
"grad_norm": 3.891757011413574,
"learning_rate": 7.075111171069165e-06,
"loss": 0.574,
"step": 7437
},
{
"epoch": 1.978191489361702,
"grad_norm": 3.8077917098999023,
"learning_rate": 7.074310955207869e-06,
"loss": 0.713,
"step": 7438
},
{
"epoch": 1.9784574468085108,
"grad_norm": 3.8292224407196045,
"learning_rate": 7.073510675164952e-06,
"loss": 0.8645,
"step": 7439
},
{
"epoch": 1.978723404255319,
"grad_norm": 3.931783437728882,
"learning_rate": 7.072710330965171e-06,
"loss": 0.7588,
"step": 7440
},
{
"epoch": 1.9789893617021277,
"grad_norm": 3.6988885402679443,
"learning_rate": 7.071909922633293e-06,
"loss": 0.8146,
"step": 7441
},
{
"epoch": 1.9792553191489362,
"grad_norm": 3.7726998329162598,
"learning_rate": 7.071109450194085e-06,
"loss": 0.8082,
"step": 7442
},
{
"epoch": 1.9795212765957446,
"grad_norm": 4.304258346557617,
"learning_rate": 7.070308913672309e-06,
"loss": 0.8142,
"step": 7443
},
{
"epoch": 1.9797872340425533,
"grad_norm": 3.6615335941314697,
"learning_rate": 7.069508313092739e-06,
"loss": 0.7409,
"step": 7444
},
{
"epoch": 1.9800531914893615,
"grad_norm": 4.02711296081543,
"learning_rate": 7.068707648480145e-06,
"loss": 0.8662,
"step": 7445
},
{
"epoch": 1.9803191489361702,
"grad_norm": 3.48976993560791,
"learning_rate": 7.067906919859301e-06,
"loss": 0.7655,
"step": 7446
},
{
"epoch": 1.9805851063829787,
"grad_norm": 4.168039321899414,
"learning_rate": 7.067106127254983e-06,
"loss": 0.8516,
"step": 7447
},
{
"epoch": 1.9808510638297872,
"grad_norm": 3.757882833480835,
"learning_rate": 7.066305270691965e-06,
"loss": 0.7557,
"step": 7448
},
{
"epoch": 1.9811170212765958,
"grad_norm": 4.09896183013916,
"learning_rate": 7.065504350195031e-06,
"loss": 0.7227,
"step": 7449
},
{
"epoch": 1.9813829787234043,
"grad_norm": 3.6728386878967285,
"learning_rate": 7.064703365788961e-06,
"loss": 0.8711,
"step": 7450
},
{
"epoch": 1.9816489361702128,
"grad_norm": 4.336848735809326,
"learning_rate": 7.063902317498537e-06,
"loss": 0.8427,
"step": 7451
},
{
"epoch": 1.9819148936170212,
"grad_norm": 3.715324640274048,
"learning_rate": 7.063101205348546e-06,
"loss": 0.8392,
"step": 7452
},
{
"epoch": 1.9821808510638297,
"grad_norm": 3.8472211360931396,
"learning_rate": 7.062300029363775e-06,
"loss": 0.8386,
"step": 7453
},
{
"epoch": 1.9824468085106384,
"grad_norm": 4.4139533042907715,
"learning_rate": 7.061498789569012e-06,
"loss": 0.7736,
"step": 7454
},
{
"epoch": 1.9827127659574468,
"grad_norm": 4.422085285186768,
"learning_rate": 7.06069748598905e-06,
"loss": 0.8175,
"step": 7455
},
{
"epoch": 1.9829787234042553,
"grad_norm": 4.3708696365356445,
"learning_rate": 7.059896118648681e-06,
"loss": 0.8802,
"step": 7456
},
{
"epoch": 1.983244680851064,
"grad_norm": 3.6612091064453125,
"learning_rate": 7.059094687572701e-06,
"loss": 0.73,
"step": 7457
},
{
"epoch": 1.9835106382978722,
"grad_norm": 4.2330780029296875,
"learning_rate": 7.058293192785907e-06,
"loss": 0.7638,
"step": 7458
},
{
"epoch": 1.983776595744681,
"grad_norm": 4.289926528930664,
"learning_rate": 7.0574916343130995e-06,
"loss": 0.7821,
"step": 7459
},
{
"epoch": 1.9840425531914894,
"grad_norm": 4.122095108032227,
"learning_rate": 7.0566900121790775e-06,
"loss": 0.9189,
"step": 7460
},
{
"epoch": 1.9843085106382978,
"grad_norm": 3.974686861038208,
"learning_rate": 7.055888326408645e-06,
"loss": 0.7231,
"step": 7461
},
{
"epoch": 1.9845744680851065,
"grad_norm": 3.515641450881958,
"learning_rate": 7.055086577026608e-06,
"loss": 0.8235,
"step": 7462
},
{
"epoch": 1.9848404255319148,
"grad_norm": 4.1052565574646,
"learning_rate": 7.0542847640577725e-06,
"loss": 0.7862,
"step": 7463
},
{
"epoch": 1.9851063829787234,
"grad_norm": 3.889636516571045,
"learning_rate": 7.0534828875269466e-06,
"loss": 0.7854,
"step": 7464
},
{
"epoch": 1.985372340425532,
"grad_norm": 4.208193778991699,
"learning_rate": 7.052680947458944e-06,
"loss": 0.7854,
"step": 7465
},
{
"epoch": 1.9856382978723404,
"grad_norm": 4.233124732971191,
"learning_rate": 7.051878943878575e-06,
"loss": 0.7895,
"step": 7466
},
{
"epoch": 1.985904255319149,
"grad_norm": 4.030735969543457,
"learning_rate": 7.051076876810656e-06,
"loss": 0.8551,
"step": 7467
},
{
"epoch": 1.9861702127659573,
"grad_norm": 3.666236639022827,
"learning_rate": 7.050274746280005e-06,
"loss": 0.7758,
"step": 7468
},
{
"epoch": 1.986436170212766,
"grad_norm": 3.7510082721710205,
"learning_rate": 7.0494725523114375e-06,
"loss": 0.9323,
"step": 7469
},
{
"epoch": 1.9867021276595744,
"grad_norm": 3.9435558319091797,
"learning_rate": 7.048670294929777e-06,
"loss": 0.9059,
"step": 7470
},
{
"epoch": 1.986968085106383,
"grad_norm": 3.691020965576172,
"learning_rate": 7.047867974159845e-06,
"loss": 0.7602,
"step": 7471
},
{
"epoch": 1.9872340425531916,
"grad_norm": 3.697643518447876,
"learning_rate": 7.047065590026467e-06,
"loss": 0.7624,
"step": 7472
},
{
"epoch": 1.9875,
"grad_norm": 3.759286880493164,
"learning_rate": 7.04626314255447e-06,
"loss": 0.8639,
"step": 7473
},
{
"epoch": 1.9877659574468085,
"grad_norm": 4.054465293884277,
"learning_rate": 7.045460631768684e-06,
"loss": 0.7268,
"step": 7474
},
{
"epoch": 1.988031914893617,
"grad_norm": 4.61219596862793,
"learning_rate": 7.0446580576939346e-06,
"loss": 0.9591,
"step": 7475
},
{
"epoch": 1.9882978723404254,
"grad_norm": 4.135398864746094,
"learning_rate": 7.04385542035506e-06,
"loss": 0.9273,
"step": 7476
},
{
"epoch": 1.9885638297872341,
"grad_norm": 3.8725779056549072,
"learning_rate": 7.043052719776891e-06,
"loss": 0.803,
"step": 7477
},
{
"epoch": 1.9888297872340426,
"grad_norm": 3.9959404468536377,
"learning_rate": 7.042249955984265e-06,
"loss": 0.8572,
"step": 7478
},
{
"epoch": 1.989095744680851,
"grad_norm": 3.542355537414551,
"learning_rate": 7.041447129002023e-06,
"loss": 0.8041,
"step": 7479
},
{
"epoch": 1.9893617021276597,
"grad_norm": 4.780427932739258,
"learning_rate": 7.0406442388550016e-06,
"loss": 0.88,
"step": 7480
},
{
"epoch": 1.989627659574468,
"grad_norm": 3.5344386100769043,
"learning_rate": 7.039841285568045e-06,
"loss": 0.7503,
"step": 7481
},
{
"epoch": 1.9898936170212767,
"grad_norm": 3.8678970336914062,
"learning_rate": 7.039038269165999e-06,
"loss": 0.74,
"step": 7482
},
{
"epoch": 1.9901595744680851,
"grad_norm": 3.366485834121704,
"learning_rate": 7.038235189673706e-06,
"loss": 0.7804,
"step": 7483
},
{
"epoch": 1.9904255319148936,
"grad_norm": 3.5538713932037354,
"learning_rate": 7.037432047116018e-06,
"loss": 0.7362,
"step": 7484
},
{
"epoch": 1.9906914893617023,
"grad_norm": 4.539484977722168,
"learning_rate": 7.036628841517783e-06,
"loss": 0.8812,
"step": 7485
},
{
"epoch": 1.9909574468085105,
"grad_norm": 3.830280065536499,
"learning_rate": 7.035825572903854e-06,
"loss": 0.809,
"step": 7486
},
{
"epoch": 1.9912234042553192,
"grad_norm": 4.038280963897705,
"learning_rate": 7.035022241299083e-06,
"loss": 0.7987,
"step": 7487
},
{
"epoch": 1.9914893617021276,
"grad_norm": 4.29449462890625,
"learning_rate": 7.034218846728331e-06,
"loss": 0.8703,
"step": 7488
},
{
"epoch": 1.991755319148936,
"grad_norm": 4.56672477722168,
"learning_rate": 7.033415389216452e-06,
"loss": 0.9195,
"step": 7489
},
{
"epoch": 1.9920212765957448,
"grad_norm": 4.10626745223999,
"learning_rate": 7.032611868788306e-06,
"loss": 0.7476,
"step": 7490
},
{
"epoch": 1.992287234042553,
"grad_norm": 3.6163523197174072,
"learning_rate": 7.031808285468756e-06,
"loss": 0.8082,
"step": 7491
},
{
"epoch": 1.9925531914893617,
"grad_norm": 4.114681243896484,
"learning_rate": 7.031004639282666e-06,
"loss": 0.9355,
"step": 7492
},
{
"epoch": 1.9928191489361702,
"grad_norm": 3.9397499561309814,
"learning_rate": 7.0302009302549e-06,
"loss": 0.7364,
"step": 7493
},
{
"epoch": 1.9930851063829786,
"grad_norm": 3.4797003269195557,
"learning_rate": 7.029397158410329e-06,
"loss": 0.8413,
"step": 7494
},
{
"epoch": 1.9933510638297873,
"grad_norm": 4.215932369232178,
"learning_rate": 7.028593323773819e-06,
"loss": 0.8095,
"step": 7495
},
{
"epoch": 1.9936170212765958,
"grad_norm": 3.694060802459717,
"learning_rate": 7.027789426370244e-06,
"loss": 0.8051,
"step": 7496
},
{
"epoch": 1.9938829787234043,
"grad_norm": 4.0490875244140625,
"learning_rate": 7.026985466224477e-06,
"loss": 0.874,
"step": 7497
},
{
"epoch": 1.9941489361702127,
"grad_norm": 4.0154194831848145,
"learning_rate": 7.026181443361392e-06,
"loss": 0.807,
"step": 7498
},
{
"epoch": 1.9944148936170212,
"grad_norm": 3.8070061206817627,
"learning_rate": 7.025377357805867e-06,
"loss": 0.8078,
"step": 7499
},
{
"epoch": 1.9946808510638299,
"grad_norm": 4.185990810394287,
"learning_rate": 7.024573209582783e-06,
"loss": 0.7529,
"step": 7500
},
{
"epoch": 1.9946808510638299,
"eval_loss": 1.260877251625061,
"eval_runtime": 13.905,
"eval_samples_per_second": 28.767,
"eval_steps_per_second": 3.596,
"step": 7500
},
{
"epoch": 1.9949468085106383,
"grad_norm": 3.18033504486084,
"learning_rate": 7.023768998717022e-06,
"loss": 0.7159,
"step": 7501
},
{
"epoch": 1.9952127659574468,
"grad_norm": 3.839970111846924,
"learning_rate": 7.022964725233463e-06,
"loss": 0.7902,
"step": 7502
},
{
"epoch": 1.9954787234042555,
"grad_norm": 4.011384963989258,
"learning_rate": 7.022160389156995e-06,
"loss": 0.7596,
"step": 7503
},
{
"epoch": 1.9957446808510637,
"grad_norm": 3.67543888092041,
"learning_rate": 7.0213559905125016e-06,
"loss": 0.7987,
"step": 7504
},
{
"epoch": 1.9960106382978724,
"grad_norm": 4.240528583526611,
"learning_rate": 7.020551529324877e-06,
"loss": 0.8651,
"step": 7505
},
{
"epoch": 1.9962765957446809,
"grad_norm": 3.9020180702209473,
"learning_rate": 7.0197470056190075e-06,
"loss": 0.9205,
"step": 7506
},
{
"epoch": 1.9965425531914893,
"grad_norm": 4.0633368492126465,
"learning_rate": 7.0189424194197875e-06,
"loss": 0.8294,
"step": 7507
},
{
"epoch": 1.996808510638298,
"grad_norm": 3.88988995552063,
"learning_rate": 7.018137770752114e-06,
"loss": 0.861,
"step": 7508
},
{
"epoch": 1.9970744680851062,
"grad_norm": 3.5177197456359863,
"learning_rate": 7.01733305964088e-06,
"loss": 0.772,
"step": 7509
},
{
"epoch": 1.997340425531915,
"grad_norm": 3.661116123199463,
"learning_rate": 7.016528286110986e-06,
"loss": 0.7985,
"step": 7510
},
{
"epoch": 1.9976063829787234,
"grad_norm": 4.28385591506958,
"learning_rate": 7.015723450187334e-06,
"loss": 0.9045,
"step": 7511
},
{
"epoch": 1.9978723404255319,
"grad_norm": 3.899296522140503,
"learning_rate": 7.014918551894824e-06,
"loss": 0.7558,
"step": 7512
},
{
"epoch": 1.9981382978723405,
"grad_norm": 3.9070241451263428,
"learning_rate": 7.014113591258361e-06,
"loss": 0.8287,
"step": 7513
},
{
"epoch": 1.9984042553191488,
"grad_norm": 3.7345831394195557,
"learning_rate": 7.013308568302855e-06,
"loss": 0.781,
"step": 7514
},
{
"epoch": 1.9986702127659575,
"grad_norm": 3.6665847301483154,
"learning_rate": 7.012503483053209e-06,
"loss": 0.9715,
"step": 7515
},
{
"epoch": 1.998936170212766,
"grad_norm": 3.48984956741333,
"learning_rate": 7.011698335534336e-06,
"loss": 0.6823,
"step": 7516
},
{
"epoch": 1.9992021276595744,
"grad_norm": 3.7711336612701416,
"learning_rate": 7.01089312577115e-06,
"loss": 0.8192,
"step": 7517
},
{
"epoch": 1.999468085106383,
"grad_norm": 4.02569580078125,
"learning_rate": 7.0100878537885605e-06,
"loss": 0.856,
"step": 7518
},
{
"epoch": 1.9997340425531915,
"grad_norm": 4.044494152069092,
"learning_rate": 7.009282519611488e-06,
"loss": 0.8349,
"step": 7519
},
{
"epoch": 2.0,
"grad_norm": 3.897979259490967,
"learning_rate": 7.008477123264849e-06,
"loss": 0.6436,
"step": 7520
}
],
"logging_steps": 1.0,
"max_steps": 18800,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.4371418007171236e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}