7513 lines
184 KiB
JSON
7513 lines
184 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 5334,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0009373828271466067,
|
||
|
|
"grad_norm": 55.87532592115914,
|
||
|
|
"learning_rate": 2.8089887640449436e-06,
|
||
|
|
"loss": 3.8993,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018747656542932134,
|
||
|
|
"grad_norm": 39.52831505761594,
|
||
|
|
"learning_rate": 5.617977528089887e-06,
|
||
|
|
"loss": 3.4743,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00281214848143982,
|
||
|
|
"grad_norm": 22.428432087019985,
|
||
|
|
"learning_rate": 8.42696629213483e-06,
|
||
|
|
"loss": 2.8849,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037495313085864268,
|
||
|
|
"grad_norm": 9.054209275536381,
|
||
|
|
"learning_rate": 1.1235955056179774e-05,
|
||
|
|
"loss": 2.1428,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004686914135733034,
|
||
|
|
"grad_norm": 3.948592126973317,
|
||
|
|
"learning_rate": 1.404494382022472e-05,
|
||
|
|
"loss": 1.7056,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00562429696287964,
|
||
|
|
"grad_norm": 2.299992616041375,
|
||
|
|
"learning_rate": 1.685393258426966e-05,
|
||
|
|
"loss": 1.4186,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006561679790026247,
|
||
|
|
"grad_norm": 0.9839714616377845,
|
||
|
|
"learning_rate": 1.9662921348314603e-05,
|
||
|
|
"loss": 1.2306,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0074990626171728535,
|
||
|
|
"grad_norm": 0.6008365887782849,
|
||
|
|
"learning_rate": 2.247191011235955e-05,
|
||
|
|
"loss": 1.1511,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00843644544431946,
|
||
|
|
"grad_norm": 0.4330325556834079,
|
||
|
|
"learning_rate": 2.528089887640449e-05,
|
||
|
|
"loss": 1.0624,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.009373828271466067,
|
||
|
|
"grad_norm": 0.35553504278533465,
|
||
|
|
"learning_rate": 2.808988764044944e-05,
|
||
|
|
"loss": 1.0431,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.010311211098612674,
|
||
|
|
"grad_norm": 0.25388532511026035,
|
||
|
|
"learning_rate": 3.089887640449438e-05,
|
||
|
|
"loss": 1.005,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01124859392575928,
|
||
|
|
"grad_norm": 0.2576041354688323,
|
||
|
|
"learning_rate": 3.370786516853932e-05,
|
||
|
|
"loss": 0.9996,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.012185976752905886,
|
||
|
|
"grad_norm": 0.21819394992900415,
|
||
|
|
"learning_rate": 3.6516853932584265e-05,
|
||
|
|
"loss": 0.9775,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.013123359580052493,
|
||
|
|
"grad_norm": 0.21574042351319167,
|
||
|
|
"learning_rate": 3.932584269662921e-05,
|
||
|
|
"loss": 0.9914,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0140607424071991,
|
||
|
|
"grad_norm": 0.18802524325518408,
|
||
|
|
"learning_rate": 4.2134831460674156e-05,
|
||
|
|
"loss": 0.9853,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.014998125234345707,
|
||
|
|
"grad_norm": 0.18620252651860728,
|
||
|
|
"learning_rate": 4.49438202247191e-05,
|
||
|
|
"loss": 1.0079,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.015935508061492312,
|
||
|
|
"grad_norm": 0.18097949689788895,
|
||
|
|
"learning_rate": 4.775280898876404e-05,
|
||
|
|
"loss": 0.9329,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01687289088863892,
|
||
|
|
"grad_norm": 0.15787887483249505,
|
||
|
|
"learning_rate": 5.056179775280898e-05,
|
||
|
|
"loss": 0.9277,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.017810273715785526,
|
||
|
|
"grad_norm": 0.15373249141451148,
|
||
|
|
"learning_rate": 5.337078651685392e-05,
|
||
|
|
"loss": 0.9232,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.018747656542932135,
|
||
|
|
"grad_norm": 0.13868806686970064,
|
||
|
|
"learning_rate": 5.617977528089888e-05,
|
||
|
|
"loss": 0.9002,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01968503937007874,
|
||
|
|
"grad_norm": 0.14099964891127995,
|
||
|
|
"learning_rate": 5.898876404494382e-05,
|
||
|
|
"loss": 0.916,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02062242219722535,
|
||
|
|
"grad_norm": 0.12190687602764205,
|
||
|
|
"learning_rate": 6.179775280898876e-05,
|
||
|
|
"loss": 0.926,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.021559805024371954,
|
||
|
|
"grad_norm": 0.1146212972821343,
|
||
|
|
"learning_rate": 6.46067415730337e-05,
|
||
|
|
"loss": 0.9661,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02249718785151856,
|
||
|
|
"grad_norm": 0.1305834260724098,
|
||
|
|
"learning_rate": 6.741573033707865e-05,
|
||
|
|
"loss": 0.8979,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.023434570678665168,
|
||
|
|
"grad_norm": 0.09286668479471845,
|
||
|
|
"learning_rate": 7.022471910112359e-05,
|
||
|
|
"loss": 0.895,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024371953505811773,
|
||
|
|
"grad_norm": 0.09893175557780264,
|
||
|
|
"learning_rate": 7.303370786516853e-05,
|
||
|
|
"loss": 0.8941,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02530933633295838,
|
||
|
|
"grad_norm": 0.08722137311816515,
|
||
|
|
"learning_rate": 7.584269662921347e-05,
|
||
|
|
"loss": 0.887,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.026246719160104987,
|
||
|
|
"grad_norm": 0.08022348162278936,
|
||
|
|
"learning_rate": 7.865168539325841e-05,
|
||
|
|
"loss": 0.8804,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.027184101987251595,
|
||
|
|
"grad_norm": 0.08856735925510693,
|
||
|
|
"learning_rate": 8.146067415730337e-05,
|
||
|
|
"loss": 0.855,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0281214848143982,
|
||
|
|
"grad_norm": 0.08311974627408422,
|
||
|
|
"learning_rate": 8.426966292134831e-05,
|
||
|
|
"loss": 0.8837,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.029058867641544806,
|
||
|
|
"grad_norm": 0.07297089629042264,
|
||
|
|
"learning_rate": 8.707865168539325e-05,
|
||
|
|
"loss": 0.8748,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.029996250468691414,
|
||
|
|
"grad_norm": 0.0720196207299981,
|
||
|
|
"learning_rate": 8.98876404494382e-05,
|
||
|
|
"loss": 0.8997,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03093363329583802,
|
||
|
|
"grad_norm": 0.07463772342009761,
|
||
|
|
"learning_rate": 9.269662921348314e-05,
|
||
|
|
"loss": 0.9228,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.031871016122984624,
|
||
|
|
"grad_norm": 0.08058875574417819,
|
||
|
|
"learning_rate": 9.550561797752808e-05,
|
||
|
|
"loss": 0.8742,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03280839895013123,
|
||
|
|
"grad_norm": 0.07434935030584572,
|
||
|
|
"learning_rate": 9.831460674157303e-05,
|
||
|
|
"loss": 0.9169,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03374578177727784,
|
||
|
|
"grad_norm": 0.06516079400878816,
|
||
|
|
"learning_rate": 0.00010112359550561796,
|
||
|
|
"loss": 0.8842,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.034683164604424443,
|
||
|
|
"grad_norm": 0.068946140527396,
|
||
|
|
"learning_rate": 0.00010393258426966292,
|
||
|
|
"loss": 0.8673,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03562054743157105,
|
||
|
|
"grad_norm": 0.0742056092302864,
|
||
|
|
"learning_rate": 0.00010674157303370785,
|
||
|
|
"loss": 0.8704,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03655793025871766,
|
||
|
|
"grad_norm": 0.09321869505325037,
|
||
|
|
"learning_rate": 0.0001095505617977528,
|
||
|
|
"loss": 0.8774,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03749531308586427,
|
||
|
|
"grad_norm": 0.07237285280180873,
|
||
|
|
"learning_rate": 0.00011235955056179776,
|
||
|
|
"loss": 0.8645,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03843269591301087,
|
||
|
|
"grad_norm": 0.0725790446638447,
|
||
|
|
"learning_rate": 0.00011516853932584269,
|
||
|
|
"loss": 0.8853,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03937007874015748,
|
||
|
|
"grad_norm": 0.06714726508233679,
|
||
|
|
"learning_rate": 0.00011797752808988764,
|
||
|
|
"loss": 0.8722,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04030746156730409,
|
||
|
|
"grad_norm": 0.06816724261165027,
|
||
|
|
"learning_rate": 0.00012078651685393257,
|
||
|
|
"loss": 0.857,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0412448443944507,
|
||
|
|
"grad_norm": 0.0630221201023211,
|
||
|
|
"learning_rate": 0.00012359550561797752,
|
||
|
|
"loss": 0.8844,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0421822272215973,
|
||
|
|
"grad_norm": 0.06920243449410893,
|
||
|
|
"learning_rate": 0.00012640449438202245,
|
||
|
|
"loss": 0.8786,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04311961004874391,
|
||
|
|
"grad_norm": 0.07088619559489383,
|
||
|
|
"learning_rate": 0.0001292134831460674,
|
||
|
|
"loss": 0.8797,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.044056992875890516,
|
||
|
|
"grad_norm": 0.061740726041492694,
|
||
|
|
"learning_rate": 0.00013202247191011236,
|
||
|
|
"loss": 0.8698,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04499437570303712,
|
||
|
|
"grad_norm": 0.07106582585336212,
|
||
|
|
"learning_rate": 0.0001348314606741573,
|
||
|
|
"loss": 0.869,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.045931758530183726,
|
||
|
|
"grad_norm": 0.061678059675451424,
|
||
|
|
"learning_rate": 0.00013764044943820225,
|
||
|
|
"loss": 0.8643,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.046869141357330335,
|
||
|
|
"grad_norm": 0.06524467963947332,
|
||
|
|
"learning_rate": 0.00014044943820224718,
|
||
|
|
"loss": 0.8927,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.047806524184476944,
|
||
|
|
"grad_norm": 0.0650786155112897,
|
||
|
|
"learning_rate": 0.00014325842696629213,
|
||
|
|
"loss": 0.8384,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.048743907011623545,
|
||
|
|
"grad_norm": 0.07154242679452663,
|
||
|
|
"learning_rate": 0.00014606741573033706,
|
||
|
|
"loss": 0.8809,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.049681289838770154,
|
||
|
|
"grad_norm": 0.07795179863304261,
|
||
|
|
"learning_rate": 0.00014887640449438202,
|
||
|
|
"loss": 0.8394,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05061867266591676,
|
||
|
|
"grad_norm": 0.06374006181294534,
|
||
|
|
"learning_rate": 0.00015168539325842694,
|
||
|
|
"loss": 0.8961,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.051556055493063364,
|
||
|
|
"grad_norm": 0.07787607064114185,
|
||
|
|
"learning_rate": 0.0001544943820224719,
|
||
|
|
"loss": 0.8632,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05249343832020997,
|
||
|
|
"grad_norm": 0.06289687876280393,
|
||
|
|
"learning_rate": 0.00015730337078651683,
|
||
|
|
"loss": 0.88,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05343082114735658,
|
||
|
|
"grad_norm": 0.06491539830485436,
|
||
|
|
"learning_rate": 0.00016011235955056178,
|
||
|
|
"loss": 0.8737,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05436820397450319,
|
||
|
|
"grad_norm": 0.070328841548907,
|
||
|
|
"learning_rate": 0.00016292134831460674,
|
||
|
|
"loss": 0.8617,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05530558680164979,
|
||
|
|
"grad_norm": 0.0638014579038207,
|
||
|
|
"learning_rate": 0.00016573033707865167,
|
||
|
|
"loss": 0.8506,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0562429696287964,
|
||
|
|
"grad_norm": 0.06422804662460813,
|
||
|
|
"learning_rate": 0.00016853932584269662,
|
||
|
|
"loss": 0.8805,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05718035245594301,
|
||
|
|
"grad_norm": 0.06361653985204194,
|
||
|
|
"learning_rate": 0.00017134831460674155,
|
||
|
|
"loss": 0.8918,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05811773528308961,
|
||
|
|
"grad_norm": 0.06932335644552794,
|
||
|
|
"learning_rate": 0.0001741573033707865,
|
||
|
|
"loss": 0.8595,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05905511811023622,
|
||
|
|
"grad_norm": 0.07957054733316223,
|
||
|
|
"learning_rate": 0.00017696629213483143,
|
||
|
|
"loss": 0.8663,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05999250093738283,
|
||
|
|
"grad_norm": 0.06998445535720693,
|
||
|
|
"learning_rate": 0.0001797752808988764,
|
||
|
|
"loss": 0.8676,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06092988376452944,
|
||
|
|
"grad_norm": 0.06702795111487583,
|
||
|
|
"learning_rate": 0.00018258426966292135,
|
||
|
|
"loss": 0.8802,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06186726659167604,
|
||
|
|
"grad_norm": 0.057562966446872095,
|
||
|
|
"learning_rate": 0.00018539325842696627,
|
||
|
|
"loss": 0.8671,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06280464941882265,
|
||
|
|
"grad_norm": 0.07214305689508435,
|
||
|
|
"learning_rate": 0.0001882022471910112,
|
||
|
|
"loss": 0.8992,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06374203224596925,
|
||
|
|
"grad_norm": 0.06640078935309972,
|
||
|
|
"learning_rate": 0.00019101123595505616,
|
||
|
|
"loss": 0.8589,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06467941507311586,
|
||
|
|
"grad_norm": 0.06432108168682822,
|
||
|
|
"learning_rate": 0.0001938202247191011,
|
||
|
|
"loss": 0.8792,
|
||
|
|
"step": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06561679790026247,
|
||
|
|
"grad_norm": 0.06995296734682956,
|
||
|
|
"learning_rate": 0.00019662921348314607,
|
||
|
|
"loss": 0.865,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06655418072740907,
|
||
|
|
"grad_norm": 0.0781580610323253,
|
||
|
|
"learning_rate": 0.00019943820224719097,
|
||
|
|
"loss": 0.8815,
|
||
|
|
"step": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06749156355455568,
|
||
|
|
"grad_norm": 0.07071637338397053,
|
||
|
|
"learning_rate": 0.00020224719101123593,
|
||
|
|
"loss": 0.8666,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06842894638170229,
|
||
|
|
"grad_norm": 0.0682378939648076,
|
||
|
|
"learning_rate": 0.00020505617977528088,
|
||
|
|
"loss": 0.8862,
|
||
|
|
"step": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06936632920884889,
|
||
|
|
"grad_norm": 0.06900860789126333,
|
||
|
|
"learning_rate": 0.00020786516853932584,
|
||
|
|
"loss": 0.8638,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0703037120359955,
|
||
|
|
"grad_norm": 0.06343642719314092,
|
||
|
|
"learning_rate": 0.0002106741573033708,
|
||
|
|
"loss": 0.8692,
|
||
|
|
"step": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0712410948631421,
|
||
|
|
"grad_norm": 0.058805566981791894,
|
||
|
|
"learning_rate": 0.0002134831460674157,
|
||
|
|
"loss": 0.8699,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07217847769028872,
|
||
|
|
"grad_norm": 0.06501680635838757,
|
||
|
|
"learning_rate": 0.00021629213483146065,
|
||
|
|
"loss": 0.8794,
|
||
|
|
"step": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07311586051743532,
|
||
|
|
"grad_norm": 0.06790932619576265,
|
||
|
|
"learning_rate": 0.0002191011235955056,
|
||
|
|
"loss": 0.8609,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07405324334458192,
|
||
|
|
"grad_norm": 0.07253373879567401,
|
||
|
|
"learning_rate": 0.00022191011235955056,
|
||
|
|
"loss": 0.8586,
|
||
|
|
"step": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07499062617172854,
|
||
|
|
"grad_norm": 0.05597241270795834,
|
||
|
|
"learning_rate": 0.00022471910112359551,
|
||
|
|
"loss": 0.8606,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07592800899887514,
|
||
|
|
"grad_norm": 0.07414431562106621,
|
||
|
|
"learning_rate": 0.00022752808988764042,
|
||
|
|
"loss": 0.859,
|
||
|
|
"step": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07686539182602174,
|
||
|
|
"grad_norm": 0.06416271464203936,
|
||
|
|
"learning_rate": 0.00023033707865168537,
|
||
|
|
"loss": 0.8462,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07780277465316836,
|
||
|
|
"grad_norm": 0.06411386307806086,
|
||
|
|
"learning_rate": 0.00023314606741573033,
|
||
|
|
"loss": 0.8859,
|
||
|
|
"step": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07874015748031496,
|
||
|
|
"grad_norm": 0.08671615531146802,
|
||
|
|
"learning_rate": 0.00023595505617977528,
|
||
|
|
"loss": 0.858,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07967754030746156,
|
||
|
|
"grad_norm": 0.06950084739281429,
|
||
|
|
"learning_rate": 0.00023876404494382018,
|
||
|
|
"loss": 0.8779,
|
||
|
|
"step": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08061492313460818,
|
||
|
|
"grad_norm": 0.06732606238114026,
|
||
|
|
"learning_rate": 0.00024157303370786514,
|
||
|
|
"loss": 0.8585,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08155230596175478,
|
||
|
|
"grad_norm": 0.06753409562352092,
|
||
|
|
"learning_rate": 0.0002443820224719101,
|
||
|
|
"loss": 0.8648,
|
||
|
|
"step": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0824896887889014,
|
||
|
|
"grad_norm": 0.06868527875344918,
|
||
|
|
"learning_rate": 0.00024719101123595505,
|
||
|
|
"loss": 0.8923,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.083427071616048,
|
||
|
|
"grad_norm": 0.060659330441044046,
|
||
|
|
"learning_rate": 0.00025,
|
||
|
|
"loss": 0.8441,
|
||
|
|
"step": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0843644544431946,
|
||
|
|
"grad_norm": 0.056891953999125895,
|
||
|
|
"learning_rate": 0.0002528089887640449,
|
||
|
|
"loss": 0.8711,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08530183727034121,
|
||
|
|
"grad_norm": 0.06315987668061082,
|
||
|
|
"learning_rate": 0.00025561797752808986,
|
||
|
|
"loss": 0.8709,
|
||
|
|
"step": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08623922009748781,
|
||
|
|
"grad_norm": 0.06974874046223557,
|
||
|
|
"learning_rate": 0.0002584269662921348,
|
||
|
|
"loss": 0.8881,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08717660292463442,
|
||
|
|
"grad_norm": 0.07106045458363727,
|
||
|
|
"learning_rate": 0.00026123595505617977,
|
||
|
|
"loss": 0.8455,
|
||
|
|
"step": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08811398575178103,
|
||
|
|
"grad_norm": 0.06530109700061691,
|
||
|
|
"learning_rate": 0.00026404494382022473,
|
||
|
|
"loss": 0.8701,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08905136857892763,
|
||
|
|
"grad_norm": 0.06284436680171902,
|
||
|
|
"learning_rate": 0.00026685393258426963,
|
||
|
|
"loss": 0.8639,
|
||
|
|
"step": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08998875140607424,
|
||
|
|
"grad_norm": 0.06695190689656623,
|
||
|
|
"learning_rate": 0.0002696629213483146,
|
||
|
|
"loss": 0.8596,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09092613423322085,
|
||
|
|
"grad_norm": 0.06134041035709648,
|
||
|
|
"learning_rate": 0.00027247191011235954,
|
||
|
|
"loss": 0.8846,
|
||
|
|
"step": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09186351706036745,
|
||
|
|
"grad_norm": 0.05390358172595912,
|
||
|
|
"learning_rate": 0.0002752808988764045,
|
||
|
|
"loss": 0.8608,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09280089988751405,
|
||
|
|
"grad_norm": 0.06061308522705228,
|
||
|
|
"learning_rate": 0.00027808988764044945,
|
||
|
|
"loss": 0.8688,
|
||
|
|
"step": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09373828271466067,
|
||
|
|
"grad_norm": 0.05665308816810634,
|
||
|
|
"learning_rate": 0.00028089887640449435,
|
||
|
|
"loss": 0.8431,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09467566554180727,
|
||
|
|
"grad_norm": 0.06298526377992197,
|
||
|
|
"learning_rate": 0.0002837078651685393,
|
||
|
|
"loss": 0.8534,
|
||
|
|
"step": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09561304836895389,
|
||
|
|
"grad_norm": 0.06828852662205184,
|
||
|
|
"learning_rate": 0.00028651685393258426,
|
||
|
|
"loss": 0.855,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09655043119610049,
|
||
|
|
"grad_norm": 0.059581044094172864,
|
||
|
|
"learning_rate": 0.0002893258426966292,
|
||
|
|
"loss": 0.892,
|
||
|
|
"step": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09748781402324709,
|
||
|
|
"grad_norm": 0.05834286349398695,
|
||
|
|
"learning_rate": 0.0002921348314606741,
|
||
|
|
"loss": 0.8791,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0984251968503937,
|
||
|
|
"grad_norm": 0.05628861715462521,
|
||
|
|
"learning_rate": 0.0002949438202247191,
|
||
|
|
"loss": 0.8572,
|
||
|
|
"step": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09936257967754031,
|
||
|
|
"grad_norm": 0.0752400574763716,
|
||
|
|
"learning_rate": 0.00029775280898876403,
|
||
|
|
"loss": 0.9176,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10029996250468691,
|
||
|
|
"grad_norm": 0.0589889669786586,
|
||
|
|
"learning_rate": 0.0002999999678723826,
|
||
|
|
"loss": 0.879,
|
||
|
|
"step": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10123734533183353,
|
||
|
|
"grad_norm": 0.06984453009723764,
|
||
|
|
"learning_rate": 0.0002999988434072206,
|
||
|
|
"loss": 0.8791,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10217472815898013,
|
||
|
|
"grad_norm": 0.05131555199439601,
|
||
|
|
"learning_rate": 0.0002999961125749536,
|
||
|
|
"loss": 0.8639,
|
||
|
|
"step": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10311211098612673,
|
||
|
|
"grad_norm": 0.06129502320830477,
|
||
|
|
"learning_rate": 0.0002999917754048268,
|
||
|
|
"loss": 0.8626,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10404949381327334,
|
||
|
|
"grad_norm": 0.052116063904268276,
|
||
|
|
"learning_rate": 0.00029998583194328776,
|
||
|
|
"loss": 0.8405,
|
||
|
|
"step": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10498687664041995,
|
||
|
|
"grad_norm": 0.05805086354219647,
|
||
|
|
"learning_rate": 0.0002999782822539861,
|
||
|
|
"loss": 0.8521,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10592425946756655,
|
||
|
|
"grad_norm": 0.058854180699594805,
|
||
|
|
"learning_rate": 0.000299969126417773,
|
||
|
|
"loss": 0.8612,
|
||
|
|
"step": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10686164229471316,
|
||
|
|
"grad_norm": 0.05170888163012354,
|
||
|
|
"learning_rate": 0.00029995836453270005,
|
||
|
|
"loss": 0.852,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10779902512185977,
|
||
|
|
"grad_norm": 0.053497899422118654,
|
||
|
|
"learning_rate": 0.0002999459967140185,
|
||
|
|
"loss": 0.8425,
|
||
|
|
"step": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10873640794900638,
|
||
|
|
"grad_norm": 0.0517344152203251,
|
||
|
|
"learning_rate": 0.00029993202309417765,
|
||
|
|
"loss": 0.8666,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10967379077615298,
|
||
|
|
"grad_norm": 0.06883148118348337,
|
||
|
|
"learning_rate": 0.00029991644382282377,
|
||
|
|
"loss": 0.8699,
|
||
|
|
"step": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11061117360329958,
|
||
|
|
"grad_norm": 0.06701524872749254,
|
||
|
|
"learning_rate": 0.0002998992590667984,
|
||
|
|
"loss": 0.8612,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1115485564304462,
|
||
|
|
"grad_norm": 0.0633741891240403,
|
||
|
|
"learning_rate": 0.00029988046901013643,
|
||
|
|
"loss": 0.8861,
|
||
|
|
"step": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1124859392575928,
|
||
|
|
"grad_norm": 0.059595235950932066,
|
||
|
|
"learning_rate": 0.00029986007385406424,
|
||
|
|
"loss": 0.8761,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1134233220847394,
|
||
|
|
"grad_norm": 0.05592116325579977,
|
||
|
|
"learning_rate": 0.00029983807381699757,
|
||
|
|
"loss": 0.8756,
|
||
|
|
"step": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11436070491188602,
|
||
|
|
"grad_norm": 0.057345108961587225,
|
||
|
|
"learning_rate": 0.0002998144691345392,
|
||
|
|
"loss": 0.8574,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11529808773903262,
|
||
|
|
"grad_norm": 0.05734473521251835,
|
||
|
|
"learning_rate": 0.0002997892600594762,
|
||
|
|
"loss": 0.8544,
|
||
|
|
"step": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11623547056617922,
|
||
|
|
"grad_norm": 0.05186000961639975,
|
||
|
|
"learning_rate": 0.00029976244686177764,
|
||
|
|
"loss": 0.8808,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11717285339332584,
|
||
|
|
"grad_norm": 0.05414634714867266,
|
||
|
|
"learning_rate": 0.00029973402982859127,
|
||
|
|
"loss": 0.8331,
|
||
|
|
"step": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11811023622047244,
|
||
|
|
"grad_norm": 0.04968211925070222,
|
||
|
|
"learning_rate": 0.0002997040092642407,
|
||
|
|
"loss": 0.8592,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11904761904761904,
|
||
|
|
"grad_norm": 0.06248537463553037,
|
||
|
|
"learning_rate": 0.00029967238549022206,
|
||
|
|
"loss": 0.8819,
|
||
|
|
"step": 635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11998500187476566,
|
||
|
|
"grad_norm": 0.055299575032660746,
|
||
|
|
"learning_rate": 0.00029963915884520054,
|
||
|
|
"loss": 0.8636,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12092238470191226,
|
||
|
|
"grad_norm": 0.04854206172628792,
|
||
|
|
"learning_rate": 0.00029960432968500675,
|
||
|
|
"loss": 0.8628,
|
||
|
|
"step": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12185976752905887,
|
||
|
|
"grad_norm": 0.058596631103748714,
|
||
|
|
"learning_rate": 0.00029956789838263314,
|
||
|
|
"loss": 0.8489,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12279715035620548,
|
||
|
|
"grad_norm": 0.06652964862071063,
|
||
|
|
"learning_rate": 0.0002995298653282297,
|
||
|
|
"loss": 0.876,
|
||
|
|
"step": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12373453318335208,
|
||
|
|
"grad_norm": 0.05617071779521591,
|
||
|
|
"learning_rate": 0.00029949023092909976,
|
||
|
|
"loss": 0.8582,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12467191601049869,
|
||
|
|
"grad_norm": 0.06128826182928016,
|
||
|
|
"learning_rate": 0.00029944899560969593,
|
||
|
|
"loss": 0.8556,
|
||
|
|
"step": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1256092988376453,
|
||
|
|
"grad_norm": 0.06596974095737278,
|
||
|
|
"learning_rate": 0.00029940615981161544,
|
||
|
|
"loss": 0.8484,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1265466816647919,
|
||
|
|
"grad_norm": 0.05241653325144218,
|
||
|
|
"learning_rate": 0.00029936172399359516,
|
||
|
|
"loss": 0.8681,
|
||
|
|
"step": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1274840644919385,
|
||
|
|
"grad_norm": 0.0656117272709645,
|
||
|
|
"learning_rate": 0.00029931568863150705,
|
||
|
|
"loss": 0.8611,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1284214473190851,
|
||
|
|
"grad_norm": 0.05416806152576802,
|
||
|
|
"learning_rate": 0.0002992680542183529,
|
||
|
|
"loss": 0.8543,
|
||
|
|
"step": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12935883014623173,
|
||
|
|
"grad_norm": 0.05949667729922976,
|
||
|
|
"learning_rate": 0.00029921882126425893,
|
||
|
|
"loss": 0.8476,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13029621297337832,
|
||
|
|
"grad_norm": 0.05476709738462092,
|
||
|
|
"learning_rate": 0.0002991679902964706,
|
||
|
|
"loss": 0.8329,
|
||
|
|
"step": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13123359580052493,
|
||
|
|
"grad_norm": 0.05583654578373451,
|
||
|
|
"learning_rate": 0.00029911556185934667,
|
||
|
|
"loss": 0.8546,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13217097862767155,
|
||
|
|
"grad_norm": 0.052842518474849304,
|
||
|
|
"learning_rate": 0.0002990615365143536,
|
||
|
|
"loss": 0.8576,
|
||
|
|
"step": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13310836145481814,
|
||
|
|
"grad_norm": 0.05325159315281095,
|
||
|
|
"learning_rate": 0.0002990059148400594,
|
||
|
|
"loss": 0.8475,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13404574428196475,
|
||
|
|
"grad_norm": 0.0477613812018967,
|
||
|
|
"learning_rate": 0.00029894869743212767,
|
||
|
|
"loss": 0.8505,
|
||
|
|
"step": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13498312710911137,
|
||
|
|
"grad_norm": 0.04650972000773531,
|
||
|
|
"learning_rate": 0.00029888988490331067,
|
||
|
|
"loss": 0.8406,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13592050993625795,
|
||
|
|
"grad_norm": 0.05548534041152614,
|
||
|
|
"learning_rate": 0.00029882947788344345,
|
||
|
|
"loss": 0.8731,
|
||
|
|
"step": 725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13685789276340457,
|
||
|
|
"grad_norm": 0.047893477001569186,
|
||
|
|
"learning_rate": 0.00029876747701943667,
|
||
|
|
"loss": 0.8666,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1377952755905512,
|
||
|
|
"grad_norm": 0.05399917193542452,
|
||
|
|
"learning_rate": 0.00029870388297526966,
|
||
|
|
"loss": 0.8476,
|
||
|
|
"step": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13873265841769777,
|
||
|
|
"grad_norm": 0.04738143330368603,
|
||
|
|
"learning_rate": 0.0002986386964319837,
|
||
|
|
"loss": 0.8423,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1396700412448444,
|
||
|
|
"grad_norm": 0.05491107440574904,
|
||
|
|
"learning_rate": 0.0002985719180876742,
|
||
|
|
"loss": 0.8451,
|
||
|
|
"step": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.140607424071991,
|
||
|
|
"grad_norm": 0.060398470624460965,
|
||
|
|
"learning_rate": 0.0002985035486574836,
|
||
|
|
"loss": 0.8801,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14154480689913762,
|
||
|
|
"grad_norm": 0.05350788151410978,
|
||
|
|
"learning_rate": 0.00029843358887359357,
|
||
|
|
"loss": 0.8516,
|
||
|
|
"step": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1424821897262842,
|
||
|
|
"grad_norm": 0.05517308695615106,
|
||
|
|
"learning_rate": 0.0002983620394852172,
|
||
|
|
"loss": 0.8703,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14341957255343082,
|
||
|
|
"grad_norm": 0.054231118168541766,
|
||
|
|
"learning_rate": 0.000298288901258591,
|
||
|
|
"loss": 0.8693,
|
||
|
|
"step": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14435695538057744,
|
||
|
|
"grad_norm": 0.049622640241897456,
|
||
|
|
"learning_rate": 0.0002982141749769665,
|
||
|
|
"loss": 0.8395,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14529433820772403,
|
||
|
|
"grad_norm": 0.046995063624861166,
|
||
|
|
"learning_rate": 0.0002981378614406022,
|
||
|
|
"loss": 0.8604,
|
||
|
|
"step": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14623172103487064,
|
||
|
|
"grad_norm": 0.05520295521540041,
|
||
|
|
"learning_rate": 0.0002980599614667548,
|
||
|
|
"loss": 0.8645,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14716910386201726,
|
||
|
|
"grad_norm": 0.04894265506037402,
|
||
|
|
"learning_rate": 0.0002979804758896704,
|
||
|
|
"loss": 0.8652,
|
||
|
|
"step": 785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14810648668916385,
|
||
|
|
"grad_norm": 0.06029411158482043,
|
||
|
|
"learning_rate": 0.0002978994055605757,
|
||
|
|
"loss": 0.8416,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14904386951631046,
|
||
|
|
"grad_norm": 0.05780387639217229,
|
||
|
|
"learning_rate": 0.0002978167513476688,
|
||
|
|
"loss": 0.8526,
|
||
|
|
"step": 795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14998125234345708,
|
||
|
|
"grad_norm": 0.05465366981027733,
|
||
|
|
"learning_rate": 0.00029773251413610987,
|
||
|
|
"loss": 0.8655,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15091863517060367,
|
||
|
|
"grad_norm": 0.05272490905818877,
|
||
|
|
"learning_rate": 0.00029764669482801174,
|
||
|
|
"loss": 0.8519,
|
||
|
|
"step": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15185601799775028,
|
||
|
|
"grad_norm": 0.05718657624393557,
|
||
|
|
"learning_rate": 0.00029755929434243034,
|
||
|
|
"loss": 0.8853,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1527934008248969,
|
||
|
|
"grad_norm": 0.05197508584506214,
|
||
|
|
"learning_rate": 0.00029747031361535464,
|
||
|
|
"loss": 0.8349,
|
||
|
|
"step": 815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15373078365204348,
|
||
|
|
"grad_norm": 0.05481428649702033,
|
||
|
|
"learning_rate": 0.0002973797535996967,
|
||
|
|
"loss": 0.8627,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1546681664791901,
|
||
|
|
"grad_norm": 0.04732127095107982,
|
||
|
|
"learning_rate": 0.00029728761526528157,
|
||
|
|
"loss": 0.8698,
|
||
|
|
"step": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15560554930633672,
|
||
|
|
"grad_norm": 0.05743584601025622,
|
||
|
|
"learning_rate": 0.00029719389959883673,
|
||
|
|
"loss": 0.8736,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1565429321334833,
|
||
|
|
"grad_norm": 0.05196494960994641,
|
||
|
|
"learning_rate": 0.00029709860760398176,
|
||
|
|
"loss": 0.8634,
|
||
|
|
"step": 835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15748031496062992,
|
||
|
|
"grad_norm": 0.05085488275567502,
|
||
|
|
"learning_rate": 0.0002970017403012173,
|
||
|
|
"loss": 0.8568,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15841769778777653,
|
||
|
|
"grad_norm": 0.050395431338846663,
|
||
|
|
"learning_rate": 0.0002969032987279144,
|
||
|
|
"loss": 0.8225,
|
||
|
|
"step": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15935508061492312,
|
||
|
|
"grad_norm": 0.04628767205411848,
|
||
|
|
"learning_rate": 0.00029680328393830315,
|
||
|
|
"loss": 0.875,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16029246344206974,
|
||
|
|
"grad_norm": 0.051723935710402326,
|
||
|
|
"learning_rate": 0.00029670169700346164,
|
||
|
|
"loss": 0.8145,
|
||
|
|
"step": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16122984626921635,
|
||
|
|
"grad_norm": 0.05201508957810899,
|
||
|
|
"learning_rate": 0.0002965985390113043,
|
||
|
|
"loss": 0.8648,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16216722909636294,
|
||
|
|
"grad_norm": 0.0495933900905848,
|
||
|
|
"learning_rate": 0.0002964938110665704,
|
||
|
|
"loss": 0.8587,
|
||
|
|
"step": 865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16310461192350956,
|
||
|
|
"grad_norm": 0.04586671475206476,
|
||
|
|
"learning_rate": 0.0002963875142908121,
|
||
|
|
"loss": 0.8412,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16404199475065617,
|
||
|
|
"grad_norm": 0.0532312699053512,
|
||
|
|
"learning_rate": 0.00029627964982238236,
|
||
|
|
"loss": 0.842,
|
||
|
|
"step": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1649793775778028,
|
||
|
|
"grad_norm": 0.043681059319998494,
|
||
|
|
"learning_rate": 0.0002961702188164231,
|
||
|
|
"loss": 0.8274,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16591676040494938,
|
||
|
|
"grad_norm": 0.048557735504717925,
|
||
|
|
"learning_rate": 0.0002960592224448524,
|
||
|
|
"loss": 0.8426,
|
||
|
|
"step": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.166854143232096,
|
||
|
|
"grad_norm": 0.04679289724415963,
|
||
|
|
"learning_rate": 0.00029594666189635224,
|
||
|
|
"loss": 0.8347,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1677915260592426,
|
||
|
|
"grad_norm": 0.047070279337964094,
|
||
|
|
"learning_rate": 0.00029583253837635575,
|
||
|
|
"loss": 0.8456,
|
||
|
|
"step": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1687289088863892,
|
||
|
|
"grad_norm": 0.05283241949368948,
|
||
|
|
"learning_rate": 0.00029571685310703403,
|
||
|
|
"loss": 0.8326,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1696662917135358,
|
||
|
|
"grad_norm": 0.05385523710797248,
|
||
|
|
"learning_rate": 0.00029559960732728337,
|
||
|
|
"loss": 0.8529,
|
||
|
|
"step": 905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17060367454068243,
|
||
|
|
"grad_norm": 0.04730829638902959,
|
||
|
|
"learning_rate": 0.000295480802292712,
|
||
|
|
"loss": 0.8085,
|
||
|
|
"step": 910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17154105736782901,
|
||
|
|
"grad_norm": 0.04488134785670412,
|
||
|
|
"learning_rate": 0.0002953604392756263,
|
||
|
|
"loss": 0.8371,
|
||
|
|
"step": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17247844019497563,
|
||
|
|
"grad_norm": 0.051313805015627746,
|
||
|
|
"learning_rate": 0.00029523851956501744,
|
||
|
|
"loss": 0.8486,
|
||
|
|
"step": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17341582302212225,
|
||
|
|
"grad_norm": 0.0430483274069335,
|
||
|
|
"learning_rate": 0.00029511504446654767,
|
||
|
|
"loss": 0.8475,
|
||
|
|
"step": 925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17435320584926883,
|
||
|
|
"grad_norm": 0.05543052805044051,
|
||
|
|
"learning_rate": 0.00029499001530253606,
|
||
|
|
"loss": 0.8571,
|
||
|
|
"step": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17529058867641545,
|
||
|
|
"grad_norm": 0.05160006837470796,
|
||
|
|
"learning_rate": 0.0002948634334119445,
|
||
|
|
"loss": 0.8348,
|
||
|
|
"step": 935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17622797150356206,
|
||
|
|
"grad_norm": 0.049000725000881734,
|
||
|
|
"learning_rate": 0.00029473530015036335,
|
||
|
|
"loss": 0.8243,
|
||
|
|
"step": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17716535433070865,
|
||
|
|
"grad_norm": 0.05324980516591907,
|
||
|
|
"learning_rate": 0.0002946056168899969,
|
||
|
|
"loss": 0.817,
|
||
|
|
"step": 945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17810273715785527,
|
||
|
|
"grad_norm": 0.05112366142864306,
|
||
|
|
"learning_rate": 0.00029447438501964873,
|
||
|
|
"loss": 0.8493,
|
||
|
|
"step": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17904011998500188,
|
||
|
|
"grad_norm": 0.0498102110793069,
|
||
|
|
"learning_rate": 0.0002943416059447066,
|
||
|
|
"loss": 0.8155,
|
||
|
|
"step": 955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17997750281214847,
|
||
|
|
"grad_norm": 0.048456183794281356,
|
||
|
|
"learning_rate": 0.0002942072810871279,
|
||
|
|
"loss": 0.8057,
|
||
|
|
"step": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1809148856392951,
|
||
|
|
"grad_norm": 0.0514791482209668,
|
||
|
|
"learning_rate": 0.0002940714118854238,
|
||
|
|
"loss": 0.8125,
|
||
|
|
"step": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1818522684664417,
|
||
|
|
"grad_norm": 0.04725030159140053,
|
||
|
|
"learning_rate": 0.0002939339997946444,
|
||
|
|
"loss": 0.8572,
|
||
|
|
"step": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1827896512935883,
|
||
|
|
"grad_norm": 0.05374547829970908,
|
||
|
|
"learning_rate": 0.0002937950462863627,
|
||
|
|
"loss": 0.8328,
|
||
|
|
"step": 975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1837270341207349,
|
||
|
|
"grad_norm": 0.0562408856971059,
|
||
|
|
"learning_rate": 0.00029365455284865923,
|
||
|
|
"loss": 0.8087,
|
||
|
|
"step": 980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18466441694788152,
|
||
|
|
"grad_norm": 0.0455189157494031,
|
||
|
|
"learning_rate": 0.00029351252098610577,
|
||
|
|
"loss": 0.8418,
|
||
|
|
"step": 985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1856017997750281,
|
||
|
|
"grad_norm": 0.050711487872205016,
|
||
|
|
"learning_rate": 0.00029336895221974946,
|
||
|
|
"loss": 0.8482,
|
||
|
|
"step": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18653918260217472,
|
||
|
|
"grad_norm": 0.04919906548209661,
|
||
|
|
"learning_rate": 0.00029322384808709654,
|
||
|
|
"loss": 0.8349,
|
||
|
|
"step": 995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18747656542932134,
|
||
|
|
"grad_norm": 0.05662595028293593,
|
||
|
|
"learning_rate": 0.00029307721014209555,
|
||
|
|
"loss": 0.8577,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18841394825646793,
|
||
|
|
"grad_norm": 0.057447476057492466,
|
||
|
|
"learning_rate": 0.00029292903995512123,
|
||
|
|
"loss": 0.8534,
|
||
|
|
"step": 1005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18935133108361454,
|
||
|
|
"grad_norm": 0.05299814571131224,
|
||
|
|
"learning_rate": 0.0002927793391129571,
|
||
|
|
"loss": 0.8577,
|
||
|
|
"step": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19028871391076116,
|
||
|
|
"grad_norm": 0.04319708352419991,
|
||
|
|
"learning_rate": 0.00029262810921877906,
|
||
|
|
"loss": 0.8188,
|
||
|
|
"step": 1015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19122609673790777,
|
||
|
|
"grad_norm": 0.05353832659477085,
|
||
|
|
"learning_rate": 0.0002924753518921376,
|
||
|
|
"loss": 0.846,
|
||
|
|
"step": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19216347956505436,
|
||
|
|
"grad_norm": 0.04966007615937987,
|
||
|
|
"learning_rate": 0.0002923210687689411,
|
||
|
|
"loss": 0.8552,
|
||
|
|
"step": 1025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19310086239220098,
|
||
|
|
"grad_norm": 0.048671986424680805,
|
||
|
|
"learning_rate": 0.00029216526150143785,
|
||
|
|
"loss": 0.8433,
|
||
|
|
"step": 1030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1940382452193476,
|
||
|
|
"grad_norm": 0.05118103987659261,
|
||
|
|
"learning_rate": 0.0002920079317581984,
|
||
|
|
"loss": 0.8545,
|
||
|
|
"step": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19497562804649418,
|
||
|
|
"grad_norm": 0.04730760417822063,
|
||
|
|
"learning_rate": 0.00029184908122409804,
|
||
|
|
"loss": 0.8255,
|
||
|
|
"step": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1959130108736408,
|
||
|
|
"grad_norm": 0.05859108169984351,
|
||
|
|
"learning_rate": 0.0002916887116002983,
|
||
|
|
"loss": 0.8391,
|
||
|
|
"step": 1045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1968503937007874,
|
||
|
|
"grad_norm": 0.05306001691326635,
|
||
|
|
"learning_rate": 0.000291526824604229,
|
||
|
|
"loss": 0.8424,
|
||
|
|
"step": 1050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.197787776527934,
|
||
|
|
"grad_norm": 0.047277287716965946,
|
||
|
|
"learning_rate": 0.00029136342196956985,
|
||
|
|
"loss": 0.833,
|
||
|
|
"step": 1055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19872515935508062,
|
||
|
|
"grad_norm": 0.05159138803836899,
|
||
|
|
"learning_rate": 0.0002911985054462318,
|
||
|
|
"loss": 0.8304,
|
||
|
|
"step": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19966254218222723,
|
||
|
|
"grad_norm": 0.046550244925313046,
|
||
|
|
"learning_rate": 0.00029103207680033827,
|
||
|
|
"loss": 0.8422,
|
||
|
|
"step": 1065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20059992500937382,
|
||
|
|
"grad_norm": 0.046411253292755236,
|
||
|
|
"learning_rate": 0.00029086413781420633,
|
||
|
|
"loss": 0.8575,
|
||
|
|
"step": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20153730783652044,
|
||
|
|
"grad_norm": 0.04762694973314196,
|
||
|
|
"learning_rate": 0.0002906946902863277,
|
||
|
|
"loss": 0.8541,
|
||
|
|
"step": 1075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20247469066366705,
|
||
|
|
"grad_norm": 0.043558681229160734,
|
||
|
|
"learning_rate": 0.0002905237360313492,
|
||
|
|
"loss": 0.8509,
|
||
|
|
"step": 1080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20341207349081364,
|
||
|
|
"grad_norm": 0.047235839608751946,
|
||
|
|
"learning_rate": 0.00029035127688005355,
|
||
|
|
"loss": 0.8326,
|
||
|
|
"step": 1085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20434945631796025,
|
||
|
|
"grad_norm": 0.04405113672607244,
|
||
|
|
"learning_rate": 0.00029017731467933974,
|
||
|
|
"loss": 0.8235,
|
||
|
|
"step": 1090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20528683914510687,
|
||
|
|
"grad_norm": 0.04917819994251594,
|
||
|
|
"learning_rate": 0.0002900018512922032,
|
||
|
|
"loss": 0.8514,
|
||
|
|
"step": 1095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20622422197225346,
|
||
|
|
"grad_norm": 0.045391849870203616,
|
||
|
|
"learning_rate": 0.0002898248885977158,
|
||
|
|
"loss": 0.841,
|
||
|
|
"step": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20716160479940007,
|
||
|
|
"grad_norm": 0.04950574892382217,
|
||
|
|
"learning_rate": 0.0002896464284910058,
|
||
|
|
"loss": 0.8604,
|
||
|
|
"step": 1105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2080989876265467,
|
||
|
|
"grad_norm": 0.04483838357821004,
|
||
|
|
"learning_rate": 0.00028946647288323766,
|
||
|
|
"loss": 0.8477,
|
||
|
|
"step": 1110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20903637045369328,
|
||
|
|
"grad_norm": 0.04819763033021056,
|
||
|
|
"learning_rate": 0.00028928502370159133,
|
||
|
|
"loss": 0.8301,
|
||
|
|
"step": 1115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2099737532808399,
|
||
|
|
"grad_norm": 0.04846529213809432,
|
||
|
|
"learning_rate": 0.0002891020828892417,
|
||
|
|
"loss": 0.8376,
|
||
|
|
"step": 1120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2109111361079865,
|
||
|
|
"grad_norm": 0.0467946353030176,
|
||
|
|
"learning_rate": 0.00028891765240533795,
|
||
|
|
"loss": 0.8705,
|
||
|
|
"step": 1125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2118485189351331,
|
||
|
|
"grad_norm": 0.044000801667735845,
|
||
|
|
"learning_rate": 0.00028873173422498243,
|
||
|
|
"loss": 0.8281,
|
||
|
|
"step": 1130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2127859017622797,
|
||
|
|
"grad_norm": 0.047502640131898,
|
||
|
|
"learning_rate": 0.0002885443303392094,
|
||
|
|
"loss": 0.8381,
|
||
|
|
"step": 1135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21372328458942633,
|
||
|
|
"grad_norm": 0.04801904197558332,
|
||
|
|
"learning_rate": 0.000288355442754964,
|
||
|
|
"loss": 0.8179,
|
||
|
|
"step": 1140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21466066741657294,
|
||
|
|
"grad_norm": 0.04261523390710894,
|
||
|
|
"learning_rate": 0.00028816507349508047,
|
||
|
|
"loss": 0.8263,
|
||
|
|
"step": 1145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21559805024371953,
|
||
|
|
"grad_norm": 0.047191156643762354,
|
||
|
|
"learning_rate": 0.00028797322459826063,
|
||
|
|
"loss": 0.8374,
|
||
|
|
"step": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21653543307086615,
|
||
|
|
"grad_norm": 0.05167657856075751,
|
||
|
|
"learning_rate": 0.00028777989811905205,
|
||
|
|
"loss": 0.8614,
|
||
|
|
"step": 1155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21747281589801276,
|
||
|
|
"grad_norm": 0.048897121204297574,
|
||
|
|
"learning_rate": 0.000287585096127826,
|
||
|
|
"loss": 0.8538,
|
||
|
|
"step": 1160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21841019872515935,
|
||
|
|
"grad_norm": 0.043161388562608975,
|
||
|
|
"learning_rate": 0.0002873888207107553,
|
||
|
|
"loss": 0.8457,
|
||
|
|
"step": 1165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21934758155230596,
|
||
|
|
"grad_norm": 0.054581961552759836,
|
||
|
|
"learning_rate": 0.000287191073969792,
|
||
|
|
"loss": 0.8362,
|
||
|
|
"step": 1170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22028496437945258,
|
||
|
|
"grad_norm": 0.05058965528101186,
|
||
|
|
"learning_rate": 0.0002869918580226448,
|
||
|
|
"loss": 0.8687,
|
||
|
|
"step": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22122234720659917,
|
||
|
|
"grad_norm": 0.04739504236542665,
|
||
|
|
"learning_rate": 0.00028679117500275653,
|
||
|
|
"loss": 0.831,
|
||
|
|
"step": 1180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22215973003374578,
|
||
|
|
"grad_norm": 0.045754358105207775,
|
||
|
|
"learning_rate": 0.00028658902705928094,
|
||
|
|
"loss": 0.868,
|
||
|
|
"step": 1185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2230971128608924,
|
||
|
|
"grad_norm": 0.04508332677839864,
|
||
|
|
"learning_rate": 0.00028638541635706027,
|
||
|
|
"loss": 0.8129,
|
||
|
|
"step": 1190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.224034495688039,
|
||
|
|
"grad_norm": 0.047178895399584496,
|
||
|
|
"learning_rate": 0.00028618034507660144,
|
||
|
|
"loss": 0.8548,
|
||
|
|
"step": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2249718785151856,
|
||
|
|
"grad_norm": 0.04249857153746906,
|
||
|
|
"learning_rate": 0.0002859738154140532,
|
||
|
|
"loss": 0.8171,
|
||
|
|
"step": 1200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22590926134233222,
|
||
|
|
"grad_norm": 0.04271090221572632,
|
||
|
|
"learning_rate": 0.00028576582958118223,
|
||
|
|
"loss": 0.8274,
|
||
|
|
"step": 1205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2268466441694788,
|
||
|
|
"grad_norm": 0.04664121668293475,
|
||
|
|
"learning_rate": 0.00028555638980534974,
|
||
|
|
"loss": 0.8374,
|
||
|
|
"step": 1210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22778402699662542,
|
||
|
|
"grad_norm": 0.045498060568064186,
|
||
|
|
"learning_rate": 0.0002853454983294875,
|
||
|
|
"loss": 0.8039,
|
||
|
|
"step": 1215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22872140982377204,
|
||
|
|
"grad_norm": 0.04534000569075493,
|
||
|
|
"learning_rate": 0.0002851331574120738,
|
||
|
|
"loss": 0.858,
|
||
|
|
"step": 1220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22965879265091863,
|
||
|
|
"grad_norm": 0.04664900784446732,
|
||
|
|
"learning_rate": 0.00028491936932710917,
|
||
|
|
"loss": 0.8402,
|
||
|
|
"step": 1225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23059617547806524,
|
||
|
|
"grad_norm": 0.04417588784422196,
|
||
|
|
"learning_rate": 0.0002847041363640923,
|
||
|
|
"loss": 0.8207,
|
||
|
|
"step": 1230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23153355830521186,
|
||
|
|
"grad_norm": 0.049652178225426,
|
||
|
|
"learning_rate": 0.0002844874608279954,
|
||
|
|
"loss": 0.8515,
|
||
|
|
"step": 1235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23247094113235844,
|
||
|
|
"grad_norm": 0.048818844243959776,
|
||
|
|
"learning_rate": 0.00028426934503923923,
|
||
|
|
"loss": 0.8322,
|
||
|
|
"step": 1240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23340832395950506,
|
||
|
|
"grad_norm": 0.04283049790199607,
|
||
|
|
"learning_rate": 0.0002840497913336687,
|
||
|
|
"loss": 0.8247,
|
||
|
|
"step": 1245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23434570678665168,
|
||
|
|
"grad_norm": 0.04305639669050898,
|
||
|
|
"learning_rate": 0.0002838288020625277,
|
||
|
|
"loss": 0.8273,
|
||
|
|
"step": 1250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23528308961379826,
|
||
|
|
"grad_norm": 0.046845793494778305,
|
||
|
|
"learning_rate": 0.00028360637959243365,
|
||
|
|
"loss": 0.8628,
|
||
|
|
"step": 1255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23622047244094488,
|
||
|
|
"grad_norm": 0.046154254481951956,
|
||
|
|
"learning_rate": 0.00028338252630535264,
|
||
|
|
"loss": 0.8565,
|
||
|
|
"step": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2371578552680915,
|
||
|
|
"grad_norm": 0.04282295436813289,
|
||
|
|
"learning_rate": 0.00028315724459857346,
|
||
|
|
"loss": 0.8468,
|
||
|
|
"step": 1265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23809523809523808,
|
||
|
|
"grad_norm": 0.04767889894413164,
|
||
|
|
"learning_rate": 0.00028293053688468214,
|
||
|
|
"loss": 0.81,
|
||
|
|
"step": 1270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2390326209223847,
|
||
|
|
"grad_norm": 0.05527610802137319,
|
||
|
|
"learning_rate": 0.00028270240559153634,
|
||
|
|
"loss": 0.8481,
|
||
|
|
"step": 1275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2399700037495313,
|
||
|
|
"grad_norm": 0.0506803378722897,
|
||
|
|
"learning_rate": 0.0002824728531622388,
|
||
|
|
"loss": 0.8354,
|
||
|
|
"step": 1280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24090738657667793,
|
||
|
|
"grad_norm": 0.04602503202904283,
|
||
|
|
"learning_rate": 0.00028224188205511154,
|
||
|
|
"loss": 0.8169,
|
||
|
|
"step": 1285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24184476940382452,
|
||
|
|
"grad_norm": 0.047777320498584894,
|
||
|
|
"learning_rate": 0.0002820094947436698,
|
||
|
|
"loss": 0.8143,
|
||
|
|
"step": 1290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24278215223097113,
|
||
|
|
"grad_norm": 0.041292637408164454,
|
||
|
|
"learning_rate": 0.0002817756937165947,
|
||
|
|
"loss": 0.8429,
|
||
|
|
"step": 1295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24371953505811775,
|
||
|
|
"grad_norm": 0.04301715565733742,
|
||
|
|
"learning_rate": 0.00028154048147770763,
|
||
|
|
"loss": 0.8225,
|
||
|
|
"step": 1300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24465691788526434,
|
||
|
|
"grad_norm": 0.046059693751385175,
|
||
|
|
"learning_rate": 0.0002813038605459426,
|
||
|
|
"loss": 0.8459,
|
||
|
|
"step": 1305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24559430071241095,
|
||
|
|
"grad_norm": 0.044196493163681724,
|
||
|
|
"learning_rate": 0.0002810658334553198,
|
||
|
|
"loss": 0.8161,
|
||
|
|
"step": 1310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24653168353955757,
|
||
|
|
"grad_norm": 0.04577271591342366,
|
||
|
|
"learning_rate": 0.00028082640275491793,
|
||
|
|
"loss": 0.8386,
|
||
|
|
"step": 1315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24746906636670415,
|
||
|
|
"grad_norm": 0.043540551106033394,
|
||
|
|
"learning_rate": 0.0002805855710088476,
|
||
|
|
"loss": 0.8332,
|
||
|
|
"step": 1320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24840644919385077,
|
||
|
|
"grad_norm": 0.05495402299781008,
|
||
|
|
"learning_rate": 0.0002803433407962233,
|
||
|
|
"loss": 0.7858,
|
||
|
|
"step": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24934383202099739,
|
||
|
|
"grad_norm": 0.04718094998030703,
|
||
|
|
"learning_rate": 0.00028009971471113594,
|
||
|
|
"loss": 0.8382,
|
||
|
|
"step": 1330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.250281214848144,
|
||
|
|
"grad_norm": 0.047616730401137404,
|
||
|
|
"learning_rate": 0.00027985469536262524,
|
||
|
|
"loss": 0.8454,
|
||
|
|
"step": 1335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2512185976752906,
|
||
|
|
"grad_norm": 0.0409389484091512,
|
||
|
|
"learning_rate": 0.0002796082853746515,
|
||
|
|
"loss": 0.81,
|
||
|
|
"step": 1340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2521559805024372,
|
||
|
|
"grad_norm": 0.04522706585670625,
|
||
|
|
"learning_rate": 0.00027936048738606785,
|
||
|
|
"loss": 0.8199,
|
||
|
|
"step": 1345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2530933633295838,
|
||
|
|
"grad_norm": 0.04653508675318254,
|
||
|
|
"learning_rate": 0.0002791113040505915,
|
||
|
|
"loss": 0.8298,
|
||
|
|
"step": 1350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25403074615673044,
|
||
|
|
"grad_norm": 0.04663421779124662,
|
||
|
|
"learning_rate": 0.0002788607380367759,
|
||
|
|
"loss": 0.865,
|
||
|
|
"step": 1355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.254968128983877,
|
||
|
|
"grad_norm": 0.04061192230877169,
|
||
|
|
"learning_rate": 0.0002786087920279818,
|
||
|
|
"loss": 0.8188,
|
||
|
|
"step": 1360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2559055118110236,
|
||
|
|
"grad_norm": 0.04379262228090009,
|
||
|
|
"learning_rate": 0.0002783554687223484,
|
||
|
|
"loss": 0.8412,
|
||
|
|
"step": 1365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2568428946381702,
|
||
|
|
"grad_norm": 0.04557822686420308,
|
||
|
|
"learning_rate": 0.0002781007708327649,
|
||
|
|
"loss": 0.8349,
|
||
|
|
"step": 1370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25778027746531684,
|
||
|
|
"grad_norm": 0.045209475352911276,
|
||
|
|
"learning_rate": 0.00027784470108684094,
|
||
|
|
"loss": 0.8554,
|
||
|
|
"step": 1375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25871766029246346,
|
||
|
|
"grad_norm": 0.0435631363718186,
|
||
|
|
"learning_rate": 0.0002775872622268779,
|
||
|
|
"loss": 0.8127,
|
||
|
|
"step": 1380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2596550431196101,
|
||
|
|
"grad_norm": 0.044012008476995,
|
||
|
|
"learning_rate": 0.0002773284570098391,
|
||
|
|
"loss": 0.8125,
|
||
|
|
"step": 1385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26059242594675663,
|
||
|
|
"grad_norm": 0.04874513976965234,
|
||
|
|
"learning_rate": 0.0002770682882073206,
|
||
|
|
"loss": 0.8116,
|
||
|
|
"step": 1390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26152980877390325,
|
||
|
|
"grad_norm": 0.04889268142442837,
|
||
|
|
"learning_rate": 0.00027680675860552106,
|
||
|
|
"loss": 0.8315,
|
||
|
|
"step": 1395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26246719160104987,
|
||
|
|
"grad_norm": 0.05581866633201944,
|
||
|
|
"learning_rate": 0.0002765438710052125,
|
||
|
|
"loss": 0.8279,
|
||
|
|
"step": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2634045744281965,
|
||
|
|
"grad_norm": 0.04602436853592128,
|
||
|
|
"learning_rate": 0.0002762796282217099,
|
||
|
|
"loss": 0.821,
|
||
|
|
"step": 1405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2643419572553431,
|
||
|
|
"grad_norm": 0.04678707820307465,
|
||
|
|
"learning_rate": 0.0002760140330848412,
|
||
|
|
"loss": 0.8466,
|
||
|
|
"step": 1410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2652793400824897,
|
||
|
|
"grad_norm": 0.04516389038252434,
|
||
|
|
"learning_rate": 0.000275747088438917,
|
||
|
|
"loss": 0.7947,
|
||
|
|
"step": 1415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26621672290963627,
|
||
|
|
"grad_norm": 0.04755311816167163,
|
||
|
|
"learning_rate": 0.00027547879714269995,
|
||
|
|
"loss": 0.8314,
|
||
|
|
"step": 1420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2671541057367829,
|
||
|
|
"grad_norm": 0.044502973033447774,
|
||
|
|
"learning_rate": 0.0002752091620693742,
|
||
|
|
"loss": 0.8227,
|
||
|
|
"step": 1425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2680914885639295,
|
||
|
|
"grad_norm": 0.0441159145083971,
|
||
|
|
"learning_rate": 0.00027493818610651487,
|
||
|
|
"loss": 0.8322,
|
||
|
|
"step": 1430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2690288713910761,
|
||
|
|
"grad_norm": 0.04046403847734057,
|
||
|
|
"learning_rate": 0.0002746658721560568,
|
||
|
|
"loss": 0.8287,
|
||
|
|
"step": 1435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26996625421822273,
|
||
|
|
"grad_norm": 0.04518695193113121,
|
||
|
|
"learning_rate": 0.0002743922231342636,
|
||
|
|
"loss": 0.8063,
|
||
|
|
"step": 1440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27090363704536935,
|
||
|
|
"grad_norm": 0.04299554365501998,
|
||
|
|
"learning_rate": 0.00027411724197169647,
|
||
|
|
"loss": 0.8418,
|
||
|
|
"step": 1445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2718410198725159,
|
||
|
|
"grad_norm": 0.04235528853329544,
|
||
|
|
"learning_rate": 0.0002738409316131827,
|
||
|
|
"loss": 0.8506,
|
||
|
|
"step": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2727784026996625,
|
||
|
|
"grad_norm": 0.04133212847113579,
|
||
|
|
"learning_rate": 0.0002735632950177843,
|
||
|
|
"loss": 0.8095,
|
||
|
|
"step": 1455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27371578552680914,
|
||
|
|
"grad_norm": 0.044217737534462995,
|
||
|
|
"learning_rate": 0.00027328433515876613,
|
||
|
|
"loss": 0.8222,
|
||
|
|
"step": 1460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27465316835395576,
|
||
|
|
"grad_norm": 0.04811397623157419,
|
||
|
|
"learning_rate": 0.0002730040550235642,
|
||
|
|
"loss": 0.8223,
|
||
|
|
"step": 1465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2755905511811024,
|
||
|
|
"grad_norm": 0.043283154172989766,
|
||
|
|
"learning_rate": 0.0002727224576137535,
|
||
|
|
"loss": 0.8244,
|
||
|
|
"step": 1470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.276527934008249,
|
||
|
|
"grad_norm": 0.04254147668888092,
|
||
|
|
"learning_rate": 0.0002724395459450161,
|
||
|
|
"loss": 0.8158,
|
||
|
|
"step": 1475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27746531683539555,
|
||
|
|
"grad_norm": 0.0437512052356442,
|
||
|
|
"learning_rate": 0.0002721553230471087,
|
||
|
|
"loss": 0.8449,
|
||
|
|
"step": 1480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27840269966254216,
|
||
|
|
"grad_norm": 0.04187590056923411,
|
||
|
|
"learning_rate": 0.0002718697919638302,
|
||
|
|
"loss": 0.7986,
|
||
|
|
"step": 1485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2793400824896888,
|
||
|
|
"grad_norm": 0.042437873734035854,
|
||
|
|
"learning_rate": 0.0002715829557529891,
|
||
|
|
"loss": 0.8286,
|
||
|
|
"step": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2802774653168354,
|
||
|
|
"grad_norm": 0.04430563389754819,
|
||
|
|
"learning_rate": 0.00027129481748637075,
|
||
|
|
"loss": 0.841,
|
||
|
|
"step": 1495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.281214848143982,
|
||
|
|
"grad_norm": 0.045758411532611065,
|
||
|
|
"learning_rate": 0.00027100538024970444,
|
||
|
|
"loss": 0.8285,
|
||
|
|
"step": 1500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2821522309711286,
|
||
|
|
"grad_norm": 0.04423481610722165,
|
||
|
|
"learning_rate": 0.00027071464714263063,
|
||
|
|
"loss": 0.8168,
|
||
|
|
"step": 1505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28308961379827524,
|
||
|
|
"grad_norm": 0.04571606782924112,
|
||
|
|
"learning_rate": 0.00027042262127866716,
|
||
|
|
"loss": 0.8249,
|
||
|
|
"step": 1510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2840269966254218,
|
||
|
|
"grad_norm": 0.04732141795830983,
|
||
|
|
"learning_rate": 0.00027012930578517645,
|
||
|
|
"loss": 0.8387,
|
||
|
|
"step": 1515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2849643794525684,
|
||
|
|
"grad_norm": 0.046305063021735725,
|
||
|
|
"learning_rate": 0.00026983470380333185,
|
||
|
|
"loss": 0.8106,
|
||
|
|
"step": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28590176227971503,
|
||
|
|
"grad_norm": 0.04195897212582909,
|
||
|
|
"learning_rate": 0.0002695388184880839,
|
||
|
|
"loss": 0.8261,
|
||
|
|
"step": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28683914510686165,
|
||
|
|
"grad_norm": 0.04456321258007771,
|
||
|
|
"learning_rate": 0.0002692416530081265,
|
||
|
|
"loss": 0.8215,
|
||
|
|
"step": 1530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28777652793400826,
|
||
|
|
"grad_norm": 0.04879364590021316,
|
||
|
|
"learning_rate": 0.0002689432105458633,
|
||
|
|
"loss": 0.8135,
|
||
|
|
"step": 1535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2887139107611549,
|
||
|
|
"grad_norm": 0.04789940824296893,
|
||
|
|
"learning_rate": 0.00026864349429737326,
|
||
|
|
"loss": 0.8368,
|
||
|
|
"step": 1540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28965129358830144,
|
||
|
|
"grad_norm": 0.04616782271102101,
|
||
|
|
"learning_rate": 0.00026834250747237665,
|
||
|
|
"loss": 0.8269,
|
||
|
|
"step": 1545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29058867641544806,
|
||
|
|
"grad_norm": 0.04502530076723608,
|
||
|
|
"learning_rate": 0.0002680402532942006,
|
||
|
|
"loss": 0.83,
|
||
|
|
"step": 1550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29152605924259467,
|
||
|
|
"grad_norm": 0.04002183200170624,
|
||
|
|
"learning_rate": 0.00026773673499974436,
|
||
|
|
"loss": 0.8053,
|
||
|
|
"step": 1555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2924634420697413,
|
||
|
|
"grad_norm": 0.04425864811179813,
|
||
|
|
"learning_rate": 0.00026743195583944524,
|
||
|
|
"loss": 0.8354,
|
||
|
|
"step": 1560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2934008248968879,
|
||
|
|
"grad_norm": 0.046865997902279015,
|
||
|
|
"learning_rate": 0.000267125919077243,
|
||
|
|
"loss": 0.8263,
|
||
|
|
"step": 1565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2943382077240345,
|
||
|
|
"grad_norm": 0.04361053554450397,
|
||
|
|
"learning_rate": 0.00026681862799054557,
|
||
|
|
"loss": 0.8007,
|
||
|
|
"step": 1570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2952755905511811,
|
||
|
|
"grad_norm": 0.048458258290373916,
|
||
|
|
"learning_rate": 0.0002665100858701937,
|
||
|
|
"loss": 0.8237,
|
||
|
|
"step": 1575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2962129733783277,
|
||
|
|
"grad_norm": 0.047990047586811434,
|
||
|
|
"learning_rate": 0.0002662002960204254,
|
||
|
|
"loss": 0.8162,
|
||
|
|
"step": 1580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2971503562054743,
|
||
|
|
"grad_norm": 0.04409542819477004,
|
||
|
|
"learning_rate": 0.0002658892617588413,
|
||
|
|
"loss": 0.8433,
|
||
|
|
"step": 1585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2980877390326209,
|
||
|
|
"grad_norm": 0.04731184190347321,
|
||
|
|
"learning_rate": 0.00026557698641636835,
|
||
|
|
"loss": 0.8133,
|
||
|
|
"step": 1590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29902512185976754,
|
||
|
|
"grad_norm": 0.042762397685975305,
|
||
|
|
"learning_rate": 0.0002652634733372246,
|
||
|
|
"loss": 0.834,
|
||
|
|
"step": 1595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29996250468691416,
|
||
|
|
"grad_norm": 0.04579010165352788,
|
||
|
|
"learning_rate": 0.0002649487258788833,
|
||
|
|
"loss": 0.8214,
|
||
|
|
"step": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3008998875140607,
|
||
|
|
"grad_norm": 0.04144592090502628,
|
||
|
|
"learning_rate": 0.0002646327474120368,
|
||
|
|
"loss": 0.8207,
|
||
|
|
"step": 1605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30183727034120733,
|
||
|
|
"grad_norm": 0.04770299028768292,
|
||
|
|
"learning_rate": 0.00026431554132056063,
|
||
|
|
"loss": 0.8258,
|
||
|
|
"step": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30277465316835395,
|
||
|
|
"grad_norm": 0.044453071872775064,
|
||
|
|
"learning_rate": 0.00026399711100147724,
|
||
|
|
"loss": 0.8085,
|
||
|
|
"step": 1615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30371203599550056,
|
||
|
|
"grad_norm": 0.05130385596231612,
|
||
|
|
"learning_rate": 0.0002636774598649195,
|
||
|
|
"loss": 0.8287,
|
||
|
|
"step": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3046494188226472,
|
||
|
|
"grad_norm": 0.053015815539756074,
|
||
|
|
"learning_rate": 0.00026335659133409423,
|
||
|
|
"loss": 0.8063,
|
||
|
|
"step": 1625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3055868016497938,
|
||
|
|
"grad_norm": 0.04286401355249045,
|
||
|
|
"learning_rate": 0.00026303450884524566,
|
||
|
|
"loss": 0.8084,
|
||
|
|
"step": 1630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3065241844769404,
|
||
|
|
"grad_norm": 0.0383196394419625,
|
||
|
|
"learning_rate": 0.0002627112158476185,
|
||
|
|
"loss": 0.8001,
|
||
|
|
"step": 1635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30746156730408697,
|
||
|
|
"grad_norm": 0.042481350200920615,
|
||
|
|
"learning_rate": 0.00026238671580342096,
|
||
|
|
"loss": 0.8342,
|
||
|
|
"step": 1640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3083989501312336,
|
||
|
|
"grad_norm": 0.0437271017287051,
|
||
|
|
"learning_rate": 0.0002620610121877879,
|
||
|
|
"loss": 0.8301,
|
||
|
|
"step": 1645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3093363329583802,
|
||
|
|
"grad_norm": 0.042066811375379806,
|
||
|
|
"learning_rate": 0.0002617341084887433,
|
||
|
|
"loss": 0.8183,
|
||
|
|
"step": 1650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3102737157855268,
|
||
|
|
"grad_norm": 0.0434545427613146,
|
||
|
|
"learning_rate": 0.00026140600820716314,
|
||
|
|
"loss": 0.8144,
|
||
|
|
"step": 1655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31121109861267343,
|
||
|
|
"grad_norm": 0.038937665823880176,
|
||
|
|
"learning_rate": 0.00026107671485673794,
|
||
|
|
"loss": 0.817,
|
||
|
|
"step": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31214848143982005,
|
||
|
|
"grad_norm": 0.044655599527732244,
|
||
|
|
"learning_rate": 0.0002607462319639348,
|
||
|
|
"loss": 0.8344,
|
||
|
|
"step": 1665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3130858642669666,
|
||
|
|
"grad_norm": 0.03965599804534495,
|
||
|
|
"learning_rate": 0.00026041456306796014,
|
||
|
|
"loss": 0.8083,
|
||
|
|
"step": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3140232470941132,
|
||
|
|
"grad_norm": 0.039069129149835194,
|
||
|
|
"learning_rate": 0.00026008171172072126,
|
||
|
|
"loss": 0.8196,
|
||
|
|
"step": 1675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31496062992125984,
|
||
|
|
"grad_norm": 0.040202855570058024,
|
||
|
|
"learning_rate": 0.0002597476814867887,
|
||
|
|
"loss": 0.8205,
|
||
|
|
"step": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31589801274840645,
|
||
|
|
"grad_norm": 0.04003968304410291,
|
||
|
|
"learning_rate": 0.0002594124759433579,
|
||
|
|
"loss": 0.8108,
|
||
|
|
"step": 1685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31683539557555307,
|
||
|
|
"grad_norm": 0.047433872652346235,
|
||
|
|
"learning_rate": 0.000259076098680211,
|
||
|
|
"loss": 0.8039,
|
||
|
|
"step": 1690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3177727784026997,
|
||
|
|
"grad_norm": 0.04141242579423705,
|
||
|
|
"learning_rate": 0.0002587385532996782,
|
||
|
|
"loss": 0.8259,
|
||
|
|
"step": 1695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31871016122984624,
|
||
|
|
"grad_norm": 0.054121951067220224,
|
||
|
|
"learning_rate": 0.0002583998434165993,
|
||
|
|
"loss": 0.8246,
|
||
|
|
"step": 1700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31964754405699286,
|
||
|
|
"grad_norm": 0.04197913764657862,
|
||
|
|
"learning_rate": 0.00025805997265828507,
|
||
|
|
"loss": 0.8463,
|
||
|
|
"step": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3205849268841395,
|
||
|
|
"grad_norm": 0.0426917661733632,
|
||
|
|
"learning_rate": 0.0002577189446644783,
|
||
|
|
"loss": 0.8183,
|
||
|
|
"step": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3215223097112861,
|
||
|
|
"grad_norm": 0.04179736226931492,
|
||
|
|
"learning_rate": 0.00025737676308731477,
|
||
|
|
"loss": 0.7976,
|
||
|
|
"step": 1715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3224596925384327,
|
||
|
|
"grad_norm": 0.04799629600304747,
|
||
|
|
"learning_rate": 0.0002570334315912844,
|
||
|
|
"loss": 0.8289,
|
||
|
|
"step": 1720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3233970753655793,
|
||
|
|
"grad_norm": 0.042393149930441694,
|
||
|
|
"learning_rate": 0.0002566889538531915,
|
||
|
|
"loss": 0.8112,
|
||
|
|
"step": 1725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3243344581927259,
|
||
|
|
"grad_norm": 0.04255536591889664,
|
||
|
|
"learning_rate": 0.000256343333562116,
|
||
|
|
"loss": 0.8187,
|
||
|
|
"step": 1730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3252718410198725,
|
||
|
|
"grad_norm": 0.044062962674437295,
|
||
|
|
"learning_rate": 0.00025599657441937354,
|
||
|
|
"loss": 0.8018,
|
||
|
|
"step": 1735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3262092238470191,
|
||
|
|
"grad_norm": 0.043474283263771174,
|
||
|
|
"learning_rate": 0.00025564868013847595,
|
||
|
|
"loss": 0.8306,
|
||
|
|
"step": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32714660667416573,
|
||
|
|
"grad_norm": 0.0422049730670292,
|
||
|
|
"learning_rate": 0.0002552996544450914,
|
||
|
|
"loss": 0.8047,
|
||
|
|
"step": 1745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32808398950131235,
|
||
|
|
"grad_norm": 0.04744673000933406,
|
||
|
|
"learning_rate": 0.0002549495010770048,
|
||
|
|
"loss": 0.8422,
|
||
|
|
"step": 1750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32902137232845896,
|
||
|
|
"grad_norm": 0.04429260845252424,
|
||
|
|
"learning_rate": 0.0002545982237840773,
|
||
|
|
"loss": 0.8191,
|
||
|
|
"step": 1755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3299587551556056,
|
||
|
|
"grad_norm": 0.04232121926909998,
|
||
|
|
"learning_rate": 0.0002542458263282066,
|
||
|
|
"loss": 0.7905,
|
||
|
|
"step": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33089613798275214,
|
||
|
|
"grad_norm": 0.03903941148795766,
|
||
|
|
"learning_rate": 0.00025389231248328624,
|
||
|
|
"loss": 0.8047,
|
||
|
|
"step": 1765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33183352080989875,
|
||
|
|
"grad_norm": 0.043127224537608456,
|
||
|
|
"learning_rate": 0.00025353768603516555,
|
||
|
|
"loss": 0.8202,
|
||
|
|
"step": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33277090363704537,
|
||
|
|
"grad_norm": 0.04603755356895304,
|
||
|
|
"learning_rate": 0.0002531819507816089,
|
||
|
|
"loss": 0.8474,
|
||
|
|
"step": 1775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.333708286464192,
|
||
|
|
"grad_norm": 0.04421659565539193,
|
||
|
|
"learning_rate": 0.00025282511053225493,
|
||
|
|
"loss": 0.8258,
|
||
|
|
"step": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3346456692913386,
|
||
|
|
"grad_norm": 0.04354315354304148,
|
||
|
|
"learning_rate": 0.0002524671691085762,
|
||
|
|
"loss": 0.7872,
|
||
|
|
"step": 1785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3355830521184852,
|
||
|
|
"grad_norm": 0.04513774703149973,
|
||
|
|
"learning_rate": 0.0002521081303438377,
|
||
|
|
"loss": 0.7985,
|
||
|
|
"step": 1790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3365204349456318,
|
||
|
|
"grad_norm": 0.04656688027583207,
|
||
|
|
"learning_rate": 0.00025174799808305606,
|
||
|
|
"loss": 0.808,
|
||
|
|
"step": 1795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3374578177727784,
|
||
|
|
"grad_norm": 0.04760744964038916,
|
||
|
|
"learning_rate": 0.0002513867761829587,
|
||
|
|
"loss": 0.793,
|
||
|
|
"step": 1800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.338395200599925,
|
||
|
|
"grad_norm": 0.04121747602293146,
|
||
|
|
"learning_rate": 0.0002510244685119418,
|
||
|
|
"loss": 0.8293,
|
||
|
|
"step": 1805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3393325834270716,
|
||
|
|
"grad_norm": 0.04112754367048587,
|
||
|
|
"learning_rate": 0.00025066107895002946,
|
||
|
|
"loss": 0.831,
|
||
|
|
"step": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34026996625421824,
|
||
|
|
"grad_norm": 0.03926106896606557,
|
||
|
|
"learning_rate": 0.0002502966113888319,
|
||
|
|
"loss": 0.8072,
|
||
|
|
"step": 1815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34120734908136485,
|
||
|
|
"grad_norm": 0.04562546212037259,
|
||
|
|
"learning_rate": 0.000249931069731504,
|
||
|
|
"loss": 0.788,
|
||
|
|
"step": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3421447319085114,
|
||
|
|
"grad_norm": 0.0406588900726224,
|
||
|
|
"learning_rate": 0.0002495644578927032,
|
||
|
|
"loss": 0.8184,
|
||
|
|
"step": 1825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34308211473565803,
|
||
|
|
"grad_norm": 0.04003290325962031,
|
||
|
|
"learning_rate": 0.00024919677979854776,
|
||
|
|
"loss": 0.8272,
|
||
|
|
"step": 1830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34401949756280464,
|
||
|
|
"grad_norm": 0.04186901209736264,
|
||
|
|
"learning_rate": 0.00024882803938657466,
|
||
|
|
"loss": 0.7956,
|
||
|
|
"step": 1835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34495688038995126,
|
||
|
|
"grad_norm": 0.041398093060463485,
|
||
|
|
"learning_rate": 0.00024845824060569743,
|
||
|
|
"loss": 0.8114,
|
||
|
|
"step": 1840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3458942632170979,
|
||
|
|
"grad_norm": 0.04109679086847299,
|
||
|
|
"learning_rate": 0.000248087387416164,
|
||
|
|
"loss": 0.807,
|
||
|
|
"step": 1845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3468316460442445,
|
||
|
|
"grad_norm": 0.042039328634813876,
|
||
|
|
"learning_rate": 0.000247715483789514,
|
||
|
|
"loss": 0.8306,
|
||
|
|
"step": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34776902887139105,
|
||
|
|
"grad_norm": 0.0394540126640408,
|
||
|
|
"learning_rate": 0.0002473425337085366,
|
||
|
|
"loss": 0.7966,
|
||
|
|
"step": 1855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34870641169853767,
|
||
|
|
"grad_norm": 0.04432229876319661,
|
||
|
|
"learning_rate": 0.0002469685411672275,
|
||
|
|
"loss": 0.811,
|
||
|
|
"step": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3496437945256843,
|
||
|
|
"grad_norm": 0.04227321189035649,
|
||
|
|
"learning_rate": 0.0002465935101707463,
|
||
|
|
"loss": 0.8248,
|
||
|
|
"step": 1865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3505811773528309,
|
||
|
|
"grad_norm": 0.039191702176161206,
|
||
|
|
"learning_rate": 0.00024621744473537365,
|
||
|
|
"loss": 0.8205,
|
||
|
|
"step": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3515185601799775,
|
||
|
|
"grad_norm": 0.04634923450670881,
|
||
|
|
"learning_rate": 0.00024584034888846835,
|
||
|
|
"loss": 0.7763,
|
||
|
|
"step": 1875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35245594300712413,
|
||
|
|
"grad_norm": 0.04501112952096222,
|
||
|
|
"learning_rate": 0.0002454622266684239,
|
||
|
|
"loss": 0.8258,
|
||
|
|
"step": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3533933258342707,
|
||
|
|
"grad_norm": 0.04422758808317238,
|
||
|
|
"learning_rate": 0.0002450830821246255,
|
||
|
|
"loss": 0.8106,
|
||
|
|
"step": 1885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3543307086614173,
|
||
|
|
"grad_norm": 0.04421016805518408,
|
||
|
|
"learning_rate": 0.00024470291931740667,
|
||
|
|
"loss": 0.7815,
|
||
|
|
"step": 1890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3552680914885639,
|
||
|
|
"grad_norm": 0.04319380391113109,
|
||
|
|
"learning_rate": 0.0002443217423180055,
|
||
|
|
"loss": 0.7973,
|
||
|
|
"step": 1895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35620547431571054,
|
||
|
|
"grad_norm": 0.040535892316044465,
|
||
|
|
"learning_rate": 0.00024393955520852158,
|
||
|
|
"loss": 0.8231,
|
||
|
|
"step": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35714285714285715,
|
||
|
|
"grad_norm": 0.050182245944215505,
|
||
|
|
"learning_rate": 0.00024355636208187175,
|
||
|
|
"loss": 0.809,
|
||
|
|
"step": 1905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35808023997000377,
|
||
|
|
"grad_norm": 0.05056635484821874,
|
||
|
|
"learning_rate": 0.00024317216704174653,
|
||
|
|
"loss": 0.7863,
|
||
|
|
"step": 1910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3590176227971504,
|
||
|
|
"grad_norm": 0.04022954249460216,
|
||
|
|
"learning_rate": 0.00024278697420256615,
|
||
|
|
"loss": 0.8144,
|
||
|
|
"step": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35995500562429694,
|
||
|
|
"grad_norm": 0.04323635830441304,
|
||
|
|
"learning_rate": 0.00024240078768943647,
|
||
|
|
"loss": 0.8052,
|
||
|
|
"step": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36089238845144356,
|
||
|
|
"grad_norm": 0.05048502253231987,
|
||
|
|
"learning_rate": 0.00024201361163810476,
|
||
|
|
"loss": 0.8292,
|
||
|
|
"step": 1925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3618297712785902,
|
||
|
|
"grad_norm": 0.045094345947545966,
|
||
|
|
"learning_rate": 0.00024162545019491545,
|
||
|
|
"loss": 0.8263,
|
||
|
|
"step": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3627671541057368,
|
||
|
|
"grad_norm": 0.04751777775798578,
|
||
|
|
"learning_rate": 0.0002412363075167658,
|
||
|
|
"loss": 0.813,
|
||
|
|
"step": 1935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3637045369328834,
|
||
|
|
"grad_norm": 0.03970350230348181,
|
||
|
|
"learning_rate": 0.0002408461877710613,
|
||
|
|
"loss": 0.7874,
|
||
|
|
"step": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36464191976003,
|
||
|
|
"grad_norm": 0.043021517102531895,
|
||
|
|
"learning_rate": 0.00024045509513567092,
|
||
|
|
"loss": 0.7835,
|
||
|
|
"step": 1945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3655793025871766,
|
||
|
|
"grad_norm": 0.04145029708505387,
|
||
|
|
"learning_rate": 0.0002400630337988826,
|
||
|
|
"loss": 0.7976,
|
||
|
|
"step": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3665166854143232,
|
||
|
|
"grad_norm": 0.04060705381251834,
|
||
|
|
"learning_rate": 0.0002396700079593583,
|
||
|
|
"loss": 0.784,
|
||
|
|
"step": 1955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3674540682414698,
|
||
|
|
"grad_norm": 0.04105772844447629,
|
||
|
|
"learning_rate": 0.00023927602182608902,
|
||
|
|
"loss": 0.7982,
|
||
|
|
"step": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3683914510686164,
|
||
|
|
"grad_norm": 0.04405339270275701,
|
||
|
|
"learning_rate": 0.00023888107961834968,
|
||
|
|
"loss": 0.8367,
|
||
|
|
"step": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36932883389576304,
|
||
|
|
"grad_norm": 0.04229707897710055,
|
||
|
|
"learning_rate": 0.00023848518556565405,
|
||
|
|
"loss": 0.8147,
|
||
|
|
"step": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37026621672290966,
|
||
|
|
"grad_norm": 0.04101868201617462,
|
||
|
|
"learning_rate": 0.00023808834390770937,
|
||
|
|
"loss": 0.7984,
|
||
|
|
"step": 1975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3712035995500562,
|
||
|
|
"grad_norm": 0.0452498903694969,
|
||
|
|
"learning_rate": 0.00023769055889437103,
|
||
|
|
"loss": 0.8064,
|
||
|
|
"step": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37214098237720283,
|
||
|
|
"grad_norm": 0.040047239815103164,
|
||
|
|
"learning_rate": 0.0002372918347855969,
|
||
|
|
"loss": 0.7737,
|
||
|
|
"step": 1985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37307836520434945,
|
||
|
|
"grad_norm": 0.03830098106296232,
|
||
|
|
"learning_rate": 0.0002368921758514018,
|
||
|
|
"loss": 0.7735,
|
||
|
|
"step": 1990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37401574803149606,
|
||
|
|
"grad_norm": 0.0466704182451149,
|
||
|
|
"learning_rate": 0.00023649158637181191,
|
||
|
|
"loss": 0.7913,
|
||
|
|
"step": 1995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3749531308586427,
|
||
|
|
"grad_norm": 0.045697413605673594,
|
||
|
|
"learning_rate": 0.00023609007063681874,
|
||
|
|
"loss": 0.8083,
|
||
|
|
"step": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3758905136857893,
|
||
|
|
"grad_norm": 0.046012146633052885,
|
||
|
|
"learning_rate": 0.0002356876329463332,
|
||
|
|
"loss": 0.7986,
|
||
|
|
"step": 2005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37682789651293586,
|
||
|
|
"grad_norm": 0.042087372585577834,
|
||
|
|
"learning_rate": 0.0002352842776101396,
|
||
|
|
"loss": 0.789,
|
||
|
|
"step": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37776527934008247,
|
||
|
|
"grad_norm": 0.04351288912284283,
|
||
|
|
"learning_rate": 0.00023488000894784954,
|
||
|
|
"loss": 0.8066,
|
||
|
|
"step": 2015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3787026621672291,
|
||
|
|
"grad_norm": 0.04264731715281628,
|
||
|
|
"learning_rate": 0.0002344748312888557,
|
||
|
|
"loss": 0.8242,
|
||
|
|
"step": 2020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3796400449943757,
|
||
|
|
"grad_norm": 0.04004005288826855,
|
||
|
|
"learning_rate": 0.00023406874897228527,
|
||
|
|
"loss": 0.8134,
|
||
|
|
"step": 2025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3805774278215223,
|
||
|
|
"grad_norm": 0.03990638844779895,
|
||
|
|
"learning_rate": 0.00023366176634695353,
|
||
|
|
"loss": 0.8341,
|
||
|
|
"step": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38151481064866893,
|
||
|
|
"grad_norm": 0.04606026511862361,
|
||
|
|
"learning_rate": 0.00023325388777131748,
|
||
|
|
"loss": 0.8001,
|
||
|
|
"step": 2035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38245219347581555,
|
||
|
|
"grad_norm": 0.04391729186424558,
|
||
|
|
"learning_rate": 0.000232845117613429,
|
||
|
|
"loss": 0.8195,
|
||
|
|
"step": 2040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3833895763029621,
|
||
|
|
"grad_norm": 0.041518162558628426,
|
||
|
|
"learning_rate": 0.00023243546025088799,
|
||
|
|
"loss": 0.7802,
|
||
|
|
"step": 2045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3843269591301087,
|
||
|
|
"grad_norm": 0.04366132632044734,
|
||
|
|
"learning_rate": 0.00023202492007079584,
|
||
|
|
"loss": 0.7828,
|
||
|
|
"step": 2050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38526434195725534,
|
||
|
|
"grad_norm": 0.04383475380804256,
|
||
|
|
"learning_rate": 0.00023161350146970794,
|
||
|
|
"loss": 0.7876,
|
||
|
|
"step": 2055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38620172478440196,
|
||
|
|
"grad_norm": 0.036393360894576204,
|
||
|
|
"learning_rate": 0.00023120120885358698,
|
||
|
|
"loss": 0.7975,
|
||
|
|
"step": 2060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38713910761154857,
|
||
|
|
"grad_norm": 0.04084955682840953,
|
||
|
|
"learning_rate": 0.00023078804663775572,
|
||
|
|
"loss": 0.7786,
|
||
|
|
"step": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3880764904386952,
|
||
|
|
"grad_norm": 0.03911103036688829,
|
||
|
|
"learning_rate": 0.00023037401924684946,
|
||
|
|
"loss": 0.8026,
|
||
|
|
"step": 2070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38901387326584175,
|
||
|
|
"grad_norm": 0.041012148951149276,
|
||
|
|
"learning_rate": 0.000229959131114769,
|
||
|
|
"loss": 0.7885,
|
||
|
|
"step": 2075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38995125609298836,
|
||
|
|
"grad_norm": 0.03899340776511314,
|
||
|
|
"learning_rate": 0.00022954338668463296,
|
||
|
|
"loss": 0.7813,
|
||
|
|
"step": 2080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.390888638920135,
|
||
|
|
"grad_norm": 0.04177139426462313,
|
||
|
|
"learning_rate": 0.00022912679040873005,
|
||
|
|
"loss": 0.7433,
|
||
|
|
"step": 2085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3918260217472816,
|
||
|
|
"grad_norm": 0.041993929485497614,
|
||
|
|
"learning_rate": 0.00022870934674847177,
|
||
|
|
"loss": 0.8079,
|
||
|
|
"step": 2090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3927634045744282,
|
||
|
|
"grad_norm": 0.04106413868995631,
|
||
|
|
"learning_rate": 0.00022829106017434434,
|
||
|
|
"loss": 0.7872,
|
||
|
|
"step": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3937007874015748,
|
||
|
|
"grad_norm": 0.047413157951248786,
|
||
|
|
"learning_rate": 0.00022787193516586091,
|
||
|
|
"loss": 0.796,
|
||
|
|
"step": 2100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3946381702287214,
|
||
|
|
"grad_norm": 0.04503978112797596,
|
||
|
|
"learning_rate": 0.00022745197621151363,
|
||
|
|
"loss": 0.8352,
|
||
|
|
"step": 2105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.395575553055868,
|
||
|
|
"grad_norm": 0.04115650674340414,
|
||
|
|
"learning_rate": 0.0002270311878087255,
|
||
|
|
"loss": 0.816,
|
||
|
|
"step": 2110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3965129358830146,
|
||
|
|
"grad_norm": 0.041160476408568784,
|
||
|
|
"learning_rate": 0.00022660957446380225,
|
||
|
|
"loss": 0.7982,
|
||
|
|
"step": 2115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39745031871016123,
|
||
|
|
"grad_norm": 0.04408506165618202,
|
||
|
|
"learning_rate": 0.00022618714069188404,
|
||
|
|
"loss": 0.8209,
|
||
|
|
"step": 2120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39838770153730785,
|
||
|
|
"grad_norm": 0.04001800900141434,
|
||
|
|
"learning_rate": 0.00022576389101689725,
|
||
|
|
"loss": 0.7771,
|
||
|
|
"step": 2125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39932508436445446,
|
||
|
|
"grad_norm": 0.04320785141278407,
|
||
|
|
"learning_rate": 0.00022533982997150585,
|
||
|
|
"loss": 0.7677,
|
||
|
|
"step": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.400262467191601,
|
||
|
|
"grad_norm": 0.039431554219398125,
|
||
|
|
"learning_rate": 0.00022491496209706293,
|
||
|
|
"loss": 0.7715,
|
||
|
|
"step": 2135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40119985001874764,
|
||
|
|
"grad_norm": 0.04041447511709435,
|
||
|
|
"learning_rate": 0.0002244892919435621,
|
||
|
|
"loss": 0.7961,
|
||
|
|
"step": 2140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40213723284589425,
|
||
|
|
"grad_norm": 0.04233570336630376,
|
||
|
|
"learning_rate": 0.00022406282406958874,
|
||
|
|
"loss": 0.7932,
|
||
|
|
"step": 2145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40307461567304087,
|
||
|
|
"grad_norm": 0.040223931466695007,
|
||
|
|
"learning_rate": 0.00022363556304227111,
|
||
|
|
"loss": 0.7972,
|
||
|
|
"step": 2150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4040119985001875,
|
||
|
|
"grad_norm": 0.03920199535556696,
|
||
|
|
"learning_rate": 0.0002232075134372316,
|
||
|
|
"loss": 0.7912,
|
||
|
|
"step": 2155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4049493813273341,
|
||
|
|
"grad_norm": 0.04045705766440815,
|
||
|
|
"learning_rate": 0.00022277867983853754,
|
||
|
|
"loss": 0.772,
|
||
|
|
"step": 2160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4058867641544807,
|
||
|
|
"grad_norm": 0.038172006672106074,
|
||
|
|
"learning_rate": 0.00022234906683865234,
|
||
|
|
"loss": 0.7994,
|
||
|
|
"step": 2165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4068241469816273,
|
||
|
|
"grad_norm": 0.042094914465141645,
|
||
|
|
"learning_rate": 0.00022191867903838597,
|
||
|
|
"loss": 0.7908,
|
||
|
|
"step": 2170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4077615298087739,
|
||
|
|
"grad_norm": 0.041299074122275056,
|
||
|
|
"learning_rate": 0.00022148752104684608,
|
||
|
|
"loss": 0.791,
|
||
|
|
"step": 2175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4086989126359205,
|
||
|
|
"grad_norm": 0.03672565143578591,
|
||
|
|
"learning_rate": 0.00022105559748138834,
|
||
|
|
"loss": 0.7879,
|
||
|
|
"step": 2180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4096362954630671,
|
||
|
|
"grad_norm": 0.03978313771126583,
|
||
|
|
"learning_rate": 0.00022062291296756715,
|
||
|
|
"loss": 0.8095,
|
||
|
|
"step": 2185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41057367829021374,
|
||
|
|
"grad_norm": 0.04001569067395201,
|
||
|
|
"learning_rate": 0.000220189472139086,
|
||
|
|
"loss": 0.7826,
|
||
|
|
"step": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41151106111736035,
|
||
|
|
"grad_norm": 0.043631782457239365,
|
||
|
|
"learning_rate": 0.00021975527963774796,
|
||
|
|
"loss": 0.7927,
|
||
|
|
"step": 2195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4124484439445069,
|
||
|
|
"grad_norm": 0.04073514355199719,
|
||
|
|
"learning_rate": 0.00021932034011340587,
|
||
|
|
"loss": 0.7939,
|
||
|
|
"step": 2200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41338582677165353,
|
||
|
|
"grad_norm": 0.044704633805377016,
|
||
|
|
"learning_rate": 0.00021888465822391269,
|
||
|
|
"loss": 0.795,
|
||
|
|
"step": 2205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41432320959880015,
|
||
|
|
"grad_norm": 0.04223574180256086,
|
||
|
|
"learning_rate": 0.00021844823863507136,
|
||
|
|
"loss": 0.7697,
|
||
|
|
"step": 2210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41526059242594676,
|
||
|
|
"grad_norm": 0.04033678944300133,
|
||
|
|
"learning_rate": 0.00021801108602058507,
|
||
|
|
"loss": 0.7942,
|
||
|
|
"step": 2215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4161979752530934,
|
||
|
|
"grad_norm": 0.040615950774112376,
|
||
|
|
"learning_rate": 0.00021757320506200713,
|
||
|
|
"loss": 0.7976,
|
||
|
|
"step": 2220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41713535808024,
|
||
|
|
"grad_norm": 0.03966904553021298,
|
||
|
|
"learning_rate": 0.00021713460044869078,
|
||
|
|
"loss": 0.7356,
|
||
|
|
"step": 2225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41807274090738655,
|
||
|
|
"grad_norm": 0.049048512180610696,
|
||
|
|
"learning_rate": 0.0002166952768777391,
|
||
|
|
"loss": 0.788,
|
||
|
|
"step": 2230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41901012373453317,
|
||
|
|
"grad_norm": 0.04363519676021418,
|
||
|
|
"learning_rate": 0.00021625523905395458,
|
||
|
|
"loss": 0.7778,
|
||
|
|
"step": 2235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4199475065616798,
|
||
|
|
"grad_norm": 0.040788196081375995,
|
||
|
|
"learning_rate": 0.00021581449168978878,
|
||
|
|
"loss": 0.7845,
|
||
|
|
"step": 2240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4208848893888264,
|
||
|
|
"grad_norm": 0.04388975232146075,
|
||
|
|
"learning_rate": 0.00021537303950529185,
|
||
|
|
"loss": 0.7912,
|
||
|
|
"step": 2245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.421822272215973,
|
||
|
|
"grad_norm": 0.039487905759282925,
|
||
|
|
"learning_rate": 0.0002149308872280621,
|
||
|
|
"loss": 0.7904,
|
||
|
|
"step": 2250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42275965504311963,
|
||
|
|
"grad_norm": 0.03892803525416569,
|
||
|
|
"learning_rate": 0.0002144880395931951,
|
||
|
|
"loss": 0.7832,
|
||
|
|
"step": 2255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4236970378702662,
|
||
|
|
"grad_norm": 0.048355006016535,
|
||
|
|
"learning_rate": 0.0002140445013432333,
|
||
|
|
"loss": 0.8233,
|
||
|
|
"step": 2260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4246344206974128,
|
||
|
|
"grad_norm": 0.04226596396802164,
|
||
|
|
"learning_rate": 0.00021360027722811505,
|
||
|
|
"loss": 0.7986,
|
||
|
|
"step": 2265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4255718035245594,
|
||
|
|
"grad_norm": 0.03777343997339362,
|
||
|
|
"learning_rate": 0.00021315537200512362,
|
||
|
|
"loss": 0.7739,
|
||
|
|
"step": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42650918635170604,
|
||
|
|
"grad_norm": 0.0447778590274291,
|
||
|
|
"learning_rate": 0.00021270979043883664,
|
||
|
|
"loss": 0.8097,
|
||
|
|
"step": 2275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42744656917885265,
|
||
|
|
"grad_norm": 0.04168554679696771,
|
||
|
|
"learning_rate": 0.00021226353730107467,
|
||
|
|
"loss": 0.7835,
|
||
|
|
"step": 2280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42838395200599927,
|
||
|
|
"grad_norm": 0.04011938574259242,
|
||
|
|
"learning_rate": 0.00021181661737085028,
|
||
|
|
"loss": 0.8223,
|
||
|
|
"step": 2285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4293213348331459,
|
||
|
|
"grad_norm": 0.03778469888602811,
|
||
|
|
"learning_rate": 0.00021136903543431685,
|
||
|
|
"loss": 0.7739,
|
||
|
|
"step": 2290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43025871766029244,
|
||
|
|
"grad_norm": 0.03715090423495881,
|
||
|
|
"learning_rate": 0.0002109207962847174,
|
||
|
|
"loss": 0.8144,
|
||
|
|
"step": 2295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43119610048743906,
|
||
|
|
"grad_norm": 0.03962090097485277,
|
||
|
|
"learning_rate": 0.00021047190472233305,
|
||
|
|
"loss": 0.7811,
|
||
|
|
"step": 2300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4321334833145857,
|
||
|
|
"grad_norm": 0.039092040132198524,
|
||
|
|
"learning_rate": 0.00021002236555443183,
|
||
|
|
"loss": 0.7909,
|
||
|
|
"step": 2305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4330708661417323,
|
||
|
|
"grad_norm": 0.03883145505567921,
|
||
|
|
"learning_rate": 0.00020957218359521706,
|
||
|
|
"loss": 0.8176,
|
||
|
|
"step": 2310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4340082489688789,
|
||
|
|
"grad_norm": 0.03985259948134514,
|
||
|
|
"learning_rate": 0.0002091213636657759,
|
||
|
|
"loss": 0.7869,
|
||
|
|
"step": 2315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4349456317960255,
|
||
|
|
"grad_norm": 0.0405689266411763,
|
||
|
|
"learning_rate": 0.0002086699105940275,
|
||
|
|
"loss": 0.8039,
|
||
|
|
"step": 2320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4358830146231721,
|
||
|
|
"grad_norm": 0.04016614154253833,
|
||
|
|
"learning_rate": 0.00020821782921467166,
|
||
|
|
"loss": 0.7911,
|
||
|
|
"step": 2325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4368203974503187,
|
||
|
|
"grad_norm": 0.03975323854656061,
|
||
|
|
"learning_rate": 0.0002077651243691367,
|
||
|
|
"loss": 0.7833,
|
||
|
|
"step": 2330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4377577802774653,
|
||
|
|
"grad_norm": 0.041105027708813344,
|
||
|
|
"learning_rate": 0.00020731180090552783,
|
||
|
|
"loss": 0.7675,
|
||
|
|
"step": 2335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43869516310461193,
|
||
|
|
"grad_norm": 0.03872126186366071,
|
||
|
|
"learning_rate": 0.00020685786367857518,
|
||
|
|
"loss": 0.7959,
|
||
|
|
"step": 2340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43963254593175854,
|
||
|
|
"grad_norm": 0.041950004467004046,
|
||
|
|
"learning_rate": 0.0002064033175495817,
|
||
|
|
"loss": 0.7642,
|
||
|
|
"step": 2345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44056992875890516,
|
||
|
|
"grad_norm": 0.04308753428596666,
|
||
|
|
"learning_rate": 0.00020594816738637133,
|
||
|
|
"loss": 0.7828,
|
||
|
|
"step": 2350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4415073115860517,
|
||
|
|
"grad_norm": 0.04067335511731369,
|
||
|
|
"learning_rate": 0.00020549241806323658,
|
||
|
|
"loss": 0.7731,
|
||
|
|
"step": 2355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44244469441319834,
|
||
|
|
"grad_norm": 0.038488821747570894,
|
||
|
|
"learning_rate": 0.00020503607446088661,
|
||
|
|
"loss": 0.7783,
|
||
|
|
"step": 2360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44338207724034495,
|
||
|
|
"grad_norm": 0.0405566911757408,
|
||
|
|
"learning_rate": 0.00020457914146639473,
|
||
|
|
"loss": 0.7913,
|
||
|
|
"step": 2365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44431946006749157,
|
||
|
|
"grad_norm": 0.041242394949931045,
|
||
|
|
"learning_rate": 0.00020412162397314624,
|
||
|
|
"loss": 0.7971,
|
||
|
|
"step": 2370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4452568428946382,
|
||
|
|
"grad_norm": 0.039550878105764575,
|
||
|
|
"learning_rate": 0.00020366352688078597,
|
||
|
|
"loss": 0.7941,
|
||
|
|
"step": 2375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4461942257217848,
|
||
|
|
"grad_norm": 0.04176542654646332,
|
||
|
|
"learning_rate": 0.00020320485509516564,
|
||
|
|
"loss": 0.7796,
|
||
|
|
"step": 2380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44713160854893136,
|
||
|
|
"grad_norm": 0.04091658096529671,
|
||
|
|
"learning_rate": 0.0002027456135282917,
|
||
|
|
"loss": 0.7656,
|
||
|
|
"step": 2385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.448068991376078,
|
||
|
|
"grad_norm": 0.04204041507697926,
|
||
|
|
"learning_rate": 0.00020228580709827227,
|
||
|
|
"loss": 0.7842,
|
||
|
|
"step": 2390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4490063742032246,
|
||
|
|
"grad_norm": 0.043581111984004314,
|
||
|
|
"learning_rate": 0.0002018254407292649,
|
||
|
|
"loss": 0.8044,
|
||
|
|
"step": 2395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4499437570303712,
|
||
|
|
"grad_norm": 0.03967757526818896,
|
||
|
|
"learning_rate": 0.00020136451935142349,
|
||
|
|
"loss": 0.7807,
|
||
|
|
"step": 2400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4508811398575178,
|
||
|
|
"grad_norm": 0.04119079014264244,
|
||
|
|
"learning_rate": 0.00020090304790084572,
|
||
|
|
"loss": 0.7949,
|
||
|
|
"step": 2405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45181852268466444,
|
||
|
|
"grad_norm": 0.04744494144786433,
|
||
|
|
"learning_rate": 0.00020044103131952007,
|
||
|
|
"loss": 0.7886,
|
||
|
|
"step": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.452755905511811,
|
||
|
|
"grad_norm": 0.03865847999571606,
|
||
|
|
"learning_rate": 0.000199978474555273,
|
||
|
|
"loss": 0.7824,
|
||
|
|
"step": 2415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4536932883389576,
|
||
|
|
"grad_norm": 0.04257054661223989,
|
||
|
|
"learning_rate": 0.0001995153825617157,
|
||
|
|
"loss": 0.7958,
|
||
|
|
"step": 2420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4546306711661042,
|
||
|
|
"grad_norm": 0.03723070056879195,
|
||
|
|
"learning_rate": 0.0001990517602981915,
|
||
|
|
"loss": 0.7743,
|
||
|
|
"step": 2425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45556805399325084,
|
||
|
|
"grad_norm": 0.04206537348222414,
|
||
|
|
"learning_rate": 0.0001985876127297224,
|
||
|
|
"loss": 0.7818,
|
||
|
|
"step": 2430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45650543682039746,
|
||
|
|
"grad_norm": 0.046605846594271055,
|
||
|
|
"learning_rate": 0.00019812294482695586,
|
||
|
|
"loss": 0.7888,
|
||
|
|
"step": 2435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4574428196475441,
|
||
|
|
"grad_norm": 0.03927213215049627,
|
||
|
|
"learning_rate": 0.00019765776156611189,
|
||
|
|
"loss": 0.7941,
|
||
|
|
"step": 2440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4583802024746907,
|
||
|
|
"grad_norm": 0.03773012168883939,
|
||
|
|
"learning_rate": 0.00019719206792892944,
|
||
|
|
"loss": 0.779,
|
||
|
|
"step": 2445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45931758530183725,
|
||
|
|
"grad_norm": 0.04033866134296589,
|
||
|
|
"learning_rate": 0.00019672586890261322,
|
||
|
|
"loss": 0.7548,
|
||
|
|
"step": 2450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46025496812898387,
|
||
|
|
"grad_norm": 0.03863797353130748,
|
||
|
|
"learning_rate": 0.00019625916947978029,
|
||
|
|
"loss": 0.7519,
|
||
|
|
"step": 2455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4611923509561305,
|
||
|
|
"grad_norm": 0.03834068421875446,
|
||
|
|
"learning_rate": 0.00019579197465840654,
|
||
|
|
"loss": 0.7524,
|
||
|
|
"step": 2460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4621297337832771,
|
||
|
|
"grad_norm": 0.04082115337906864,
|
||
|
|
"learning_rate": 0.0001953242894417731,
|
||
|
|
"loss": 0.7748,
|
||
|
|
"step": 2465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4630671166104237,
|
||
|
|
"grad_norm": 0.038882691470253396,
|
||
|
|
"learning_rate": 0.000194856118838413,
|
||
|
|
"loss": 0.7732,
|
||
|
|
"step": 2470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46400449943757033,
|
||
|
|
"grad_norm": 0.04383855537670663,
|
||
|
|
"learning_rate": 0.0001943874678620572,
|
||
|
|
"loss": 0.7718,
|
||
|
|
"step": 2475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4649418822647169,
|
||
|
|
"grad_norm": 0.04487220174913269,
|
||
|
|
"learning_rate": 0.0001939183415315812,
|
||
|
|
"loss": 0.8184,
|
||
|
|
"step": 2480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4658792650918635,
|
||
|
|
"grad_norm": 0.04243430887998934,
|
||
|
|
"learning_rate": 0.00019344874487095106,
|
||
|
|
"loss": 0.7909,
|
||
|
|
"step": 2485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4668166479190101,
|
||
|
|
"grad_norm": 0.042879736936158284,
|
||
|
|
"learning_rate": 0.00019297868290916973,
|
||
|
|
"loss": 0.8164,
|
||
|
|
"step": 2490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46775403074615673,
|
||
|
|
"grad_norm": 0.04530733011987864,
|
||
|
|
"learning_rate": 0.00019250816068022326,
|
||
|
|
"loss": 0.795,
|
||
|
|
"step": 2495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46869141357330335,
|
||
|
|
"grad_norm": 0.04578548882806771,
|
||
|
|
"learning_rate": 0.0001920371832230266,
|
||
|
|
"loss": 0.7974,
|
||
|
|
"step": 2500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46962879640044997,
|
||
|
|
"grad_norm": 0.04042238169087785,
|
||
|
|
"learning_rate": 0.00019156575558137003,
|
||
|
|
"loss": 0.8004,
|
||
|
|
"step": 2505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4705661792275965,
|
||
|
|
"grad_norm": 0.03887172482698654,
|
||
|
|
"learning_rate": 0.00019109388280386488,
|
||
|
|
"loss": 0.7686,
|
||
|
|
"step": 2510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47150356205474314,
|
||
|
|
"grad_norm": 0.03584281998116991,
|
||
|
|
"learning_rate": 0.00019062156994388937,
|
||
|
|
"loss": 0.7488,
|
||
|
|
"step": 2515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47244094488188976,
|
||
|
|
"grad_norm": 0.038784324923480024,
|
||
|
|
"learning_rate": 0.00019014882205953485,
|
||
|
|
"loss": 0.7797,
|
||
|
|
"step": 2520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4733783277090364,
|
||
|
|
"grad_norm": 0.03811245259868312,
|
||
|
|
"learning_rate": 0.00018967564421355134,
|
||
|
|
"loss": 0.7566,
|
||
|
|
"step": 2525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.474315710536183,
|
||
|
|
"grad_norm": 0.03796594842270583,
|
||
|
|
"learning_rate": 0.0001892020414732934,
|
||
|
|
"loss": 0.7853,
|
||
|
|
"step": 2530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4752530933633296,
|
||
|
|
"grad_norm": 0.04195011212848616,
|
||
|
|
"learning_rate": 0.000188728018910666,
|
||
|
|
"loss": 0.7924,
|
||
|
|
"step": 2535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47619047619047616,
|
||
|
|
"grad_norm": 0.045188665210356145,
|
||
|
|
"learning_rate": 0.00018825358160206982,
|
||
|
|
"loss": 0.7961,
|
||
|
|
"step": 2540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4771278590176228,
|
||
|
|
"grad_norm": 0.04796346505150324,
|
||
|
|
"learning_rate": 0.00018777873462834735,
|
||
|
|
"loss": 0.7809,
|
||
|
|
"step": 2545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4780652418447694,
|
||
|
|
"grad_norm": 0.039964514626294564,
|
||
|
|
"learning_rate": 0.00018730348307472824,
|
||
|
|
"loss": 0.7653,
|
||
|
|
"step": 2550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.479002624671916,
|
||
|
|
"grad_norm": 0.0427034937035765,
|
||
|
|
"learning_rate": 0.0001868278320307747,
|
||
|
|
"loss": 0.7726,
|
||
|
|
"step": 2555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4799400074990626,
|
||
|
|
"grad_norm": 0.03972042150913065,
|
||
|
|
"learning_rate": 0.00018635178659032732,
|
||
|
|
"loss": 0.7805,
|
||
|
|
"step": 2560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48087739032620924,
|
||
|
|
"grad_norm": 0.0371985040034197,
|
||
|
|
"learning_rate": 0.0001858753518514503,
|
||
|
|
"loss": 0.7561,
|
||
|
|
"step": 2565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48181477315335586,
|
||
|
|
"grad_norm": 0.04584757733571624,
|
||
|
|
"learning_rate": 0.00018539853291637696,
|
||
|
|
"loss": 0.7753,
|
||
|
|
"step": 2570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4827521559805024,
|
||
|
|
"grad_norm": 0.03974757572397099,
|
||
|
|
"learning_rate": 0.00018492133489145506,
|
||
|
|
"loss": 0.7748,
|
||
|
|
"step": 2575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48368953880764903,
|
||
|
|
"grad_norm": 0.03742920129272121,
|
||
|
|
"learning_rate": 0.000184443762887092,
|
||
|
|
"loss": 0.7917,
|
||
|
|
"step": 2580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48462692163479565,
|
||
|
|
"grad_norm": 0.04694450953419577,
|
||
|
|
"learning_rate": 0.00018396582201770032,
|
||
|
|
"loss": 0.7859,
|
||
|
|
"step": 2585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48556430446194226,
|
||
|
|
"grad_norm": 0.042973639248807004,
|
||
|
|
"learning_rate": 0.00018348751740164272,
|
||
|
|
"loss": 0.7836,
|
||
|
|
"step": 2590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4865016872890889,
|
||
|
|
"grad_norm": 0.04047247960728854,
|
||
|
|
"learning_rate": 0.00018300885416117733,
|
||
|
|
"loss": 0.7796,
|
||
|
|
"step": 2595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4874390701162355,
|
||
|
|
"grad_norm": 0.0396897138962343,
|
||
|
|
"learning_rate": 0.000182529837422403,
|
||
|
|
"loss": 0.7682,
|
||
|
|
"step": 2600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48837645294338206,
|
||
|
|
"grad_norm": 0.0398548055720446,
|
||
|
|
"learning_rate": 0.0001820504723152041,
|
||
|
|
"loss": 0.7932,
|
||
|
|
"step": 2605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48931383577052867,
|
||
|
|
"grad_norm": 0.037622818268512194,
|
||
|
|
"learning_rate": 0.0001815707639731958,
|
||
|
|
"loss": 0.7781,
|
||
|
|
"step": 2610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4902512185976753,
|
||
|
|
"grad_norm": 0.0392715706617893,
|
||
|
|
"learning_rate": 0.00018109071753366916,
|
||
|
|
"loss": 0.7929,
|
||
|
|
"step": 2615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4911886014248219,
|
||
|
|
"grad_norm": 0.039744227841198555,
|
||
|
|
"learning_rate": 0.00018061033813753576,
|
||
|
|
"loss": 0.7756,
|
||
|
|
"step": 2620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4921259842519685,
|
||
|
|
"grad_norm": 0.04155014821494704,
|
||
|
|
"learning_rate": 0.00018012963092927297,
|
||
|
|
"loss": 0.7706,
|
||
|
|
"step": 2625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49306336707911513,
|
||
|
|
"grad_norm": 0.03840166882540415,
|
||
|
|
"learning_rate": 0.0001796486010568689,
|
||
|
|
"loss": 0.7893,
|
||
|
|
"step": 2630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4940007499062617,
|
||
|
|
"grad_norm": 0.039395802156394155,
|
||
|
|
"learning_rate": 0.000179167253671767,
|
||
|
|
"loss": 0.775,
|
||
|
|
"step": 2635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4949381327334083,
|
||
|
|
"grad_norm": 0.03787300343153706,
|
||
|
|
"learning_rate": 0.00017868559392881107,
|
||
|
|
"loss": 0.795,
|
||
|
|
"step": 2640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4958755155605549,
|
||
|
|
"grad_norm": 0.040301333180301116,
|
||
|
|
"learning_rate": 0.0001782036269861899,
|
||
|
|
"loss": 0.7775,
|
||
|
|
"step": 2645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49681289838770154,
|
||
|
|
"grad_norm": 0.03780702169928712,
|
||
|
|
"learning_rate": 0.0001777213580053823,
|
||
|
|
"loss": 0.7774,
|
||
|
|
"step": 2650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49775028121484816,
|
||
|
|
"grad_norm": 0.04018675410195464,
|
||
|
|
"learning_rate": 0.0001772387921511016,
|
||
|
|
"loss": 0.7853,
|
||
|
|
"step": 2655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49868766404199477,
|
||
|
|
"grad_norm": 0.039105991236874874,
|
||
|
|
"learning_rate": 0.00017675593459124045,
|
||
|
|
"loss": 0.7853,
|
||
|
|
"step": 2660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49962504686914133,
|
||
|
|
"grad_norm": 0.043058223660341025,
|
||
|
|
"learning_rate": 0.00017627279049681538,
|
||
|
|
"loss": 0.779,
|
||
|
|
"step": 2665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.500562429696288,
|
||
|
|
"grad_norm": 0.038503318239807786,
|
||
|
|
"learning_rate": 0.0001757893650419114,
|
||
|
|
"loss": 0.7746,
|
||
|
|
"step": 2670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5014998125234346,
|
||
|
|
"grad_norm": 0.038809904361382756,
|
||
|
|
"learning_rate": 0.00017530566340362685,
|
||
|
|
"loss": 0.7836,
|
||
|
|
"step": 2675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5024371953505812,
|
||
|
|
"grad_norm": 0.03720404583133728,
|
||
|
|
"learning_rate": 0.00017482169076201765,
|
||
|
|
"loss": 0.7738,
|
||
|
|
"step": 2680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5033745781777278,
|
||
|
|
"grad_norm": 0.04072977160239652,
|
||
|
|
"learning_rate": 0.00017433745230004192,
|
||
|
|
"loss": 0.7932,
|
||
|
|
"step": 2685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5043119610048744,
|
||
|
|
"grad_norm": 0.04007107726737037,
|
||
|
|
"learning_rate": 0.00017385295320350463,
|
||
|
|
"loss": 0.7925,
|
||
|
|
"step": 2690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.505249343832021,
|
||
|
|
"grad_norm": 0.03706793674088091,
|
||
|
|
"learning_rate": 0.00017336819866100182,
|
||
|
|
"loss": 0.7802,
|
||
|
|
"step": 2695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5061867266591676,
|
||
|
|
"grad_norm": 0.03970514233327491,
|
||
|
|
"learning_rate": 0.00017288319386386515,
|
||
|
|
"loss": 0.7641,
|
||
|
|
"step": 2700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5071241094863143,
|
||
|
|
"grad_norm": 0.04248378336710558,
|
||
|
|
"learning_rate": 0.0001723979440061064,
|
||
|
|
"loss": 0.7843,
|
||
|
|
"step": 2705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5080614923134609,
|
||
|
|
"grad_norm": 0.035133630205708484,
|
||
|
|
"learning_rate": 0.00017191245428436173,
|
||
|
|
"loss": 0.7685,
|
||
|
|
"step": 2710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5089988751406074,
|
||
|
|
"grad_norm": 0.03584192505625803,
|
||
|
|
"learning_rate": 0.00017142672989783601,
|
||
|
|
"loss": 0.7488,
|
||
|
|
"step": 2715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.509936257967754,
|
||
|
|
"grad_norm": 0.043073407159687965,
|
||
|
|
"learning_rate": 0.00017094077604824708,
|
||
|
|
"loss": 0.7625,
|
||
|
|
"step": 2720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5108736407949006,
|
||
|
|
"grad_norm": 0.04373054551404995,
|
||
|
|
"learning_rate": 0.00017045459793977037,
|
||
|
|
"loss": 0.7793,
|
||
|
|
"step": 2725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5118110236220472,
|
||
|
|
"grad_norm": 0.0401634365888691,
|
||
|
|
"learning_rate": 0.00016996820077898285,
|
||
|
|
"loss": 0.7673,
|
||
|
|
"step": 2730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5127484064491938,
|
||
|
|
"grad_norm": 0.04228564245730852,
|
||
|
|
"learning_rate": 0.00016948158977480722,
|
||
|
|
"loss": 0.7829,
|
||
|
|
"step": 2735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5136857892763405,
|
||
|
|
"grad_norm": 0.04101625807575367,
|
||
|
|
"learning_rate": 0.00016899477013845656,
|
||
|
|
"loss": 0.7739,
|
||
|
|
"step": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5146231721034871,
|
||
|
|
"grad_norm": 0.037270510821881576,
|
||
|
|
"learning_rate": 0.00016850774708337794,
|
||
|
|
"loss": 0.7819,
|
||
|
|
"step": 2745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5155605549306337,
|
||
|
|
"grad_norm": 0.03719664186844611,
|
||
|
|
"learning_rate": 0.00016802052582519706,
|
||
|
|
"loss": 0.7547,
|
||
|
|
"step": 2750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5164979377577803,
|
||
|
|
"grad_norm": 0.042746632036701274,
|
||
|
|
"learning_rate": 0.00016753311158166216,
|
||
|
|
"loss": 0.7875,
|
||
|
|
"step": 2755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5174353205849269,
|
||
|
|
"grad_norm": 0.040113090184156625,
|
||
|
|
"learning_rate": 0.00016704550957258817,
|
||
|
|
"loss": 0.7671,
|
||
|
|
"step": 2760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5183727034120735,
|
||
|
|
"grad_norm": 0.04096689940473245,
|
||
|
|
"learning_rate": 0.0001665577250198009,
|
||
|
|
"loss": 0.7504,
|
||
|
|
"step": 2765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5193100862392201,
|
||
|
|
"grad_norm": 0.03979245826251994,
|
||
|
|
"learning_rate": 0.00016606976314708104,
|
||
|
|
"loss": 0.7692,
|
||
|
|
"step": 2770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5202474690663667,
|
||
|
|
"grad_norm": 0.03499884259333867,
|
||
|
|
"learning_rate": 0.0001655816291801082,
|
||
|
|
"loss": 0.7502,
|
||
|
|
"step": 2775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5211848518935133,
|
||
|
|
"grad_norm": 0.038595691040614596,
|
||
|
|
"learning_rate": 0.00016509332834640505,
|
||
|
|
"loss": 0.7779,
|
||
|
|
"step": 2780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5221222347206599,
|
||
|
|
"grad_norm": 0.03845551078176031,
|
||
|
|
"learning_rate": 0.00016460486587528114,
|
||
|
|
"loss": 0.7734,
|
||
|
|
"step": 2785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5230596175478065,
|
||
|
|
"grad_norm": 0.04268014575183859,
|
||
|
|
"learning_rate": 0.00016411624699777717,
|
||
|
|
"loss": 0.7932,
|
||
|
|
"step": 2790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5239970003749531,
|
||
|
|
"grad_norm": 0.04217654750286591,
|
||
|
|
"learning_rate": 0.0001636274769466087,
|
||
|
|
"loss": 0.7755,
|
||
|
|
"step": 2795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5249343832020997,
|
||
|
|
"grad_norm": 0.03663813497845865,
|
||
|
|
"learning_rate": 0.00016313856095611037,
|
||
|
|
"loss": 0.7819,
|
||
|
|
"step": 2800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5258717660292463,
|
||
|
|
"grad_norm": 0.03649865813691932,
|
||
|
|
"learning_rate": 0.00016264950426217963,
|
||
|
|
"loss": 0.7854,
|
||
|
|
"step": 2805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.526809148856393,
|
||
|
|
"grad_norm": 0.035007178804664606,
|
||
|
|
"learning_rate": 0.0001621603121022208,
|
||
|
|
"loss": 0.7763,
|
||
|
|
"step": 2810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5277465316835396,
|
||
|
|
"grad_norm": 0.03631407653031134,
|
||
|
|
"learning_rate": 0.00016167098971508884,
|
||
|
|
"loss": 0.75,
|
||
|
|
"step": 2815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5286839145106862,
|
||
|
|
"grad_norm": 0.03748295963128187,
|
||
|
|
"learning_rate": 0.00016118154234103345,
|
||
|
|
"loss": 0.7755,
|
||
|
|
"step": 2820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5296212973378328,
|
||
|
|
"grad_norm": 0.03847790308231328,
|
||
|
|
"learning_rate": 0.00016069197522164272,
|
||
|
|
"loss": 0.7721,
|
||
|
|
"step": 2825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5305586801649794,
|
||
|
|
"grad_norm": 0.0403928945535946,
|
||
|
|
"learning_rate": 0.00016020229359978722,
|
||
|
|
"loss": 0.7823,
|
||
|
|
"step": 2830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.531496062992126,
|
||
|
|
"grad_norm": 0.042093934154398625,
|
||
|
|
"learning_rate": 0.0001597125027195637,
|
||
|
|
"loss": 0.7594,
|
||
|
|
"step": 2835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5324334458192725,
|
||
|
|
"grad_norm": 0.03740153505333051,
|
||
|
|
"learning_rate": 0.00015922260782623906,
|
||
|
|
"loss": 0.775,
|
||
|
|
"step": 2840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5333708286464192,
|
||
|
|
"grad_norm": 0.0365020182974372,
|
||
|
|
"learning_rate": 0.00015873261416619395,
|
||
|
|
"loss": 0.7788,
|
||
|
|
"step": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5343082114735658,
|
||
|
|
"grad_norm": 0.04097368109261412,
|
||
|
|
"learning_rate": 0.00015824252698686686,
|
||
|
|
"loss": 0.7801,
|
||
|
|
"step": 2850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5352455943007124,
|
||
|
|
"grad_norm": 0.040252898201126604,
|
||
|
|
"learning_rate": 0.00015775235153669772,
|
||
|
|
"loss": 0.7651,
|
||
|
|
"step": 2855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.536182977127859,
|
||
|
|
"grad_norm": 0.04080680519751646,
|
||
|
|
"learning_rate": 0.00015726209306507182,
|
||
|
|
"loss": 0.7609,
|
||
|
|
"step": 2860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5371203599550056,
|
||
|
|
"grad_norm": 0.04064129669100478,
|
||
|
|
"learning_rate": 0.00015677175682226346,
|
||
|
|
"loss": 0.7686,
|
||
|
|
"step": 2865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5380577427821522,
|
||
|
|
"grad_norm": 0.040674542378951246,
|
||
|
|
"learning_rate": 0.0001562813480593799,
|
||
|
|
"loss": 0.7616,
|
||
|
|
"step": 2870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5389951256092989,
|
||
|
|
"grad_norm": 0.04054872658419522,
|
||
|
|
"learning_rate": 0.0001557908720283051,
|
||
|
|
"loss": 0.7938,
|
||
|
|
"step": 2875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5399325084364455,
|
||
|
|
"grad_norm": 0.037193930697877325,
|
||
|
|
"learning_rate": 0.00015530033398164318,
|
||
|
|
"loss": 0.7671,
|
||
|
|
"step": 2880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5408698912635921,
|
||
|
|
"grad_norm": 0.04330464356262373,
|
||
|
|
"learning_rate": 0.00015480973917266256,
|
||
|
|
"loss": 0.789,
|
||
|
|
"step": 2885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5418072740907387,
|
||
|
|
"grad_norm": 0.04054372294824471,
|
||
|
|
"learning_rate": 0.0001543190928552395,
|
||
|
|
"loss": 0.7511,
|
||
|
|
"step": 2890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5427446569178853,
|
||
|
|
"grad_norm": 0.037141465243079236,
|
||
|
|
"learning_rate": 0.00015382840028380193,
|
||
|
|
"loss": 0.7806,
|
||
|
|
"step": 2895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5436820397450318,
|
||
|
|
"grad_norm": 0.04616963653816421,
|
||
|
|
"learning_rate": 0.000153337666713273,
|
||
|
|
"loss": 0.7685,
|
||
|
|
"step": 2900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5446194225721784,
|
||
|
|
"grad_norm": 0.04026197513442802,
|
||
|
|
"learning_rate": 0.000152846897399015,
|
||
|
|
"loss": 0.7538,
|
||
|
|
"step": 2905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.545556805399325,
|
||
|
|
"grad_norm": 0.040209956896230005,
|
||
|
|
"learning_rate": 0.0001523560975967731,
|
||
|
|
"loss": 0.7669,
|
||
|
|
"step": 2910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5464941882264717,
|
||
|
|
"grad_norm": 0.039214472443270074,
|
||
|
|
"learning_rate": 0.0001518652725626188,
|
||
|
|
"loss": 0.7821,
|
||
|
|
"step": 2915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5474315710536183,
|
||
|
|
"grad_norm": 0.037735873161464144,
|
||
|
|
"learning_rate": 0.00015137442755289388,
|
||
|
|
"loss": 0.7669,
|
||
|
|
"step": 2920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5483689538807649,
|
||
|
|
"grad_norm": 0.040692415941492,
|
||
|
|
"learning_rate": 0.00015088356782415408,
|
||
|
|
"loss": 0.7642,
|
||
|
|
"step": 2925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5493063367079115,
|
||
|
|
"grad_norm": 0.037557444480219304,
|
||
|
|
"learning_rate": 0.0001503926986331127,
|
||
|
|
"loss": 0.7491,
|
||
|
|
"step": 2930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5502437195350581,
|
||
|
|
"grad_norm": 0.04303948252096911,
|
||
|
|
"learning_rate": 0.00014990182523658444,
|
||
|
|
"loss": 0.7633,
|
||
|
|
"step": 2935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5511811023622047,
|
||
|
|
"grad_norm": 0.039445563951292666,
|
||
|
|
"learning_rate": 0.0001494109528914291,
|
||
|
|
"loss": 0.7837,
|
||
|
|
"step": 2940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5521184851893514,
|
||
|
|
"grad_norm": 0.0369146298860869,
|
||
|
|
"learning_rate": 0.000148920086854495,
|
||
|
|
"loss": 0.7783,
|
||
|
|
"step": 2945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.553055868016498,
|
||
|
|
"grad_norm": 0.03928589902574045,
|
||
|
|
"learning_rate": 0.00014842923238256317,
|
||
|
|
"loss": 0.7623,
|
||
|
|
"step": 2950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5539932508436446,
|
||
|
|
"grad_norm": 0.040961368365580925,
|
||
|
|
"learning_rate": 0.00014793839473229047,
|
||
|
|
"loss": 0.7302,
|
||
|
|
"step": 2955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5549306336707911,
|
||
|
|
"grad_norm": 0.04066646466874108,
|
||
|
|
"learning_rate": 0.000147447579160154,
|
||
|
|
"loss": 0.7933,
|
||
|
|
"step": 2960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5558680164979377,
|
||
|
|
"grad_norm": 0.04046950253782251,
|
||
|
|
"learning_rate": 0.00014695679092239405,
|
||
|
|
"loss": 0.7794,
|
||
|
|
"step": 2965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5568053993250843,
|
||
|
|
"grad_norm": 0.0395374977204719,
|
||
|
|
"learning_rate": 0.00014646603527495848,
|
||
|
|
"loss": 0.772,
|
||
|
|
"step": 2970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5577427821522309,
|
||
|
|
"grad_norm": 0.03703594756866284,
|
||
|
|
"learning_rate": 0.00014597531747344593,
|
||
|
|
"loss": 0.7511,
|
||
|
|
"step": 2975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5586801649793776,
|
||
|
|
"grad_norm": 0.036214320470059805,
|
||
|
|
"learning_rate": 0.00014548464277304982,
|
||
|
|
"loss": 0.7706,
|
||
|
|
"step": 2980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5596175478065242,
|
||
|
|
"grad_norm": 0.03917835401091949,
|
||
|
|
"learning_rate": 0.00014499401642850207,
|
||
|
|
"loss": 0.739,
|
||
|
|
"step": 2985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5605549306336708,
|
||
|
|
"grad_norm": 0.0498467008354317,
|
||
|
|
"learning_rate": 0.00014450344369401651,
|
||
|
|
"loss": 0.763,
|
||
|
|
"step": 2990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5614923134608174,
|
||
|
|
"grad_norm": 0.05384613905490524,
|
||
|
|
"learning_rate": 0.0001440129298232332,
|
||
|
|
"loss": 0.7892,
|
||
|
|
"step": 2995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.562429696287964,
|
||
|
|
"grad_norm": 0.0457358145474257,
|
||
|
|
"learning_rate": 0.0001435224800691614,
|
||
|
|
"loss": 0.7988,
|
||
|
|
"step": 3000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5633670791151106,
|
||
|
|
"grad_norm": 0.04336091654025471,
|
||
|
|
"learning_rate": 0.00014303209968412418,
|
||
|
|
"loss": 0.799,
|
||
|
|
"step": 3005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5643044619422573,
|
||
|
|
"grad_norm": 0.04292534665730051,
|
||
|
|
"learning_rate": 0.00014254179391970132,
|
||
|
|
"loss": 0.7775,
|
||
|
|
"step": 3010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5652418447694039,
|
||
|
|
"grad_norm": 0.04250101254193063,
|
||
|
|
"learning_rate": 0.00014205156802667374,
|
||
|
|
"loss": 0.7788,
|
||
|
|
"step": 3015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5661792275965505,
|
||
|
|
"grad_norm": 0.04030937649018944,
|
||
|
|
"learning_rate": 0.00014156142725496682,
|
||
|
|
"loss": 0.7771,
|
||
|
|
"step": 3020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.567116610423697,
|
||
|
|
"grad_norm": 0.037700014044718524,
|
||
|
|
"learning_rate": 0.00014107137685359457,
|
||
|
|
"loss": 0.7603,
|
||
|
|
"step": 3025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5680539932508436,
|
||
|
|
"grad_norm": 0.03917274407024749,
|
||
|
|
"learning_rate": 0.000140581422070603,
|
||
|
|
"loss": 0.7756,
|
||
|
|
"step": 3030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5689913760779902,
|
||
|
|
"grad_norm": 0.03965878064631959,
|
||
|
|
"learning_rate": 0.00014009156815301426,
|
||
|
|
"loss": 0.781,
|
||
|
|
"step": 3035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5699287589051368,
|
||
|
|
"grad_norm": 0.03694985326600243,
|
||
|
|
"learning_rate": 0.00013960182034677016,
|
||
|
|
"loss": 0.7764,
|
||
|
|
"step": 3040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5708661417322834,
|
||
|
|
"grad_norm": 0.03649532964794123,
|
||
|
|
"learning_rate": 0.00013911218389667642,
|
||
|
|
"loss": 0.7549,
|
||
|
|
"step": 3045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5718035245594301,
|
||
|
|
"grad_norm": 0.03787093545824704,
|
||
|
|
"learning_rate": 0.0001386226640463459,
|
||
|
|
"loss": 0.7472,
|
||
|
|
"step": 3050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5727409073865767,
|
||
|
|
"grad_norm": 0.038098821200233814,
|
||
|
|
"learning_rate": 0.00013813326603814296,
|
||
|
|
"loss": 0.794,
|
||
|
|
"step": 3055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5736782902137233,
|
||
|
|
"grad_norm": 0.04007624623894434,
|
||
|
|
"learning_rate": 0.00013764399511312716,
|
||
|
|
"loss": 0.7973,
|
||
|
|
"step": 3060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5746156730408699,
|
||
|
|
"grad_norm": 0.040480907401198925,
|
||
|
|
"learning_rate": 0.0001371548565109969,
|
||
|
|
"loss": 0.7693,
|
||
|
|
"step": 3065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5755530558680165,
|
||
|
|
"grad_norm": 0.037195195565526876,
|
||
|
|
"learning_rate": 0.00013666585547003377,
|
||
|
|
"loss": 0.7548,
|
||
|
|
"step": 3070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5764904386951631,
|
||
|
|
"grad_norm": 0.04118922842272934,
|
||
|
|
"learning_rate": 0.00013617699722704598,
|
||
|
|
"loss": 0.781,
|
||
|
|
"step": 3075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5774278215223098,
|
||
|
|
"grad_norm": 0.041231050481496015,
|
||
|
|
"learning_rate": 0.0001356882870173126,
|
||
|
|
"loss": 0.7764,
|
||
|
|
"step": 3080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5783652043494563,
|
||
|
|
"grad_norm": 0.03825190175591559,
|
||
|
|
"learning_rate": 0.0001351997300745273,
|
||
|
|
"loss": 0.7604,
|
||
|
|
"step": 3085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5793025871766029,
|
||
|
|
"grad_norm": 0.041476811594546956,
|
||
|
|
"learning_rate": 0.0001347113316307425,
|
||
|
|
"loss": 0.774,
|
||
|
|
"step": 3090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5802399700037495,
|
||
|
|
"grad_norm": 0.04062186609985697,
|
||
|
|
"learning_rate": 0.0001342230969163131,
|
||
|
|
"loss": 0.7669,
|
||
|
|
"step": 3095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5811773528308961,
|
||
|
|
"grad_norm": 0.037469179130772814,
|
||
|
|
"learning_rate": 0.00013373503115984072,
|
||
|
|
"loss": 0.8056,
|
||
|
|
"step": 3100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5821147356580427,
|
||
|
|
"grad_norm": 0.04327856604489263,
|
||
|
|
"learning_rate": 0.0001332471395881174,
|
||
|
|
"loss": 0.7616,
|
||
|
|
"step": 3105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5830521184851893,
|
||
|
|
"grad_norm": 0.03958983021180182,
|
||
|
|
"learning_rate": 0.00013275942742607002,
|
||
|
|
"loss": 0.7756,
|
||
|
|
"step": 3110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.583989501312336,
|
||
|
|
"grad_norm": 0.03790940533047183,
|
||
|
|
"learning_rate": 0.00013227189989670392,
|
||
|
|
"loss": 0.7568,
|
||
|
|
"step": 3115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5849268841394826,
|
||
|
|
"grad_norm": 0.0403239958967278,
|
||
|
|
"learning_rate": 0.00013178456222104733,
|
||
|
|
"loss": 0.742,
|
||
|
|
"step": 3120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5858642669666292,
|
||
|
|
"grad_norm": 0.03895542187325954,
|
||
|
|
"learning_rate": 0.00013129741961809513,
|
||
|
|
"loss": 0.7699,
|
||
|
|
"step": 3125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5868016497937758,
|
||
|
|
"grad_norm": 0.03820864797313289,
|
||
|
|
"learning_rate": 0.00013081047730475331,
|
||
|
|
"loss": 0.7601,
|
||
|
|
"step": 3130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5877390326209224,
|
||
|
|
"grad_norm": 0.04726355486417647,
|
||
|
|
"learning_rate": 0.00013032374049578292,
|
||
|
|
"loss": 0.7642,
|
||
|
|
"step": 3135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.588676415448069,
|
||
|
|
"grad_norm": 0.04203366758155159,
|
||
|
|
"learning_rate": 0.000129837214403744,
|
||
|
|
"loss": 0.7488,
|
||
|
|
"step": 3140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5896137982752157,
|
||
|
|
"grad_norm": 0.038981436347331345,
|
||
|
|
"learning_rate": 0.00012935090423894015,
|
||
|
|
"loss": 0.7862,
|
||
|
|
"step": 3145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5905511811023622,
|
||
|
|
"grad_norm": 0.03432435415992516,
|
||
|
|
"learning_rate": 0.0001288648152093626,
|
||
|
|
"loss": 0.7525,
|
||
|
|
"step": 3150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5914885639295088,
|
||
|
|
"grad_norm": 0.03822931424545101,
|
||
|
|
"learning_rate": 0.00012837895252063432,
|
||
|
|
"loss": 0.8001,
|
||
|
|
"step": 3155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5924259467566554,
|
||
|
|
"grad_norm": 0.039214953158152176,
|
||
|
|
"learning_rate": 0.00012789332137595427,
|
||
|
|
"loss": 0.7245,
|
||
|
|
"step": 3160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.593363329583802,
|
||
|
|
"grad_norm": 0.037907598245917364,
|
||
|
|
"learning_rate": 0.00012740792697604202,
|
||
|
|
"loss": 0.7648,
|
||
|
|
"step": 3165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5943007124109486,
|
||
|
|
"grad_norm": 0.03785947037848109,
|
||
|
|
"learning_rate": 0.00012692277451908145,
|
||
|
|
"loss": 0.7654,
|
||
|
|
"step": 3170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5952380952380952,
|
||
|
|
"grad_norm": 0.03850785633219605,
|
||
|
|
"learning_rate": 0.0001264378692006658,
|
||
|
|
"loss": 0.7663,
|
||
|
|
"step": 3175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5961754780652418,
|
||
|
|
"grad_norm": 0.036428916422083596,
|
||
|
|
"learning_rate": 0.00012595321621374135,
|
||
|
|
"loss": 0.7596,
|
||
|
|
"step": 3180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5971128608923885,
|
||
|
|
"grad_norm": 0.038639622718949614,
|
||
|
|
"learning_rate": 0.0001254688207485522,
|
||
|
|
"loss": 0.7318,
|
||
|
|
"step": 3185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5980502437195351,
|
||
|
|
"grad_norm": 0.04200767445250016,
|
||
|
|
"learning_rate": 0.00012498468799258466,
|
||
|
|
"loss": 0.7543,
|
||
|
|
"step": 3190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5989876265466817,
|
||
|
|
"grad_norm": 0.039347067609858684,
|
||
|
|
"learning_rate": 0.00012450082313051163,
|
||
|
|
"loss": 0.7782,
|
||
|
|
"step": 3195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5999250093738283,
|
||
|
|
"grad_norm": 0.03956472616518923,
|
||
|
|
"learning_rate": 0.00012401723134413694,
|
||
|
|
"loss": 0.7748,
|
||
|
|
"step": 3200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6008623922009749,
|
||
|
|
"grad_norm": 0.03906550581460343,
|
||
|
|
"learning_rate": 0.00012353391781234026,
|
||
|
|
"loss": 0.781,
|
||
|
|
"step": 3205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6017997750281214,
|
||
|
|
"grad_norm": 0.04226202156309189,
|
||
|
|
"learning_rate": 0.0001230508877110211,
|
||
|
|
"loss": 0.7813,
|
||
|
|
"step": 3210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.602737157855268,
|
||
|
|
"grad_norm": 0.04310759188393267,
|
||
|
|
"learning_rate": 0.00012256814621304385,
|
||
|
|
"loss": 0.7538,
|
||
|
|
"step": 3215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6036745406824147,
|
||
|
|
"grad_norm": 0.04137403063128998,
|
||
|
|
"learning_rate": 0.00012208569848818214,
|
||
|
|
"loss": 0.7785,
|
||
|
|
"step": 3220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6046119235095613,
|
||
|
|
"grad_norm": 0.03490693407426236,
|
||
|
|
"learning_rate": 0.00012160354970306339,
|
||
|
|
"loss": 0.7615,
|
||
|
|
"step": 3225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6055493063367079,
|
||
|
|
"grad_norm": 0.04058597804316207,
|
||
|
|
"learning_rate": 0.0001211217050211139,
|
||
|
|
"loss": 0.7737,
|
||
|
|
"step": 3230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6064866891638545,
|
||
|
|
"grad_norm": 0.039857581905152456,
|
||
|
|
"learning_rate": 0.00012064016960250294,
|
||
|
|
"loss": 0.7624,
|
||
|
|
"step": 3235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6074240719910011,
|
||
|
|
"grad_norm": 0.03861470764960366,
|
||
|
|
"learning_rate": 0.00012015894860408811,
|
||
|
|
"loss": 0.732,
|
||
|
|
"step": 3240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6083614548181477,
|
||
|
|
"grad_norm": 0.03893813785005398,
|
||
|
|
"learning_rate": 0.00011967804717935964,
|
||
|
|
"loss": 0.7755,
|
||
|
|
"step": 3245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6092988376452944,
|
||
|
|
"grad_norm": 0.03949254956830129,
|
||
|
|
"learning_rate": 0.00011919747047838545,
|
||
|
|
"loss": 0.7575,
|
||
|
|
"step": 3250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.610236220472441,
|
||
|
|
"grad_norm": 0.03697345161480537,
|
||
|
|
"learning_rate": 0.00011871722364775583,
|
||
|
|
"loss": 0.7606,
|
||
|
|
"step": 3255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6111736032995876,
|
||
|
|
"grad_norm": 0.036342062943778654,
|
||
|
|
"learning_rate": 0.00011823731183052867,
|
||
|
|
"loss": 0.766,
|
||
|
|
"step": 3260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6121109861267342,
|
||
|
|
"grad_norm": 0.0370100352544129,
|
||
|
|
"learning_rate": 0.00011775774016617381,
|
||
|
|
"loss": 0.7623,
|
||
|
|
"step": 3265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6130483689538808,
|
||
|
|
"grad_norm": 0.03696482383175287,
|
||
|
|
"learning_rate": 0.00011727851379051865,
|
||
|
|
"loss": 0.7655,
|
||
|
|
"step": 3270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6139857517810273,
|
||
|
|
"grad_norm": 0.03985849511649083,
|
||
|
|
"learning_rate": 0.00011679963783569248,
|
||
|
|
"loss": 0.7495,
|
||
|
|
"step": 3275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6149231346081739,
|
||
|
|
"grad_norm": 0.038876495340474354,
|
||
|
|
"learning_rate": 0.00011632111743007223,
|
||
|
|
"loss": 0.7813,
|
||
|
|
"step": 3280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6158605174353206,
|
||
|
|
"grad_norm": 0.03750007207907501,
|
||
|
|
"learning_rate": 0.00011584295769822694,
|
||
|
|
"loss": 0.7528,
|
||
|
|
"step": 3285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6167979002624672,
|
||
|
|
"grad_norm": 0.0408841339742789,
|
||
|
|
"learning_rate": 0.00011536516376086311,
|
||
|
|
"loss": 0.7719,
|
||
|
|
"step": 3290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6177352830896138,
|
||
|
|
"grad_norm": 0.042011717790033064,
|
||
|
|
"learning_rate": 0.0001148877407347701,
|
||
|
|
"loss": 0.7493,
|
||
|
|
"step": 3295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6186726659167604,
|
||
|
|
"grad_norm": 0.0414847232838945,
|
||
|
|
"learning_rate": 0.00011441069373276481,
|
||
|
|
"loss": 0.7536,
|
||
|
|
"step": 3300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.619610048743907,
|
||
|
|
"grad_norm": 0.042292805389266726,
|
||
|
|
"learning_rate": 0.00011393402786363753,
|
||
|
|
"loss": 0.7602,
|
||
|
|
"step": 3305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6205474315710536,
|
||
|
|
"grad_norm": 0.038665796975353536,
|
||
|
|
"learning_rate": 0.00011345774823209661,
|
||
|
|
"loss": 0.7504,
|
||
|
|
"step": 3310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6214848143982002,
|
||
|
|
"grad_norm": 0.04162065454602133,
|
||
|
|
"learning_rate": 0.00011298185993871442,
|
||
|
|
"loss": 0.7638,
|
||
|
|
"step": 3315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6224221972253469,
|
||
|
|
"grad_norm": 0.03784958872128606,
|
||
|
|
"learning_rate": 0.00011250636807987208,
|
||
|
|
"loss": 0.7368,
|
||
|
|
"step": 3320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6233595800524935,
|
||
|
|
"grad_norm": 0.039769498866178414,
|
||
|
|
"learning_rate": 0.00011203127774770554,
|
||
|
|
"loss": 0.7664,
|
||
|
|
"step": 3325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6242969628796401,
|
||
|
|
"grad_norm": 0.0373640006459259,
|
||
|
|
"learning_rate": 0.00011155659403005048,
|
||
|
|
"loss": 0.7524,
|
||
|
|
"step": 3330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6252343457067866,
|
||
|
|
"grad_norm": 0.03809871959132381,
|
||
|
|
"learning_rate": 0.00011108232201038828,
|
||
|
|
"loss": 0.7484,
|
||
|
|
"step": 3335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6261717285339332,
|
||
|
|
"grad_norm": 0.03763522153071777,
|
||
|
|
"learning_rate": 0.00011060846676779107,
|
||
|
|
"loss": 0.7433,
|
||
|
|
"step": 3340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6271091113610798,
|
||
|
|
"grad_norm": 0.03946664053179257,
|
||
|
|
"learning_rate": 0.00011013503337686799,
|
||
|
|
"loss": 0.7677,
|
||
|
|
"step": 3345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6280464941882264,
|
||
|
|
"grad_norm": 0.03901754964576778,
|
||
|
|
"learning_rate": 0.00010966202690771014,
|
||
|
|
"loss": 0.7561,
|
||
|
|
"step": 3350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6289838770153731,
|
||
|
|
"grad_norm": 0.03912014672930949,
|
||
|
|
"learning_rate": 0.00010918945242583688,
|
||
|
|
"loss": 0.7789,
|
||
|
|
"step": 3355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6299212598425197,
|
||
|
|
"grad_norm": 0.03717356045637909,
|
||
|
|
"learning_rate": 0.00010871731499214128,
|
||
|
|
"loss": 0.7521,
|
||
|
|
"step": 3360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6308586426696663,
|
||
|
|
"grad_norm": 0.037849859871512744,
|
||
|
|
"learning_rate": 0.00010824561966283583,
|
||
|
|
"loss": 0.7638,
|
||
|
|
"step": 3365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6317960254968129,
|
||
|
|
"grad_norm": 0.03680463786596637,
|
||
|
|
"learning_rate": 0.00010777437148939868,
|
||
|
|
"loss": 0.7417,
|
||
|
|
"step": 3370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6327334083239595,
|
||
|
|
"grad_norm": 0.03897036657910483,
|
||
|
|
"learning_rate": 0.0001073035755185191,
|
||
|
|
"loss": 0.737,
|
||
|
|
"step": 3375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6336707911511061,
|
||
|
|
"grad_norm": 0.04507944036400963,
|
||
|
|
"learning_rate": 0.00010683323679204374,
|
||
|
|
"loss": 0.7526,
|
||
|
|
"step": 3380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6346081739782528,
|
||
|
|
"grad_norm": 0.041130560554180164,
|
||
|
|
"learning_rate": 0.00010636336034692238,
|
||
|
|
"loss": 0.7472,
|
||
|
|
"step": 3385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6355455568053994,
|
||
|
|
"grad_norm": 0.037500549260058,
|
||
|
|
"learning_rate": 0.00010589395121515441,
|
||
|
|
"loss": 0.7642,
|
||
|
|
"step": 3390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.636482939632546,
|
||
|
|
"grad_norm": 0.03501911608758103,
|
||
|
|
"learning_rate": 0.00010542501442373441,
|
||
|
|
"loss": 0.7552,
|
||
|
|
"step": 3395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6374203224596925,
|
||
|
|
"grad_norm": 0.041747590796681534,
|
||
|
|
"learning_rate": 0.00010495655499459874,
|
||
|
|
"loss": 0.7825,
|
||
|
|
"step": 3400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6383577052868391,
|
||
|
|
"grad_norm": 0.0415454403917153,
|
||
|
|
"learning_rate": 0.00010448857794457143,
|
||
|
|
"loss": 0.7615,
|
||
|
|
"step": 3405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6392950881139857,
|
||
|
|
"grad_norm": 0.034393345932052226,
|
||
|
|
"learning_rate": 0.00010402108828531086,
|
||
|
|
"loss": 0.7501,
|
||
|
|
"step": 3410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6402324709411323,
|
||
|
|
"grad_norm": 0.041393829589670965,
|
||
|
|
"learning_rate": 0.00010355409102325557,
|
||
|
|
"loss": 0.7725,
|
||
|
|
"step": 3415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.641169853768279,
|
||
|
|
"grad_norm": 0.039070120349108735,
|
||
|
|
"learning_rate": 0.00010308759115957118,
|
||
|
|
"loss": 0.7953,
|
||
|
|
"step": 3420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6421072365954256,
|
||
|
|
"grad_norm": 0.03792543146870935,
|
||
|
|
"learning_rate": 0.00010262159369009628,
|
||
|
|
"loss": 0.7584,
|
||
|
|
"step": 3425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6430446194225722,
|
||
|
|
"grad_norm": 0.04012922678154492,
|
||
|
|
"learning_rate": 0.00010215610360528948,
|
||
|
|
"loss": 0.736,
|
||
|
|
"step": 3430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6439820022497188,
|
||
|
|
"grad_norm": 0.03600161764198426,
|
||
|
|
"learning_rate": 0.00010169112589017568,
|
||
|
|
"loss": 0.739,
|
||
|
|
"step": 3435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6449193850768654,
|
||
|
|
"grad_norm": 0.04003494519804115,
|
||
|
|
"learning_rate": 0.00010122666552429259,
|
||
|
|
"loss": 0.7668,
|
||
|
|
"step": 3440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.645856767904012,
|
||
|
|
"grad_norm": 0.039267717082010545,
|
||
|
|
"learning_rate": 0.00010076272748163756,
|
||
|
|
"loss": 0.7683,
|
||
|
|
"step": 3445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6467941507311586,
|
||
|
|
"grad_norm": 0.037805171037559454,
|
||
|
|
"learning_rate": 0.00010029931673061433,
|
||
|
|
"loss": 0.7426,
|
||
|
|
"step": 3450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6477315335583053,
|
||
|
|
"grad_norm": 0.037277881133197355,
|
||
|
|
"learning_rate": 9.98364382339798e-05,
|
||
|
|
"loss": 0.7267,
|
||
|
|
"step": 3455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6486689163854518,
|
||
|
|
"grad_norm": 0.043347070226570104,
|
||
|
|
"learning_rate": 9.937409694879064e-05,
|
||
|
|
"loss": 0.7548,
|
||
|
|
"step": 3460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6496062992125984,
|
||
|
|
"grad_norm": 0.03829734267181962,
|
||
|
|
"learning_rate": 9.891229782635074e-05,
|
||
|
|
"loss": 0.7718,
|
||
|
|
"step": 3465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.650543682039745,
|
||
|
|
"grad_norm": 0.038650875886255875,
|
||
|
|
"learning_rate": 9.845104581215758e-05,
|
||
|
|
"loss": 0.7705,
|
||
|
|
"step": 3470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6514810648668916,
|
||
|
|
"grad_norm": 0.042871623377863474,
|
||
|
|
"learning_rate": 9.799034584584975e-05,
|
||
|
|
"loss": 0.7495,
|
||
|
|
"step": 3475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6524184476940382,
|
||
|
|
"grad_norm": 0.03585928229669108,
|
||
|
|
"learning_rate": 9.753020286115368e-05,
|
||
|
|
"loss": 0.7432,
|
||
|
|
"step": 3480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6533558305211848,
|
||
|
|
"grad_norm": 0.03786939257702819,
|
||
|
|
"learning_rate": 9.707062178583119e-05,
|
||
|
|
"loss": 0.7904,
|
||
|
|
"step": 3485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6542932133483315,
|
||
|
|
"grad_norm": 0.0368092502095729,
|
||
|
|
"learning_rate": 9.661160754162618e-05,
|
||
|
|
"loss": 0.7555,
|
||
|
|
"step": 3490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6552305961754781,
|
||
|
|
"grad_norm": 0.03613287171684745,
|
||
|
|
"learning_rate": 9.615316504421262e-05,
|
||
|
|
"loss": 0.7439,
|
||
|
|
"step": 3495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6561679790026247,
|
||
|
|
"grad_norm": 0.036433445794931595,
|
||
|
|
"learning_rate": 9.569529920314121e-05,
|
||
|
|
"loss": 0.7529,
|
||
|
|
"step": 3500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6571053618297713,
|
||
|
|
"grad_norm": 0.0424735007867028,
|
||
|
|
"learning_rate": 9.523801492178736e-05,
|
||
|
|
"loss": 0.7397,
|
||
|
|
"step": 3505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6580427446569179,
|
||
|
|
"grad_norm": 0.03698802222139945,
|
||
|
|
"learning_rate": 9.47813170972983e-05,
|
||
|
|
"loss": 0.7437,
|
||
|
|
"step": 3510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6589801274840645,
|
||
|
|
"grad_norm": 0.03733477324154079,
|
||
|
|
"learning_rate": 9.432521062054084e-05,
|
||
|
|
"loss": 0.7705,
|
||
|
|
"step": 3515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6599175103112112,
|
||
|
|
"grad_norm": 0.03508532202382053,
|
||
|
|
"learning_rate": 9.386970037604892e-05,
|
||
|
|
"loss": 0.7392,
|
||
|
|
"step": 3520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6608548931383577,
|
||
|
|
"grad_norm": 0.03827744300792672,
|
||
|
|
"learning_rate": 9.341479124197123e-05,
|
||
|
|
"loss": 0.7238,
|
||
|
|
"step": 3525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6617922759655043,
|
||
|
|
"grad_norm": 0.034951831735317074,
|
||
|
|
"learning_rate": 9.296048809001928e-05,
|
||
|
|
"loss": 0.7445,
|
||
|
|
"step": 3530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6627296587926509,
|
||
|
|
"grad_norm": 0.036754627748938604,
|
||
|
|
"learning_rate": 9.250679578541465e-05,
|
||
|
|
"loss": 0.7648,
|
||
|
|
"step": 3535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6636670416197975,
|
||
|
|
"grad_norm": 0.04075976815278343,
|
||
|
|
"learning_rate": 9.205371918683761e-05,
|
||
|
|
"loss": 0.7498,
|
||
|
|
"step": 3540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6646044244469441,
|
||
|
|
"grad_norm": 0.03941979332992536,
|
||
|
|
"learning_rate": 9.16012631463744e-05,
|
||
|
|
"loss": 0.7599,
|
||
|
|
"step": 3545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6655418072740907,
|
||
|
|
"grad_norm": 0.03389585413188664,
|
||
|
|
"learning_rate": 9.114943250946581e-05,
|
||
|
|
"loss": 0.7242,
|
||
|
|
"step": 3550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6664791901012374,
|
||
|
|
"grad_norm": 0.037973128654648046,
|
||
|
|
"learning_rate": 9.069823211485485e-05,
|
||
|
|
"loss": 0.7397,
|
||
|
|
"step": 3555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.667416572928384,
|
||
|
|
"grad_norm": 0.03493012197632408,
|
||
|
|
"learning_rate": 9.024766679453538e-05,
|
||
|
|
"loss": 0.7218,
|
||
|
|
"step": 3560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6683539557555306,
|
||
|
|
"grad_norm": 0.048208082428385556,
|
||
|
|
"learning_rate": 8.979774137369989e-05,
|
||
|
|
"loss": 0.7709,
|
||
|
|
"step": 3565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6692913385826772,
|
||
|
|
"grad_norm": 0.039871249220599175,
|
||
|
|
"learning_rate": 8.934846067068825e-05,
|
||
|
|
"loss": 0.7306,
|
||
|
|
"step": 3570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6702287214098238,
|
||
|
|
"grad_norm": 0.03855933322661882,
|
||
|
|
"learning_rate": 8.88998294969358e-05,
|
||
|
|
"loss": 0.73,
|
||
|
|
"step": 3575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6711661042369704,
|
||
|
|
"grad_norm": 0.0397648331368396,
|
||
|
|
"learning_rate": 8.845185265692201e-05,
|
||
|
|
"loss": 0.7324,
|
||
|
|
"step": 3580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6721034870641169,
|
||
|
|
"grad_norm": 0.03767828224137,
|
||
|
|
"learning_rate": 8.800453494811894e-05,
|
||
|
|
"loss": 0.7372,
|
||
|
|
"step": 3585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6730408698912635,
|
||
|
|
"grad_norm": 0.03982314491187269,
|
||
|
|
"learning_rate": 8.755788116093983e-05,
|
||
|
|
"loss": 0.7735,
|
||
|
|
"step": 3590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6739782527184102,
|
||
|
|
"grad_norm": 0.03712738479588227,
|
||
|
|
"learning_rate": 8.711189607868795e-05,
|
||
|
|
"loss": 0.725,
|
||
|
|
"step": 3595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6749156355455568,
|
||
|
|
"grad_norm": 0.03733488944845488,
|
||
|
|
"learning_rate": 8.666658447750519e-05,
|
||
|
|
"loss": 0.7372,
|
||
|
|
"step": 3600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6758530183727034,
|
||
|
|
"grad_norm": 0.0377678114465905,
|
||
|
|
"learning_rate": 8.622195112632107e-05,
|
||
|
|
"loss": 0.7225,
|
||
|
|
"step": 3605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.67679040119985,
|
||
|
|
"grad_norm": 0.03971467083786042,
|
||
|
|
"learning_rate": 8.577800078680156e-05,
|
||
|
|
"loss": 0.7691,
|
||
|
|
"step": 3610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6777277840269966,
|
||
|
|
"grad_norm": 0.041555052270688675,
|
||
|
|
"learning_rate": 8.533473821329814e-05,
|
||
|
|
"loss": 0.746,
|
||
|
|
"step": 3615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6786651668541432,
|
||
|
|
"grad_norm": 0.03681199845630553,
|
||
|
|
"learning_rate": 8.489216815279682e-05,
|
||
|
|
"loss": 0.7326,
|
||
|
|
"step": 3620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6796025496812899,
|
||
|
|
"grad_norm": 0.03639372767368361,
|
||
|
|
"learning_rate": 8.445029534486741e-05,
|
||
|
|
"loss": 0.7558,
|
||
|
|
"step": 3625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6805399325084365,
|
||
|
|
"grad_norm": 0.03930037562387389,
|
||
|
|
"learning_rate": 8.400912452161271e-05,
|
||
|
|
"loss": 0.7566,
|
||
|
|
"step": 3630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6814773153355831,
|
||
|
|
"grad_norm": 0.038113159588276734,
|
||
|
|
"learning_rate": 8.356866040761786e-05,
|
||
|
|
"loss": 0.7479,
|
||
|
|
"step": 3635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6824146981627297,
|
||
|
|
"grad_norm": 0.03668907273839101,
|
||
|
|
"learning_rate": 8.312890771989943e-05,
|
||
|
|
"loss": 0.7507,
|
||
|
|
"step": 3640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6833520809898763,
|
||
|
|
"grad_norm": 0.03877486758990776,
|
||
|
|
"learning_rate": 8.268987116785569e-05,
|
||
|
|
"loss": 0.7119,
|
||
|
|
"step": 3645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6842894638170228,
|
||
|
|
"grad_norm": 0.03641698338134557,
|
||
|
|
"learning_rate": 8.225155545321514e-05,
|
||
|
|
"loss": 0.7419,
|
||
|
|
"step": 3650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6852268466441694,
|
||
|
|
"grad_norm": 0.037028014658097706,
|
||
|
|
"learning_rate": 8.181396526998713e-05,
|
||
|
|
"loss": 0.7511,
|
||
|
|
"step": 3655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6861642294713161,
|
||
|
|
"grad_norm": 0.03742801285890605,
|
||
|
|
"learning_rate": 8.13771053044109e-05,
|
||
|
|
"loss": 0.7464,
|
||
|
|
"step": 3660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6871016122984627,
|
||
|
|
"grad_norm": 0.03700498083910332,
|
||
|
|
"learning_rate": 8.094098023490573e-05,
|
||
|
|
"loss": 0.7509,
|
||
|
|
"step": 3665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6880389951256093,
|
||
|
|
"grad_norm": 0.041613081744209884,
|
||
|
|
"learning_rate": 8.050559473202077e-05,
|
||
|
|
"loss": 0.7547,
|
||
|
|
"step": 3670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6889763779527559,
|
||
|
|
"grad_norm": 0.03578452096886478,
|
||
|
|
"learning_rate": 8.00709534583848e-05,
|
||
|
|
"loss": 0.7486,
|
||
|
|
"step": 3675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6899137607799025,
|
||
|
|
"grad_norm": 0.039555712535017766,
|
||
|
|
"learning_rate": 7.963706106865692e-05,
|
||
|
|
"loss": 0.7441,
|
||
|
|
"step": 3680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6908511436070491,
|
||
|
|
"grad_norm": 0.03614203981567192,
|
||
|
|
"learning_rate": 7.920392220947577e-05,
|
||
|
|
"loss": 0.7546,
|
||
|
|
"step": 3685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6917885264341957,
|
||
|
|
"grad_norm": 0.03936732269908494,
|
||
|
|
"learning_rate": 7.877154151941082e-05,
|
||
|
|
"loss": 0.7544,
|
||
|
|
"step": 3690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6927259092613424,
|
||
|
|
"grad_norm": 0.04056135650395248,
|
||
|
|
"learning_rate": 7.833992362891173e-05,
|
||
|
|
"loss": 0.748,
|
||
|
|
"step": 3695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.693663292088489,
|
||
|
|
"grad_norm": 0.03813900176127187,
|
||
|
|
"learning_rate": 7.790907316025935e-05,
|
||
|
|
"loss": 0.7566,
|
||
|
|
"step": 3700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6946006749156356,
|
||
|
|
"grad_norm": 0.036115814487516676,
|
||
|
|
"learning_rate": 7.74789947275161e-05,
|
||
|
|
"loss": 0.731,
|
||
|
|
"step": 3705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6955380577427821,
|
||
|
|
"grad_norm": 0.041142128143449405,
|
||
|
|
"learning_rate": 7.704969293647643e-05,
|
||
|
|
"loss": 0.7686,
|
||
|
|
"step": 3710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6964754405699287,
|
||
|
|
"grad_norm": 0.04017787329812371,
|
||
|
|
"learning_rate": 7.662117238461769e-05,
|
||
|
|
"loss": 0.7641,
|
||
|
|
"step": 3715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6974128233970753,
|
||
|
|
"grad_norm": 0.03912928253082181,
|
||
|
|
"learning_rate": 7.619343766105065e-05,
|
||
|
|
"loss": 0.7337,
|
||
|
|
"step": 3720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.698350206224222,
|
||
|
|
"grad_norm": 0.03739403328622796,
|
||
|
|
"learning_rate": 7.576649334647063e-05,
|
||
|
|
"loss": 0.7688,
|
||
|
|
"step": 3725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6992875890513686,
|
||
|
|
"grad_norm": 0.0362755943904781,
|
||
|
|
"learning_rate": 7.534034401310817e-05,
|
||
|
|
"loss": 0.7449,
|
||
|
|
"step": 3730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7002249718785152,
|
||
|
|
"grad_norm": 0.03857280564508149,
|
||
|
|
"learning_rate": 7.49149942246803e-05,
|
||
|
|
"loss": 0.7417,
|
||
|
|
"step": 3735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7011623547056618,
|
||
|
|
"grad_norm": 0.03765394646978099,
|
||
|
|
"learning_rate": 7.449044853634153e-05,
|
||
|
|
"loss": 0.7733,
|
||
|
|
"step": 3740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7020997375328084,
|
||
|
|
"grad_norm": 0.038387501642174006,
|
||
|
|
"learning_rate": 7.406671149463509e-05,
|
||
|
|
"loss": 0.7157,
|
||
|
|
"step": 3745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.703037120359955,
|
||
|
|
"grad_norm": 0.03475103792139527,
|
||
|
|
"learning_rate": 7.364378763744429e-05,
|
||
|
|
"loss": 0.7478,
|
||
|
|
"step": 3750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7039745031871016,
|
||
|
|
"grad_norm": 0.0378553405313383,
|
||
|
|
"learning_rate": 7.322168149394386e-05,
|
||
|
|
"loss": 0.7566,
|
||
|
|
"step": 3755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7049118860142483,
|
||
|
|
"grad_norm": 0.039967415152981675,
|
||
|
|
"learning_rate": 7.280039758455147e-05,
|
||
|
|
"loss": 0.7541,
|
||
|
|
"step": 3760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7058492688413949,
|
||
|
|
"grad_norm": 0.035394814696152195,
|
||
|
|
"learning_rate": 7.23799404208794e-05,
|
||
|
|
"loss": 0.7625,
|
||
|
|
"step": 3765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7067866516685414,
|
||
|
|
"grad_norm": 0.038752752547861896,
|
||
|
|
"learning_rate": 7.19603145056859e-05,
|
||
|
|
"loss": 0.7612,
|
||
|
|
"step": 3770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.707724034495688,
|
||
|
|
"grad_norm": 0.037109030416605036,
|
||
|
|
"learning_rate": 7.154152433282762e-05,
|
||
|
|
"loss": 0.7382,
|
||
|
|
"step": 3775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7086614173228346,
|
||
|
|
"grad_norm": 0.03554207953122535,
|
||
|
|
"learning_rate": 7.112357438721065e-05,
|
||
|
|
"loss": 0.7455,
|
||
|
|
"step": 3780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7095988001499812,
|
||
|
|
"grad_norm": 0.039676102077878514,
|
||
|
|
"learning_rate": 7.070646914474335e-05,
|
||
|
|
"loss": 0.7173,
|
||
|
|
"step": 3785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7105361829771278,
|
||
|
|
"grad_norm": 0.03421837506332554,
|
||
|
|
"learning_rate": 7.029021307228755e-05,
|
||
|
|
"loss": 0.7261,
|
||
|
|
"step": 3790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7114735658042745,
|
||
|
|
"grad_norm": 0.04037332220576155,
|
||
|
|
"learning_rate": 6.987481062761157e-05,
|
||
|
|
"loss": 0.7435,
|
||
|
|
"step": 3795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7124109486314211,
|
||
|
|
"grad_norm": 0.03870777741485929,
|
||
|
|
"learning_rate": 6.94602662593417e-05,
|
||
|
|
"loss": 0.7263,
|
||
|
|
"step": 3800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7133483314585677,
|
||
|
|
"grad_norm": 0.04496580126935979,
|
||
|
|
"learning_rate": 6.90465844069151e-05,
|
||
|
|
"loss": 0.776,
|
||
|
|
"step": 3805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7142857142857143,
|
||
|
|
"grad_norm": 0.038235977347547696,
|
||
|
|
"learning_rate": 6.863376950053221e-05,
|
||
|
|
"loss": 0.7408,
|
||
|
|
"step": 3810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7152230971128609,
|
||
|
|
"grad_norm": 0.03531689033224846,
|
||
|
|
"learning_rate": 6.82218259611088e-05,
|
||
|
|
"loss": 0.738,
|
||
|
|
"step": 3815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7161604799400075,
|
||
|
|
"grad_norm": 0.036562035053447246,
|
||
|
|
"learning_rate": 6.781075820022946e-05,
|
||
|
|
"loss": 0.779,
|
||
|
|
"step": 3820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7170978627671541,
|
||
|
|
"grad_norm": 0.03879265532726638,
|
||
|
|
"learning_rate": 6.740057062009951e-05,
|
||
|
|
"loss": 0.7514,
|
||
|
|
"step": 3825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7180352455943008,
|
||
|
|
"grad_norm": 0.037768573089647754,
|
||
|
|
"learning_rate": 6.69912676134984e-05,
|
||
|
|
"loss": 0.7423,
|
||
|
|
"step": 3830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7189726284214473,
|
||
|
|
"grad_norm": 0.03723804398233768,
|
||
|
|
"learning_rate": 6.658285356373253e-05,
|
||
|
|
"loss": 0.7277,
|
||
|
|
"step": 3835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7199100112485939,
|
||
|
|
"grad_norm": 0.03866540526571928,
|
||
|
|
"learning_rate": 6.617533284458826e-05,
|
||
|
|
"loss": 0.7452,
|
||
|
|
"step": 3840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7208473940757405,
|
||
|
|
"grad_norm": 0.03743514627207939,
|
||
|
|
"learning_rate": 6.576870982028508e-05,
|
||
|
|
"loss": 0.7487,
|
||
|
|
"step": 3845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7217847769028871,
|
||
|
|
"grad_norm": 0.04147587697115252,
|
||
|
|
"learning_rate": 6.53629888454289e-05,
|
||
|
|
"loss": 0.7273,
|
||
|
|
"step": 3850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7227221597300337,
|
||
|
|
"grad_norm": 0.04232917813229702,
|
||
|
|
"learning_rate": 6.495817426496541e-05,
|
||
|
|
"loss": 0.7413,
|
||
|
|
"step": 3855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7236595425571803,
|
||
|
|
"grad_norm": 0.041237294474918335,
|
||
|
|
"learning_rate": 6.455427041413356e-05,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 3860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.724596925384327,
|
||
|
|
"grad_norm": 0.039856233442403854,
|
||
|
|
"learning_rate": 6.415128161841909e-05,
|
||
|
|
"loss": 0.7512,
|
||
|
|
"step": 3865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7255343082114736,
|
||
|
|
"grad_norm": 0.03843674829308366,
|
||
|
|
"learning_rate": 6.374921219350826e-05,
|
||
|
|
"loss": 0.7421,
|
||
|
|
"step": 3870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7264716910386202,
|
||
|
|
"grad_norm": 0.03492500389641903,
|
||
|
|
"learning_rate": 6.334806644524147e-05,
|
||
|
|
"loss": 0.7613,
|
||
|
|
"step": 3875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7274090738657668,
|
||
|
|
"grad_norm": 0.03869959252720361,
|
||
|
|
"learning_rate": 6.294784866956757e-05,
|
||
|
|
"loss": 0.7525,
|
||
|
|
"step": 3880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7283464566929134,
|
||
|
|
"grad_norm": 0.038276776258313015,
|
||
|
|
"learning_rate": 6.254856315249736e-05,
|
||
|
|
"loss": 0.747,
|
||
|
|
"step": 3885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72928383952006,
|
||
|
|
"grad_norm": 0.035243252084036204,
|
||
|
|
"learning_rate": 6.21502141700579e-05,
|
||
|
|
"loss": 0.7306,
|
||
|
|
"step": 3890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7302212223472065,
|
||
|
|
"grad_norm": 0.03786773805086975,
|
||
|
|
"learning_rate": 6.175280598824678e-05,
|
||
|
|
"loss": 0.724,
|
||
|
|
"step": 3895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7311586051743532,
|
||
|
|
"grad_norm": 0.039123147712291625,
|
||
|
|
"learning_rate": 6.135634286298637e-05,
|
||
|
|
"loss": 0.7349,
|
||
|
|
"step": 3900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7320959880014998,
|
||
|
|
"grad_norm": 0.037251487752691576,
|
||
|
|
"learning_rate": 6.0960829040078265e-05,
|
||
|
|
"loss": 0.7345,
|
||
|
|
"step": 3905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7330333708286464,
|
||
|
|
"grad_norm": 0.03742284184866869,
|
||
|
|
"learning_rate": 6.05662687551576e-05,
|
||
|
|
"loss": 0.7795,
|
||
|
|
"step": 3910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.733970753655793,
|
||
|
|
"grad_norm": 0.04105680206555956,
|
||
|
|
"learning_rate": 6.017266623364826e-05,
|
||
|
|
"loss": 0.7498,
|
||
|
|
"step": 3915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7349081364829396,
|
||
|
|
"grad_norm": 0.039477278781974275,
|
||
|
|
"learning_rate": 5.978002569071679e-05,
|
||
|
|
"loss": 0.713,
|
||
|
|
"step": 3920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7358455193100862,
|
||
|
|
"grad_norm": 0.03698403869400972,
|
||
|
|
"learning_rate": 5.938835133122821e-05,
|
||
|
|
"loss": 0.745,
|
||
|
|
"step": 3925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7367829021372329,
|
||
|
|
"grad_norm": 0.0410733277463145,
|
||
|
|
"learning_rate": 5.899764734970007e-05,
|
||
|
|
"loss": 0.7341,
|
||
|
|
"step": 3930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7377202849643795,
|
||
|
|
"grad_norm": 0.038917622759833966,
|
||
|
|
"learning_rate": 5.860791793025817e-05,
|
||
|
|
"loss": 0.7475,
|
||
|
|
"step": 3935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7386576677915261,
|
||
|
|
"grad_norm": 0.03627337453876378,
|
||
|
|
"learning_rate": 5.821916724659148e-05,
|
||
|
|
"loss": 0.746,
|
||
|
|
"step": 3940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7395950506186727,
|
||
|
|
"grad_norm": 0.03964503193315036,
|
||
|
|
"learning_rate": 5.783139946190751e-05,
|
||
|
|
"loss": 0.7398,
|
||
|
|
"step": 3945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7405324334458193,
|
||
|
|
"grad_norm": 0.03762868562695812,
|
||
|
|
"learning_rate": 5.744461872888771e-05,
|
||
|
|
"loss": 0.7152,
|
||
|
|
"step": 3950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7414698162729659,
|
||
|
|
"grad_norm": 0.03589488400010939,
|
||
|
|
"learning_rate": 5.705882918964299e-05,
|
||
|
|
"loss": 0.7461,
|
||
|
|
"step": 3955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7424071991001124,
|
||
|
|
"grad_norm": 0.03949112731558336,
|
||
|
|
"learning_rate": 5.667403497566939e-05,
|
||
|
|
"loss": 0.7344,
|
||
|
|
"step": 3960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.743344581927259,
|
||
|
|
"grad_norm": 0.04151757065593634,
|
||
|
|
"learning_rate": 5.629024020780375e-05,
|
||
|
|
"loss": 0.7436,
|
||
|
|
"step": 3965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7442819647544057,
|
||
|
|
"grad_norm": 0.037059488736541316,
|
||
|
|
"learning_rate": 5.5907448996179766e-05,
|
||
|
|
"loss": 0.7502,
|
||
|
|
"step": 3970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7452193475815523,
|
||
|
|
"grad_norm": 0.03555904631512526,
|
||
|
|
"learning_rate": 5.552566544018373e-05,
|
||
|
|
"loss": 0.7465,
|
||
|
|
"step": 3975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7461567304086989,
|
||
|
|
"grad_norm": 0.036464378499175255,
|
||
|
|
"learning_rate": 5.514489362841083e-05,
|
||
|
|
"loss": 0.715,
|
||
|
|
"step": 3980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7470941132358455,
|
||
|
|
"grad_norm": 0.0394124136183908,
|
||
|
|
"learning_rate": 5.4765137638621246e-05,
|
||
|
|
"loss": 0.7613,
|
||
|
|
"step": 3985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7480314960629921,
|
||
|
|
"grad_norm": 0.03711650720168927,
|
||
|
|
"learning_rate": 5.4386401537696536e-05,
|
||
|
|
"loss": 0.7425,
|
||
|
|
"step": 3990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7489688788901387,
|
||
|
|
"grad_norm": 0.03888911993575119,
|
||
|
|
"learning_rate": 5.400868938159609e-05,
|
||
|
|
"loss": 0.7659,
|
||
|
|
"step": 3995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7499062617172854,
|
||
|
|
"grad_norm": 0.03744744409013629,
|
||
|
|
"learning_rate": 5.363200521531366e-05,
|
||
|
|
"loss": 0.7325,
|
||
|
|
"step": 4000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.750843644544432,
|
||
|
|
"grad_norm": 0.03613327265209726,
|
||
|
|
"learning_rate": 5.3256353072833936e-05,
|
||
|
|
"loss": 0.7519,
|
||
|
|
"step": 4005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7517810273715786,
|
||
|
|
"grad_norm": 0.03595596503402885,
|
||
|
|
"learning_rate": 5.288173697708973e-05,
|
||
|
|
"loss": 0.7469,
|
||
|
|
"step": 4010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7527184101987252,
|
||
|
|
"grad_norm": 0.03769961152327502,
|
||
|
|
"learning_rate": 5.2508160939918286e-05,
|
||
|
|
"loss": 0.752,
|
||
|
|
"step": 4015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7536557930258717,
|
||
|
|
"grad_norm": 0.03633117479738773,
|
||
|
|
"learning_rate": 5.213562896201902e-05,
|
||
|
|
"loss": 0.7446,
|
||
|
|
"step": 4020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7545931758530183,
|
||
|
|
"grad_norm": 0.042037986286737154,
|
||
|
|
"learning_rate": 5.176414503290993e-05,
|
||
|
|
"loss": 0.7465,
|
||
|
|
"step": 4025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7555305586801649,
|
||
|
|
"grad_norm": 0.03964006330583541,
|
||
|
|
"learning_rate": 5.139371313088561e-05,
|
||
|
|
"loss": 0.7409,
|
||
|
|
"step": 4030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7564679415073116,
|
||
|
|
"grad_norm": 0.03557466108190626,
|
||
|
|
"learning_rate": 5.1024337222974125e-05,
|
||
|
|
"loss": 0.7482,
|
||
|
|
"step": 4035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7574053243344582,
|
||
|
|
"grad_norm": 0.03735795418886787,
|
||
|
|
"learning_rate": 5.065602126489453e-05,
|
||
|
|
"loss": 0.7429,
|
||
|
|
"step": 4040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7583427071616048,
|
||
|
|
"grad_norm": 0.03696761727309839,
|
||
|
|
"learning_rate": 5.028876920101504e-05,
|
||
|
|
"loss": 0.7217,
|
||
|
|
"step": 4045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7592800899887514,
|
||
|
|
"grad_norm": 0.03683633483444796,
|
||
|
|
"learning_rate": 4.992258496431002e-05,
|
||
|
|
"loss": 0.7415,
|
||
|
|
"step": 4050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.760217472815898,
|
||
|
|
"grad_norm": 0.03527425447836841,
|
||
|
|
"learning_rate": 4.955747247631865e-05,
|
||
|
|
"loss": 0.7311,
|
||
|
|
"step": 4055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7611548556430446,
|
||
|
|
"grad_norm": 0.03674873764080841,
|
||
|
|
"learning_rate": 4.91934356471022e-05,
|
||
|
|
"loss": 0.7515,
|
||
|
|
"step": 4060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7620922384701913,
|
||
|
|
"grad_norm": 0.036177949800544354,
|
||
|
|
"learning_rate": 4.883047837520268e-05,
|
||
|
|
"loss": 0.7752,
|
||
|
|
"step": 4065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7630296212973379,
|
||
|
|
"grad_norm": 0.03907021802297722,
|
||
|
|
"learning_rate": 4.84686045476009e-05,
|
||
|
|
"loss": 0.728,
|
||
|
|
"step": 4070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7639670041244845,
|
||
|
|
"grad_norm": 0.037180348890681836,
|
||
|
|
"learning_rate": 4.810781803967482e-05,
|
||
|
|
"loss": 0.7221,
|
||
|
|
"step": 4075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7649043869516311,
|
||
|
|
"grad_norm": 0.038648597139017825,
|
||
|
|
"learning_rate": 4.7748122715158074e-05,
|
||
|
|
"loss": 0.7171,
|
||
|
|
"step": 4080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7658417697787776,
|
||
|
|
"grad_norm": 0.037736025165151156,
|
||
|
|
"learning_rate": 4.7389522426098614e-05,
|
||
|
|
"loss": 0.7521,
|
||
|
|
"step": 4085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7667791526059242,
|
||
|
|
"grad_norm": 0.03723459058036576,
|
||
|
|
"learning_rate": 4.703202101281744e-05,
|
||
|
|
"loss": 0.7352,
|
||
|
|
"step": 4090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7677165354330708,
|
||
|
|
"grad_norm": 0.03642827803946468,
|
||
|
|
"learning_rate": 4.667562230386749e-05,
|
||
|
|
"loss": 0.7449,
|
||
|
|
"step": 4095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7686539182602175,
|
||
|
|
"grad_norm": 0.036360477245182196,
|
||
|
|
"learning_rate": 4.63203301159926e-05,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 4100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7695913010873641,
|
||
|
|
"grad_norm": 0.03758177450726638,
|
||
|
|
"learning_rate": 4.596614825408666e-05,
|
||
|
|
"loss": 0.7186,
|
||
|
|
"step": 4105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7705286839145107,
|
||
|
|
"grad_norm": 0.03748476699050447,
|
||
|
|
"learning_rate": 4.561308051115285e-05,
|
||
|
|
"loss": 0.757,
|
||
|
|
"step": 4110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7714660667416573,
|
||
|
|
"grad_norm": 0.036097069379836325,
|
||
|
|
"learning_rate": 4.5261130668263054e-05,
|
||
|
|
"loss": 0.743,
|
||
|
|
"step": 4115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7724034495688039,
|
||
|
|
"grad_norm": 0.03661609433595777,
|
||
|
|
"learning_rate": 4.4910302494517345e-05,
|
||
|
|
"loss": 0.727,
|
||
|
|
"step": 4120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7733408323959505,
|
||
|
|
"grad_norm": 0.036604261302591215,
|
||
|
|
"learning_rate": 4.456059974700361e-05,
|
||
|
|
"loss": 0.7198,
|
||
|
|
"step": 4125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7742782152230971,
|
||
|
|
"grad_norm": 0.0375410578618143,
|
||
|
|
"learning_rate": 4.4212026170757384e-05,
|
||
|
|
"loss": 0.7385,
|
||
|
|
"step": 4130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7752155980502438,
|
||
|
|
"grad_norm": 0.037797934383579375,
|
||
|
|
"learning_rate": 4.3864585498721445e-05,
|
||
|
|
"loss": 0.7375,
|
||
|
|
"step": 4135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7761529808773904,
|
||
|
|
"grad_norm": 0.036408551423671234,
|
||
|
|
"learning_rate": 4.3518281451706477e-05,
|
||
|
|
"loss": 0.7614,
|
||
|
|
"step": 4140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7770903637045369,
|
||
|
|
"grad_norm": 0.036659780564322314,
|
||
|
|
"learning_rate": 4.317311773835043e-05,
|
||
|
|
"loss": 0.7333,
|
||
|
|
"step": 4145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7780277465316835,
|
||
|
|
"grad_norm": 0.03787359723072369,
|
||
|
|
"learning_rate": 4.2829098055079524e-05,
|
||
|
|
"loss": 0.7321,
|
||
|
|
"step": 4150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7789651293588301,
|
||
|
|
"grad_norm": 0.039178508740307504,
|
||
|
|
"learning_rate": 4.248622608606802e-05,
|
||
|
|
"loss": 0.7568,
|
||
|
|
"step": 4155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7799025121859767,
|
||
|
|
"grad_norm": 0.03788147306371571,
|
||
|
|
"learning_rate": 4.214450550319943e-05,
|
||
|
|
"loss": 0.7002,
|
||
|
|
"step": 4160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7808398950131233,
|
||
|
|
"grad_norm": 0.03764951434893332,
|
||
|
|
"learning_rate": 4.180393996602651e-05,
|
||
|
|
"loss": 0.7483,
|
||
|
|
"step": 4165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.78177727784027,
|
||
|
|
"grad_norm": 0.03643925778123276,
|
||
|
|
"learning_rate": 4.1464533121732613e-05,
|
||
|
|
"loss": 0.7608,
|
||
|
|
"step": 4170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7827146606674166,
|
||
|
|
"grad_norm": 0.03904458879716677,
|
||
|
|
"learning_rate": 4.112628860509238e-05,
|
||
|
|
"loss": 0.736,
|
||
|
|
"step": 4175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7836520434945632,
|
||
|
|
"grad_norm": 0.0367790048472788,
|
||
|
|
"learning_rate": 4.078921003843276e-05,
|
||
|
|
"loss": 0.7516,
|
||
|
|
"step": 4180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7845894263217098,
|
||
|
|
"grad_norm": 0.035606207842431264,
|
||
|
|
"learning_rate": 4.045330103159454e-05,
|
||
|
|
"loss": 0.7487,
|
||
|
|
"step": 4185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7855268091488564,
|
||
|
|
"grad_norm": 0.03778912384186634,
|
||
|
|
"learning_rate": 4.01185651818932e-05,
|
||
|
|
"loss": 0.7418,
|
||
|
|
"step": 4190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.786464191976003,
|
||
|
|
"grad_norm": 0.03616645503819011,
|
||
|
|
"learning_rate": 3.97850060740808e-05,
|
||
|
|
"loss": 0.72,
|
||
|
|
"step": 4195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7874015748031497,
|
||
|
|
"grad_norm": 0.04137105544838257,
|
||
|
|
"learning_rate": 3.945262728030739e-05,
|
||
|
|
"loss": 0.7297,
|
||
|
|
"step": 4200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7883389576302963,
|
||
|
|
"grad_norm": 0.04008149649572554,
|
||
|
|
"learning_rate": 3.912143236008286e-05,
|
||
|
|
"loss": 0.7546,
|
||
|
|
"step": 4205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7892763404574428,
|
||
|
|
"grad_norm": 0.04178918635092829,
|
||
|
|
"learning_rate": 3.879142486023869e-05,
|
||
|
|
"loss": 0.705,
|
||
|
|
"step": 4210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7902137232845894,
|
||
|
|
"grad_norm": 0.03910230683910834,
|
||
|
|
"learning_rate": 3.8462608314890084e-05,
|
||
|
|
"loss": 0.7227,
|
||
|
|
"step": 4215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.791151106111736,
|
||
|
|
"grad_norm": 0.041392117493789675,
|
||
|
|
"learning_rate": 3.8134986245398084e-05,
|
||
|
|
"loss": 0.7221,
|
||
|
|
"step": 4220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7920884889388826,
|
||
|
|
"grad_norm": 0.037713798352039335,
|
||
|
|
"learning_rate": 3.780856216033185e-05,
|
||
|
|
"loss": 0.7362,
|
||
|
|
"step": 4225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7930258717660292,
|
||
|
|
"grad_norm": 0.0386257393458269,
|
||
|
|
"learning_rate": 3.7483339555431055e-05,
|
||
|
|
"loss": 0.75,
|
||
|
|
"step": 4230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7939632545931758,
|
||
|
|
"grad_norm": 0.036597780629712674,
|
||
|
|
"learning_rate": 3.715932191356861e-05,
|
||
|
|
"loss": 0.7258,
|
||
|
|
"step": 4235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7949006374203225,
|
||
|
|
"grad_norm": 0.0351893354995898,
|
||
|
|
"learning_rate": 3.683651270471296e-05,
|
||
|
|
"loss": 0.7199,
|
||
|
|
"step": 4240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7958380202474691,
|
||
|
|
"grad_norm": 0.038598355578632684,
|
||
|
|
"learning_rate": 3.65149153858916e-05,
|
||
|
|
"loss": 0.7298,
|
||
|
|
"step": 4245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7967754030746157,
|
||
|
|
"grad_norm": 0.03440364370849841,
|
||
|
|
"learning_rate": 3.619453340115326e-05,
|
||
|
|
"loss": 0.7312,
|
||
|
|
"step": 4250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7977127859017623,
|
||
|
|
"grad_norm": 0.03802311815601217,
|
||
|
|
"learning_rate": 3.5875370181531754e-05,
|
||
|
|
"loss": 0.7424,
|
||
|
|
"step": 4255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7986501687289089,
|
||
|
|
"grad_norm": 0.03885312032393017,
|
||
|
|
"learning_rate": 3.555742914500867e-05,
|
||
|
|
"loss": 0.7447,
|
||
|
|
"step": 4260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7995875515560555,
|
||
|
|
"grad_norm": 0.03952348857049222,
|
||
|
|
"learning_rate": 3.5240713696477095e-05,
|
||
|
|
"loss": 0.7275,
|
||
|
|
"step": 4265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.800524934383202,
|
||
|
|
"grad_norm": 0.03783212853790451,
|
||
|
|
"learning_rate": 3.4925227227705085e-05,
|
||
|
|
"loss": 0.7673,
|
||
|
|
"step": 4270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8014623172103487,
|
||
|
|
"grad_norm": 0.03855091891475635,
|
||
|
|
"learning_rate": 3.461097311729914e-05,
|
||
|
|
"loss": 0.7321,
|
||
|
|
"step": 4275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8023997000374953,
|
||
|
|
"grad_norm": 0.03853653138387215,
|
||
|
|
"learning_rate": 3.429795473066845e-05,
|
||
|
|
"loss": 0.7502,
|
||
|
|
"step": 4280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8033370828646419,
|
||
|
|
"grad_norm": 0.037938414011982106,
|
||
|
|
"learning_rate": 3.3986175419988326e-05,
|
||
|
|
"loss": 0.7827,
|
||
|
|
"step": 4285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8042744656917885,
|
||
|
|
"grad_norm": 0.03714608764447697,
|
||
|
|
"learning_rate": 3.367563852416484e-05,
|
||
|
|
"loss": 0.7504,
|
||
|
|
"step": 4290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8052118485189351,
|
||
|
|
"grad_norm": 0.037586754731944845,
|
||
|
|
"learning_rate": 3.336634736879857e-05,
|
||
|
|
"loss": 0.7412,
|
||
|
|
"step": 4295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8061492313460817,
|
||
|
|
"grad_norm": 0.03609469062292054,
|
||
|
|
"learning_rate": 3.3058305266149335e-05,
|
||
|
|
"loss": 0.7309,
|
||
|
|
"step": 4300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8070866141732284,
|
||
|
|
"grad_norm": 0.04157190953672981,
|
||
|
|
"learning_rate": 3.275151551510057e-05,
|
||
|
|
"loss": 0.7533,
|
||
|
|
"step": 4305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.808023997000375,
|
||
|
|
"grad_norm": 0.03738613014861174,
|
||
|
|
"learning_rate": 3.2445981401124035e-05,
|
||
|
|
"loss": 0.7344,
|
||
|
|
"step": 4310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8089613798275216,
|
||
|
|
"grad_norm": 0.037333684702503485,
|
||
|
|
"learning_rate": 3.2141706196244646e-05,
|
||
|
|
"loss": 0.7313,
|
||
|
|
"step": 4315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8098987626546682,
|
||
|
|
"grad_norm": 0.03849398738947332,
|
||
|
|
"learning_rate": 3.183869315900537e-05,
|
||
|
|
"loss": 0.7321,
|
||
|
|
"step": 4320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8108361454818148,
|
||
|
|
"grad_norm": 0.03780735515283732,
|
||
|
|
"learning_rate": 3.153694553443241e-05,
|
||
|
|
"loss": 0.7604,
|
||
|
|
"step": 4325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8117735283089614,
|
||
|
|
"grad_norm": 0.03877151747469851,
|
||
|
|
"learning_rate": 3.1236466554000414e-05,
|
||
|
|
"loss": 0.7123,
|
||
|
|
"step": 4330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8127109111361079,
|
||
|
|
"grad_norm": 0.03868932391660062,
|
||
|
|
"learning_rate": 3.093725943559784e-05,
|
||
|
|
"loss": 0.7377,
|
||
|
|
"step": 4335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8136482939632546,
|
||
|
|
"grad_norm": 0.03748111742387308,
|
||
|
|
"learning_rate": 3.0639327383492544e-05,
|
||
|
|
"loss": 0.7171,
|
||
|
|
"step": 4340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8145856767904012,
|
||
|
|
"grad_norm": 0.03879381228880918,
|
||
|
|
"learning_rate": 3.0342673588297473e-05,
|
||
|
|
"loss": 0.7177,
|
||
|
|
"step": 4345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8155230596175478,
|
||
|
|
"grad_norm": 0.03783669509160595,
|
||
|
|
"learning_rate": 3.004730122693641e-05,
|
||
|
|
"loss": 0.7492,
|
||
|
|
"step": 4350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8164604424446944,
|
||
|
|
"grad_norm": 0.036123962271055896,
|
||
|
|
"learning_rate": 2.9753213462610077e-05,
|
||
|
|
"loss": 0.7327,
|
||
|
|
"step": 4355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.817397825271841,
|
||
|
|
"grad_norm": 0.037712932440491655,
|
||
|
|
"learning_rate": 2.9460413444762143e-05,
|
||
|
|
"loss": 0.7382,
|
||
|
|
"step": 4360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8183352080989876,
|
||
|
|
"grad_norm": 0.03722083272698044,
|
||
|
|
"learning_rate": 2.9168904309045614e-05,
|
||
|
|
"loss": 0.7259,
|
||
|
|
"step": 4365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8192725909261342,
|
||
|
|
"grad_norm": 0.035672510126638225,
|
||
|
|
"learning_rate": 2.8878689177289005e-05,
|
||
|
|
"loss": 0.7282,
|
||
|
|
"step": 4370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8202099737532809,
|
||
|
|
"grad_norm": 0.03514557869239733,
|
||
|
|
"learning_rate": 2.8589771157463394e-05,
|
||
|
|
"loss": 0.741,
|
||
|
|
"step": 4375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8211473565804275,
|
||
|
|
"grad_norm": 0.03779365346411691,
|
||
|
|
"learning_rate": 2.8302153343648486e-05,
|
||
|
|
"loss": 0.7367,
|
||
|
|
"step": 4380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8220847394075741,
|
||
|
|
"grad_norm": 0.036530868365906646,
|
||
|
|
"learning_rate": 2.8015838816000168e-05,
|
||
|
|
"loss": 0.7395,
|
||
|
|
"step": 4385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8230221222347207,
|
||
|
|
"grad_norm": 0.03774672510896896,
|
||
|
|
"learning_rate": 2.773083064071685e-05,
|
||
|
|
"loss": 0.7439,
|
||
|
|
"step": 4390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8239595050618672,
|
||
|
|
"grad_norm": 0.03737530943067525,
|
||
|
|
"learning_rate": 2.7447131870007268e-05,
|
||
|
|
"loss": 0.7391,
|
||
|
|
"step": 4395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8248968878890138,
|
||
|
|
"grad_norm": 0.03589949691489075,
|
||
|
|
"learning_rate": 2.716474554205722e-05,
|
||
|
|
"loss": 0.7201,
|
||
|
|
"step": 4400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8258342707161604,
|
||
|
|
"grad_norm": 0.039108253157191145,
|
||
|
|
"learning_rate": 2.688367468099739e-05,
|
||
|
|
"loss": 0.715,
|
||
|
|
"step": 4405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8267716535433071,
|
||
|
|
"grad_norm": 0.040098561050262535,
|
||
|
|
"learning_rate": 2.6603922296871043e-05,
|
||
|
|
"loss": 0.7635,
|
||
|
|
"step": 4410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8277090363704537,
|
||
|
|
"grad_norm": 0.035538429924797096,
|
||
|
|
"learning_rate": 2.632549138560129e-05,
|
||
|
|
"loss": 0.7304,
|
||
|
|
"step": 4415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8286464191976003,
|
||
|
|
"grad_norm": 0.035183348616448194,
|
||
|
|
"learning_rate": 2.6048384928959653e-05,
|
||
|
|
"loss": 0.723,
|
||
|
|
"step": 4420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8295838020247469,
|
||
|
|
"grad_norm": 0.035542818739637466,
|
||
|
|
"learning_rate": 2.5772605894533533e-05,
|
||
|
|
"loss": 0.7513,
|
||
|
|
"step": 4425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8305211848518935,
|
||
|
|
"grad_norm": 0.040912090543415644,
|
||
|
|
"learning_rate": 2.5498157235694888e-05,
|
||
|
|
"loss": 0.7463,
|
||
|
|
"step": 4430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8314585676790401,
|
||
|
|
"grad_norm": 0.04115878743265954,
|
||
|
|
"learning_rate": 2.5225041891568366e-05,
|
||
|
|
"loss": 0.7421,
|
||
|
|
"step": 4435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8323959505061868,
|
||
|
|
"grad_norm": 0.03473020418203695,
|
||
|
|
"learning_rate": 2.4953262786999846e-05,
|
||
|
|
"loss": 0.7318,
|
||
|
|
"step": 4440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8333333333333334,
|
||
|
|
"grad_norm": 0.03945899992708612,
|
||
|
|
"learning_rate": 2.468282283252524e-05,
|
||
|
|
"loss": 0.7399,
|
||
|
|
"step": 4445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.83427071616048,
|
||
|
|
"grad_norm": 0.041181932452618496,
|
||
|
|
"learning_rate": 2.4413724924339166e-05,
|
||
|
|
"loss": 0.7515,
|
||
|
|
"step": 4450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8352080989876266,
|
||
|
|
"grad_norm": 0.03757379419175963,
|
||
|
|
"learning_rate": 2.4145971944264025e-05,
|
||
|
|
"loss": 0.7572,
|
||
|
|
"step": 4455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8361454818147731,
|
||
|
|
"grad_norm": 0.03812065383102868,
|
||
|
|
"learning_rate": 2.3879566759719132e-05,
|
||
|
|
"loss": 0.7111,
|
||
|
|
"step": 4460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8370828646419197,
|
||
|
|
"grad_norm": 0.03758167524030431,
|
||
|
|
"learning_rate": 2.3614512223690002e-05,
|
||
|
|
"loss": 0.7408,
|
||
|
|
"step": 4465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8380202474690663,
|
||
|
|
"grad_norm": 0.0357989413923993,
|
||
|
|
"learning_rate": 2.335081117469777e-05,
|
||
|
|
"loss": 0.7404,
|
||
|
|
"step": 4470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.838957630296213,
|
||
|
|
"grad_norm": 0.03719143556333986,
|
||
|
|
"learning_rate": 2.308846643676875e-05,
|
||
|
|
"loss": 0.7421,
|
||
|
|
"step": 4475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8398950131233596,
|
||
|
|
"grad_norm": 0.038416698176695595,
|
||
|
|
"learning_rate": 2.2827480819404386e-05,
|
||
|
|
"loss": 0.7391,
|
||
|
|
"step": 4480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8408323959505062,
|
||
|
|
"grad_norm": 0.038838405309790824,
|
||
|
|
"learning_rate": 2.2567857117550958e-05,
|
||
|
|
"loss": 0.7504,
|
||
|
|
"step": 4485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8417697787776528,
|
||
|
|
"grad_norm": 0.03678430153531787,
|
||
|
|
"learning_rate": 2.230959811156972e-05,
|
||
|
|
"loss": 0.721,
|
||
|
|
"step": 4490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8427071616047994,
|
||
|
|
"grad_norm": 0.037007576832510956,
|
||
|
|
"learning_rate": 2.2052706567207156e-05,
|
||
|
|
"loss": 0.7095,
|
||
|
|
"step": 4495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.843644544431946,
|
||
|
|
"grad_norm": 0.03453973181827858,
|
||
|
|
"learning_rate": 2.179718523556531e-05,
|
||
|
|
"loss": 0.7229,
|
||
|
|
"step": 4500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8445819272590926,
|
||
|
|
"grad_norm": 0.037382201951380103,
|
||
|
|
"learning_rate": 2.1543036853072386e-05,
|
||
|
|
"loss": 0.7185,
|
||
|
|
"step": 4505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8455193100862393,
|
||
|
|
"grad_norm": 0.04001888522833685,
|
||
|
|
"learning_rate": 2.1290264141453313e-05,
|
||
|
|
"loss": 0.7342,
|
||
|
|
"step": 4510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8464566929133859,
|
||
|
|
"grad_norm": 0.035396516604348185,
|
||
|
|
"learning_rate": 2.103886980770085e-05,
|
||
|
|
"loss": 0.7128,
|
||
|
|
"step": 4515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8473940757405324,
|
||
|
|
"grad_norm": 0.03790943797877688,
|
||
|
|
"learning_rate": 2.0788856544046216e-05,
|
||
|
|
"loss": 0.7599,
|
||
|
|
"step": 4520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.848331458567679,
|
||
|
|
"grad_norm": 0.03690950991272133,
|
||
|
|
"learning_rate": 2.0540227027930773e-05,
|
||
|
|
"loss": 0.7094,
|
||
|
|
"step": 4525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8492688413948256,
|
||
|
|
"grad_norm": 0.036299029934951134,
|
||
|
|
"learning_rate": 2.0292983921976753e-05,
|
||
|
|
"loss": 0.7197,
|
||
|
|
"step": 4530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8502062242219722,
|
||
|
|
"grad_norm": 0.036464577070665444,
|
||
|
|
"learning_rate": 2.004712987395924e-05,
|
||
|
|
"loss": 0.7306,
|
||
|
|
"step": 4535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8511436070491188,
|
||
|
|
"grad_norm": 0.035761152065985326,
|
||
|
|
"learning_rate": 1.9802667516777565e-05,
|
||
|
|
"loss": 0.7289,
|
||
|
|
"step": 4540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8520809898762655,
|
||
|
|
"grad_norm": 0.03812262175593615,
|
||
|
|
"learning_rate": 1.9559599468427183e-05,
|
||
|
|
"loss": 0.7406,
|
||
|
|
"step": 4545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8530183727034121,
|
||
|
|
"grad_norm": 0.03807115739595877,
|
||
|
|
"learning_rate": 1.9317928331971592e-05,
|
||
|
|
"loss": 0.7245,
|
||
|
|
"step": 4550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8539557555305587,
|
||
|
|
"grad_norm": 0.03647185658765888,
|
||
|
|
"learning_rate": 1.9077656695514526e-05,
|
||
|
|
"loss": 0.7348,
|
||
|
|
"step": 4555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8548931383577053,
|
||
|
|
"grad_norm": 0.037735068284022226,
|
||
|
|
"learning_rate": 1.8838787132172184e-05,
|
||
|
|
"loss": 0.7433,
|
||
|
|
"step": 4560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8558305211848519,
|
||
|
|
"grad_norm": 0.03411010074767823,
|
||
|
|
"learning_rate": 1.860132220004565e-05,
|
||
|
|
"loss": 0.7292,
|
||
|
|
"step": 4565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8567679040119985,
|
||
|
|
"grad_norm": 0.03512838788517134,
|
||
|
|
"learning_rate": 1.8365264442193618e-05,
|
||
|
|
"loss": 0.7125,
|
||
|
|
"step": 4570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8577052868391452,
|
||
|
|
"grad_norm": 0.03797586269699929,
|
||
|
|
"learning_rate": 1.8130616386604973e-05,
|
||
|
|
"loss": 0.7457,
|
||
|
|
"step": 4575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8586426696662918,
|
||
|
|
"grad_norm": 0.03846116349108381,
|
||
|
|
"learning_rate": 1.789738054617193e-05,
|
||
|
|
"loss": 0.7254,
|
||
|
|
"step": 4580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8595800524934383,
|
||
|
|
"grad_norm": 0.03248052807212059,
|
||
|
|
"learning_rate": 1.766555941866291e-05,
|
||
|
|
"loss": 0.7287,
|
||
|
|
"step": 4585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8605174353205849,
|
||
|
|
"grad_norm": 0.03749670548712012,
|
||
|
|
"learning_rate": 1.743515548669598e-05,
|
||
|
|
"loss": 0.7345,
|
||
|
|
"step": 4590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8614548181477315,
|
||
|
|
"grad_norm": 0.03868597238209571,
|
||
|
|
"learning_rate": 1.7206171217712135e-05,
|
||
|
|
"loss": 0.7387,
|
||
|
|
"step": 4595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8623922009748781,
|
||
|
|
"grad_norm": 0.03763605398447834,
|
||
|
|
"learning_rate": 1.6978609063948973e-05,
|
||
|
|
"loss": 0.7201,
|
||
|
|
"step": 4600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8633295838020247,
|
||
|
|
"grad_norm": 0.04009619133432012,
|
||
|
|
"learning_rate": 1.6752471462414226e-05,
|
||
|
|
"loss": 0.7384,
|
||
|
|
"step": 4605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8642669666291714,
|
||
|
|
"grad_norm": 0.03572102487743062,
|
||
|
|
"learning_rate": 1.6527760834860056e-05,
|
||
|
|
"loss": 0.7348,
|
||
|
|
"step": 4610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.865204349456318,
|
||
|
|
"grad_norm": 0.038750256044430074,
|
||
|
|
"learning_rate": 1.6304479587756652e-05,
|
||
|
|
"loss": 0.7065,
|
||
|
|
"step": 4615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8661417322834646,
|
||
|
|
"grad_norm": 0.03675663032395201,
|
||
|
|
"learning_rate": 1.6082630112266888e-05,
|
||
|
|
"loss": 0.7077,
|
||
|
|
"step": 4620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8670791151106112,
|
||
|
|
"grad_norm": 0.03352806721702409,
|
||
|
|
"learning_rate": 1.5862214784220305e-05,
|
||
|
|
"loss": 0.7214,
|
||
|
|
"step": 4625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8680164979377578,
|
||
|
|
"grad_norm": 0.03651862777178404,
|
||
|
|
"learning_rate": 1.5643235964088064e-05,
|
||
|
|
"loss": 0.7158,
|
||
|
|
"step": 4630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8689538807649044,
|
||
|
|
"grad_norm": 0.03455566062476393,
|
||
|
|
"learning_rate": 1.5425695996957416e-05,
|
||
|
|
"loss": 0.736,
|
||
|
|
"step": 4635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.869891263592051,
|
||
|
|
"grad_norm": 0.038034552465222866,
|
||
|
|
"learning_rate": 1.520959721250653e-05,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 4640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8708286464191975,
|
||
|
|
"grad_norm": 0.03814002964874412,
|
||
|
|
"learning_rate": 1.4994941924979919e-05,
|
||
|
|
"loss": 0.725,
|
||
|
|
"step": 4645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8717660292463442,
|
||
|
|
"grad_norm": 0.03613963285066848,
|
||
|
|
"learning_rate": 1.4781732433163129e-05,
|
||
|
|
"loss": 0.7158,
|
||
|
|
"step": 4650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8727034120734908,
|
||
|
|
"grad_norm": 0.0391963633366358,
|
||
|
|
"learning_rate": 1.4569971020358656e-05,
|
||
|
|
"loss": 0.7043,
|
||
|
|
"step": 4655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8736407949006374,
|
||
|
|
"grad_norm": 0.03573970458333823,
|
||
|
|
"learning_rate": 1.4359659954360985e-05,
|
||
|
|
"loss": 0.7262,
|
||
|
|
"step": 4660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.874578177727784,
|
||
|
|
"grad_norm": 0.03402185272612653,
|
||
|
|
"learning_rate": 1.4150801487432727e-05,
|
||
|
|
"loss": 0.726,
|
||
|
|
"step": 4665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8755155605549306,
|
||
|
|
"grad_norm": 0.03988457684610016,
|
||
|
|
"learning_rate": 1.394339785628027e-05,
|
||
|
|
"loss": 0.7116,
|
||
|
|
"step": 4670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8764529433820772,
|
||
|
|
"grad_norm": 0.03749913331479699,
|
||
|
|
"learning_rate": 1.373745128202986e-05,
|
||
|
|
"loss": 0.7486,
|
||
|
|
"step": 4675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8773903262092239,
|
||
|
|
"grad_norm": 0.039418943387831605,
|
||
|
|
"learning_rate": 1.3532963970203848e-05,
|
||
|
|
"loss": 0.7311,
|
||
|
|
"step": 4680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8783277090363705,
|
||
|
|
"grad_norm": 0.03691545995561202,
|
||
|
|
"learning_rate": 1.332993811069708e-05,
|
||
|
|
"loss": 0.7464,
|
||
|
|
"step": 4685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8792650918635171,
|
||
|
|
"grad_norm": 0.03634444663432465,
|
||
|
|
"learning_rate": 1.3128375877753393e-05,
|
||
|
|
"loss": 0.7048,
|
||
|
|
"step": 4690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8802024746906637,
|
||
|
|
"grad_norm": 0.03883997615492241,
|
||
|
|
"learning_rate": 1.2928279429942362e-05,
|
||
|
|
"loss": 0.743,
|
||
|
|
"step": 4695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8811398575178103,
|
||
|
|
"grad_norm": 0.03831787223481465,
|
||
|
|
"learning_rate": 1.2729650910136196e-05,
|
||
|
|
"loss": 0.72,
|
||
|
|
"step": 4700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8820772403449568,
|
||
|
|
"grad_norm": 0.038782793377748884,
|
||
|
|
"learning_rate": 1.2532492445486769e-05,
|
||
|
|
"loss": 0.7556,
|
||
|
|
"step": 4705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8830146231721034,
|
||
|
|
"grad_norm": 0.035930508758284546,
|
||
|
|
"learning_rate": 1.2336806147402828e-05,
|
||
|
|
"loss": 0.7359,
|
||
|
|
"step": 4710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8839520059992501,
|
||
|
|
"grad_norm": 0.03688784164900098,
|
||
|
|
"learning_rate": 1.2142594111527415e-05,
|
||
|
|
"loss": 0.7322,
|
||
|
|
"step": 4715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8848893888263967,
|
||
|
|
"grad_norm": 0.03418578395324309,
|
||
|
|
"learning_rate": 1.1949858417715418e-05,
|
||
|
|
"loss": 0.7145,
|
||
|
|
"step": 4720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8858267716535433,
|
||
|
|
"grad_norm": 0.03839104854863887,
|
||
|
|
"learning_rate": 1.1758601130011259e-05,
|
||
|
|
"loss": 0.708,
|
||
|
|
"step": 4725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8867641544806899,
|
||
|
|
"grad_norm": 0.03690073415729465,
|
||
|
|
"learning_rate": 1.1568824296626866e-05,
|
||
|
|
"loss": 0.7268,
|
||
|
|
"step": 4730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8877015373078365,
|
||
|
|
"grad_norm": 0.03785506282627266,
|
||
|
|
"learning_rate": 1.1380529949919593e-05,
|
||
|
|
"loss": 0.7472,
|
||
|
|
"step": 4735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8886389201349831,
|
||
|
|
"grad_norm": 0.03505149905088435,
|
||
|
|
"learning_rate": 1.1193720106370701e-05,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 4740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8895763029621298,
|
||
|
|
"grad_norm": 0.038177240967898714,
|
||
|
|
"learning_rate": 1.100839676656346e-05,
|
||
|
|
"loss": 0.7181,
|
||
|
|
"step": 4745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8905136857892764,
|
||
|
|
"grad_norm": 0.03798215847626741,
|
||
|
|
"learning_rate": 1.0824561915162016e-05,
|
||
|
|
"loss": 0.719,
|
||
|
|
"step": 4750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.891451068616423,
|
||
|
|
"grad_norm": 0.03757317113276994,
|
||
|
|
"learning_rate": 1.0642217520889873e-05,
|
||
|
|
"loss": 0.7412,
|
||
|
|
"step": 4755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8923884514435696,
|
||
|
|
"grad_norm": 0.03762594674374493,
|
||
|
|
"learning_rate": 1.0461365536509065e-05,
|
||
|
|
"loss": 0.7365,
|
||
|
|
"step": 4760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8933258342707162,
|
||
|
|
"grad_norm": 0.03752202520507965,
|
||
|
|
"learning_rate": 1.0282007898798995e-05,
|
||
|
|
"loss": 0.7146,
|
||
|
|
"step": 4765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8942632170978627,
|
||
|
|
"grad_norm": 0.031040074178325168,
|
||
|
|
"learning_rate": 1.01041465285359e-05,
|
||
|
|
"loss": 0.7121,
|
||
|
|
"step": 4770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8952005999250093,
|
||
|
|
"grad_norm": 0.03608189060059074,
|
||
|
|
"learning_rate": 9.927783330472139e-06,
|
||
|
|
"loss": 0.7328,
|
||
|
|
"step": 4775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.896137982752156,
|
||
|
|
"grad_norm": 0.03589888025539565,
|
||
|
|
"learning_rate": 9.752920193315865e-06,
|
||
|
|
"loss": 0.6956,
|
||
|
|
"step": 4780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8970753655793026,
|
||
|
|
"grad_norm": 0.03503081744003102,
|
||
|
|
"learning_rate": 9.579558989710872e-06,
|
||
|
|
"loss": 0.734,
|
||
|
|
"step": 4785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8980127484064492,
|
||
|
|
"grad_norm": 0.036423527513909,
|
||
|
|
"learning_rate": 9.407701576216281e-06,
|
||
|
|
"loss": 0.7139,
|
||
|
|
"step": 4790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8989501312335958,
|
||
|
|
"grad_norm": 0.03773162502672724,
|
||
|
|
"learning_rate": 9.237349793286963e-06,
|
||
|
|
"loss": 0.7286,
|
||
|
|
"step": 4795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8998875140607424,
|
||
|
|
"grad_norm": 0.0370655727344935,
|
||
|
|
"learning_rate": 9.068505465253656e-06,
|
||
|
|
"loss": 0.7364,
|
||
|
|
"step": 4800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.900824896887889,
|
||
|
|
"grad_norm": 0.03793992378742871,
|
||
|
|
"learning_rate": 8.901170400303443e-06,
|
||
|
|
"loss": 0.7117,
|
||
|
|
"step": 4805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9017622797150356,
|
||
|
|
"grad_norm": 0.03749076700190644,
|
||
|
|
"learning_rate": 8.735346390460452e-06,
|
||
|
|
"loss": 0.7482,
|
||
|
|
"step": 4810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9026996625421823,
|
||
|
|
"grad_norm": 0.03478713094360769,
|
||
|
|
"learning_rate": 8.571035211566606e-06,
|
||
|
|
"loss": 0.7282,
|
||
|
|
"step": 4815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9036370453693289,
|
||
|
|
"grad_norm": 0.03931631750725771,
|
||
|
|
"learning_rate": 8.408238623262625e-06,
|
||
|
|
"loss": 0.7347,
|
||
|
|
"step": 4820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9045744281964755,
|
||
|
|
"grad_norm": 0.03832734319910059,
|
||
|
|
"learning_rate": 8.246958368969164e-06,
|
||
|
|
"loss": 0.7085,
|
||
|
|
"step": 4825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.905511811023622,
|
||
|
|
"grad_norm": 0.036342961917758904,
|
||
|
|
"learning_rate": 8.087196175868204e-06,
|
||
|
|
"loss": 0.7459,
|
||
|
|
"step": 4830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9064491938507686,
|
||
|
|
"grad_norm": 0.03811297614516944,
|
||
|
|
"learning_rate": 7.928953754884482e-06,
|
||
|
|
"loss": 0.7245,
|
||
|
|
"step": 4835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9073865766779152,
|
||
|
|
"grad_norm": 0.03617635523056376,
|
||
|
|
"learning_rate": 7.772232800667117e-06,
|
||
|
|
"loss": 0.6879,
|
||
|
|
"step": 4840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9083239595050618,
|
||
|
|
"grad_norm": 0.03686080577213026,
|
||
|
|
"learning_rate": 7.617034991571747e-06,
|
||
|
|
"loss": 0.7308,
|
||
|
|
"step": 4845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9092613423322085,
|
||
|
|
"grad_norm": 0.03468911090610316,
|
||
|
|
"learning_rate": 7.463361989642108e-06,
|
||
|
|
"loss": 0.7266,
|
||
|
|
"step": 4850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9101987251593551,
|
||
|
|
"grad_norm": 0.03837314601114124,
|
||
|
|
"learning_rate": 7.311215440592649e-06,
|
||
|
|
"loss": 0.7244,
|
||
|
|
"step": 4855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9111361079865017,
|
||
|
|
"grad_norm": 0.03515020181576783,
|
||
|
|
"learning_rate": 7.160596973790678e-06,
|
||
|
|
"loss": 0.7095,
|
||
|
|
"step": 4860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9120734908136483,
|
||
|
|
"grad_norm": 0.04067375872375338,
|
||
|
|
"learning_rate": 7.011508202238958e-06,
|
||
|
|
"loss": 0.7565,
|
||
|
|
"step": 4865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9130108736407949,
|
||
|
|
"grad_norm": 0.037896195508876414,
|
||
|
|
"learning_rate": 6.8639507225584755e-06,
|
||
|
|
"loss": 0.7479,
|
||
|
|
"step": 4870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9139482564679415,
|
||
|
|
"grad_norm": 0.035925556435556304,
|
||
|
|
"learning_rate": 6.7179261149712335e-06,
|
||
|
|
"loss": 0.7287,
|
||
|
|
"step": 4875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9148856392950881,
|
||
|
|
"grad_norm": 0.03437946260531882,
|
||
|
|
"learning_rate": 6.57343594328355e-06,
|
||
|
|
"loss": 0.7154,
|
||
|
|
"step": 4880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9158230221222348,
|
||
|
|
"grad_norm": 0.036848739384731176,
|
||
|
|
"learning_rate": 6.430481754868988e-06,
|
||
|
|
"loss": 0.7306,
|
||
|
|
"step": 4885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9167604049493814,
|
||
|
|
"grad_norm": 0.03819174514800313,
|
||
|
|
"learning_rate": 6.289065080652134e-06,
|
||
|
|
"loss": 0.7423,
|
||
|
|
"step": 4890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9176977877765279,
|
||
|
|
"grad_norm": 0.04015640945147137,
|
||
|
|
"learning_rate": 6.149187435091912e-06,
|
||
|
|
"loss": 0.7388,
|
||
|
|
"step": 4895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9186351706036745,
|
||
|
|
"grad_norm": 0.0354113100153628,
|
||
|
|
"learning_rate": 6.010850316165533e-06,
|
||
|
|
"loss": 0.7153,
|
||
|
|
"step": 4900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9195725534308211,
|
||
|
|
"grad_norm": 0.03619736356277513,
|
||
|
|
"learning_rate": 5.8740552053524185e-06,
|
||
|
|
"loss": 0.7123,
|
||
|
|
"step": 4905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9205099362579677,
|
||
|
|
"grad_norm": 0.03357169683122578,
|
||
|
|
"learning_rate": 5.7388035676183e-06,
|
||
|
|
"loss": 0.7367,
|
||
|
|
"step": 4910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9214473190851143,
|
||
|
|
"grad_norm": 0.035341368116608526,
|
||
|
|
"learning_rate": 5.6050968513995484e-06,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 4915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.922384701912261,
|
||
|
|
"grad_norm": 0.03463388384208746,
|
||
|
|
"learning_rate": 5.472936488587687e-06,
|
||
|
|
"loss": 0.7045,
|
||
|
|
"step": 4920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9233220847394076,
|
||
|
|
"grad_norm": 0.03646756098895019,
|
||
|
|
"learning_rate": 5.342323894514017e-06,
|
||
|
|
"loss": 0.692,
|
||
|
|
"step": 4925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9242594675665542,
|
||
|
|
"grad_norm": 0.0378552791061763,
|
||
|
|
"learning_rate": 5.213260467934499e-06,
|
||
|
|
"loss": 0.7308,
|
||
|
|
"step": 4930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9251968503937008,
|
||
|
|
"grad_norm": 0.03651837390186468,
|
||
|
|
"learning_rate": 5.085747591014716e-06,
|
||
|
|
"loss": 0.7241,
|
||
|
|
"step": 4935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9261342332208474,
|
||
|
|
"grad_norm": 0.03496033002413129,
|
||
|
|
"learning_rate": 4.959786629315166e-06,
|
||
|
|
"loss": 0.7201,
|
||
|
|
"step": 4940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.927071616047994,
|
||
|
|
"grad_norm": 0.03807303842127231,
|
||
|
|
"learning_rate": 4.83537893177654e-06,
|
||
|
|
"loss": 0.7228,
|
||
|
|
"step": 4945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9280089988751407,
|
||
|
|
"grad_norm": 0.03388630160185748,
|
||
|
|
"learning_rate": 4.712525830705338e-06,
|
||
|
|
"loss": 0.7255,
|
||
|
|
"step": 4950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9289463817022872,
|
||
|
|
"grad_norm": 0.03359401994529555,
|
||
|
|
"learning_rate": 4.591228641759559e-06,
|
||
|
|
"loss": 0.7145,
|
||
|
|
"step": 4955
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9298837645294338,
|
||
|
|
"grad_norm": 0.03790506150006882,
|
||
|
|
"learning_rate": 4.471488663934647e-06,
|
||
|
|
"loss": 0.7182,
|
||
|
|
"step": 4960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9308211473565804,
|
||
|
|
"grad_norm": 0.03717261368444523,
|
||
|
|
"learning_rate": 4.3533071795496035e-06,
|
||
|
|
"loss": 0.7423,
|
||
|
|
"step": 4965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.931758530183727,
|
||
|
|
"grad_norm": 0.03725676902347381,
|
||
|
|
"learning_rate": 4.236685454233113e-06,
|
||
|
|
"loss": 0.7384,
|
||
|
|
"step": 4970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9326959130108736,
|
||
|
|
"grad_norm": 0.03790059729911343,
|
||
|
|
"learning_rate": 4.12162473691024e-06,
|
||
|
|
"loss": 0.7408,
|
||
|
|
"step": 4975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9336332958380202,
|
||
|
|
"grad_norm": 0.03917020457274146,
|
||
|
|
"learning_rate": 4.008126259788752e-06,
|
||
|
|
"loss": 0.7203,
|
||
|
|
"step": 4980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9345706786651669,
|
||
|
|
"grad_norm": 0.037051465451931255,
|
||
|
|
"learning_rate": 3.896191238346219e-06,
|
||
|
|
"loss": 0.7295,
|
||
|
|
"step": 4985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9355080614923135,
|
||
|
|
"grad_norm": 0.037826370662724866,
|
||
|
|
"learning_rate": 3.785820871316736e-06,
|
||
|
|
"loss": 0.7087,
|
||
|
|
"step": 4990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9364454443194601,
|
||
|
|
"grad_norm": 0.03734181026651175,
|
||
|
|
"learning_rate": 3.677016340678318e-06,
|
||
|
|
"loss": 0.7244,
|
||
|
|
"step": 4995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9373828271466067,
|
||
|
|
"grad_norm": 0.03568772503256821,
|
||
|
|
"learning_rate": 3.5697788116400283e-06,
|
||
|
|
"loss": 0.7204,
|
||
|
|
"step": 5000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9383202099737533,
|
||
|
|
"grad_norm": 0.03537228998662857,
|
||
|
|
"learning_rate": 3.4641094326296524e-06,
|
||
|
|
"loss": 0.7089,
|
||
|
|
"step": 5005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9392575928008999,
|
||
|
|
"grad_norm": 0.037141105148803415,
|
||
|
|
"learning_rate": 3.3600093352814107e-06,
|
||
|
|
"loss": 0.7329,
|
||
|
|
"step": 5010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9401949756280465,
|
||
|
|
"grad_norm": 0.035406141374499524,
|
||
|
|
"learning_rate": 3.2574796344236153e-06,
|
||
|
|
"loss": 0.7033,
|
||
|
|
"step": 5015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.941132358455193,
|
||
|
|
"grad_norm": 0.041168173265274895,
|
||
|
|
"learning_rate": 3.1565214280670825e-06,
|
||
|
|
"loss": 0.7038,
|
||
|
|
"step": 5020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9420697412823397,
|
||
|
|
"grad_norm": 0.039723554038275884,
|
||
|
|
"learning_rate": 3.0571357973930234e-06,
|
||
|
|
"loss": 0.7097,
|
||
|
|
"step": 5025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9430071241094863,
|
||
|
|
"grad_norm": 0.032544537124457665,
|
||
|
|
"learning_rate": 2.959323806741737e-06,
|
||
|
|
"loss": 0.7272,
|
||
|
|
"step": 5030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9439445069366329,
|
||
|
|
"grad_norm": 0.03461405648425469,
|
||
|
|
"learning_rate": 2.8630865036010364e-06,
|
||
|
|
"loss": 0.7141,
|
||
|
|
"step": 5035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9448818897637795,
|
||
|
|
"grad_norm": 0.036824105839646924,
|
||
|
|
"learning_rate": 2.7684249185951415e-06,
|
||
|
|
"loss": 0.7335,
|
||
|
|
"step": 5040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9458192725909261,
|
||
|
|
"grad_norm": 0.04002092643887271,
|
||
|
|
"learning_rate": 2.6753400654735524e-06,
|
||
|
|
"loss": 0.7372,
|
||
|
|
"step": 5045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9467566554180727,
|
||
|
|
"grad_norm": 0.03570329291792829,
|
||
|
|
"learning_rate": 2.5838329411002943e-06,
|
||
|
|
"loss": 0.7127,
|
||
|
|
"step": 5050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9476940382452194,
|
||
|
|
"grad_norm": 0.034274985857504735,
|
||
|
|
"learning_rate": 2.493904525443141e-06,
|
||
|
|
"loss": 0.7057,
|
||
|
|
"step": 5055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.948631421072366,
|
||
|
|
"grad_norm": 0.03801311689032566,
|
||
|
|
"learning_rate": 2.405555781563173e-06,
|
||
|
|
"loss": 0.7282,
|
||
|
|
"step": 5060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9495688038995126,
|
||
|
|
"grad_norm": 0.03756145522959914,
|
||
|
|
"learning_rate": 2.3187876556044537e-06,
|
||
|
|
"loss": 0.6886,
|
||
|
|
"step": 5065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9505061867266592,
|
||
|
|
"grad_norm": 0.03530134077170944,
|
||
|
|
"learning_rate": 2.2336010767839194e-06,
|
||
|
|
"loss": 0.7168,
|
||
|
|
"step": 5070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9514435695538058,
|
||
|
|
"grad_norm": 0.03876854731664466,
|
||
|
|
"learning_rate": 2.1499969573813724e-06,
|
||
|
|
"loss": 0.7163,
|
||
|
|
"step": 5075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9523809523809523,
|
||
|
|
"grad_norm": 0.03235204006913281,
|
||
|
|
"learning_rate": 2.06797619272977e-06,
|
||
|
|
"loss": 0.7339,
|
||
|
|
"step": 5080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9533183352080989,
|
||
|
|
"grad_norm": 0.035560439311177215,
|
||
|
|
"learning_rate": 1.9875396612056005e-06,
|
||
|
|
"loss": 0.7356,
|
||
|
|
"step": 5085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9542557180352456,
|
||
|
|
"grad_norm": 0.03566397362898293,
|
||
|
|
"learning_rate": 1.9086882242195235e-06,
|
||
|
|
"loss": 0.6973,
|
||
|
|
"step": 5090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9551931008623922,
|
||
|
|
"grad_norm": 0.037624367410972304,
|
||
|
|
"learning_rate": 1.8314227262070435e-06,
|
||
|
|
"loss": 0.7273,
|
||
|
|
"step": 5095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9561304836895388,
|
||
|
|
"grad_norm": 0.03441332414126222,
|
||
|
|
"learning_rate": 1.7557439946196017e-06,
|
||
|
|
"loss": 0.722,
|
||
|
|
"step": 5100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9570678665166854,
|
||
|
|
"grad_norm": 0.03664904948143634,
|
||
|
|
"learning_rate": 1.6816528399155982e-06,
|
||
|
|
"loss": 0.7412,
|
||
|
|
"step": 5105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.958005249343832,
|
||
|
|
"grad_norm": 0.03715182226165275,
|
||
|
|
"learning_rate": 1.60915005555175e-06,
|
||
|
|
"loss": 0.7232,
|
||
|
|
"step": 5110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9589426321709786,
|
||
|
|
"grad_norm": 0.03639187438599698,
|
||
|
|
"learning_rate": 1.5382364179746808e-06,
|
||
|
|
"loss": 0.737,
|
||
|
|
"step": 5115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9598800149981253,
|
||
|
|
"grad_norm": 0.03868780676867553,
|
||
|
|
"learning_rate": 1.4689126866124278e-06,
|
||
|
|
"loss": 0.7097,
|
||
|
|
"step": 5120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9608173978252719,
|
||
|
|
"grad_norm": 0.036291056043944926,
|
||
|
|
"learning_rate": 1.4011796038665145e-06,
|
||
|
|
"loss": 0.7186,
|
||
|
|
"step": 5125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9617547806524185,
|
||
|
|
"grad_norm": 0.06821587616968404,
|
||
|
|
"learning_rate": 1.335037895103791e-06,
|
||
|
|
"loss": 0.7374,
|
||
|
|
"step": 5130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9626921634795651,
|
||
|
|
"grad_norm": 0.03887971150133188,
|
||
|
|
"learning_rate": 1.2704882686488393e-06,
|
||
|
|
"loss": 0.7624,
|
||
|
|
"step": 5135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9636295463067117,
|
||
|
|
"grad_norm": 0.036497584321372616,
|
||
|
|
"learning_rate": 1.2075314157762972e-06,
|
||
|
|
"loss": 0.718,
|
||
|
|
"step": 5140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9645669291338582,
|
||
|
|
"grad_norm": 0.039035533336186354,
|
||
|
|
"learning_rate": 1.1461680107034798e-06,
|
||
|
|
"loss": 0.7421,
|
||
|
|
"step": 5145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9655043119610048,
|
||
|
|
"grad_norm": 0.035839951992695206,
|
||
|
|
"learning_rate": 1.0863987105831696e-06,
|
||
|
|
"loss": 0.73,
|
||
|
|
"step": 5150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9664416947881515,
|
||
|
|
"grad_norm": 0.03563901861543763,
|
||
|
|
"learning_rate": 1.0282241554965375e-06,
|
||
|
|
"loss": 0.7314,
|
||
|
|
"step": 5155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9673790776152981,
|
||
|
|
"grad_norm": 0.034948413836722134,
|
||
|
|
"learning_rate": 9.716449684463502e-07,
|
||
|
|
"loss": 0.7079,
|
||
|
|
"step": 5160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9683164604424447,
|
||
|
|
"grad_norm": 0.037670482239960106,
|
||
|
|
"learning_rate": 9.166617553502064e-07,
|
||
|
|
"loss": 0.7209,
|
||
|
|
"step": 5165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9692538432695913,
|
||
|
|
"grad_norm": 0.03868203643112819,
|
||
|
|
"learning_rate": 8.632751050341946e-07,
|
||
|
|
"loss": 0.7267,
|
||
|
|
"step": 5170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9701912260967379,
|
||
|
|
"grad_norm": 0.03685196650404434,
|
||
|
|
"learning_rate": 8.114855892264128e-07,
|
||
|
|
"loss": 0.7288,
|
||
|
|
"step": 5175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9711286089238845,
|
||
|
|
"grad_norm": 0.03480760162856044,
|
||
|
|
"learning_rate": 7.612937625509741e-07,
|
||
|
|
"loss": 0.714,
|
||
|
|
"step": 5180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9720659917510311,
|
||
|
|
"grad_norm": 0.035112246544013306,
|
||
|
|
"learning_rate": 7.127001625220286e-07,
|
||
|
|
"loss": 0.725,
|
||
|
|
"step": 5185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9730033745781778,
|
||
|
|
"grad_norm": 0.034016779690611565,
|
||
|
|
"learning_rate": 6.657053095380005e-07,
|
||
|
|
"loss": 0.6879,
|
||
|
|
"step": 5190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9739407574053244,
|
||
|
|
"grad_norm": 0.03711659986007524,
|
||
|
|
"learning_rate": 6.203097068759933e-07,
|
||
|
|
"loss": 0.7412,
|
||
|
|
"step": 5195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.974878140232471,
|
||
|
|
"grad_norm": 0.039330504751635645,
|
||
|
|
"learning_rate": 5.765138406864434e-07,
|
||
|
|
"loss": 0.7359,
|
||
|
|
"step": 5200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9758155230596175,
|
||
|
|
"grad_norm": 0.03613689473953272,
|
||
|
|
"learning_rate": 5.343181799878916e-07,
|
||
|
|
"loss": 0.7249,
|
||
|
|
"step": 5205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9767529058867641,
|
||
|
|
"grad_norm": 0.03766304326222476,
|
||
|
|
"learning_rate": 4.937231766619698e-07,
|
||
|
|
"loss": 0.7141,
|
||
|
|
"step": 5210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9776902887139107,
|
||
|
|
"grad_norm": 0.03797367873629258,
|
||
|
|
"learning_rate": 4.547292654485557e-07,
|
||
|
|
"loss": 0.7325,
|
||
|
|
"step": 5215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9786276715410573,
|
||
|
|
"grad_norm": 0.038696832388832716,
|
||
|
|
"learning_rate": 4.1733686394109236e-07,
|
||
|
|
"loss": 0.735,
|
||
|
|
"step": 5220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.979565054368204,
|
||
|
|
"grad_norm": 0.03372024341796256,
|
||
|
|
"learning_rate": 3.815463725821755e-07,
|
||
|
|
"loss": 0.7132,
|
||
|
|
"step": 5225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9805024371953506,
|
||
|
|
"grad_norm": 0.03911193375759219,
|
||
|
|
"learning_rate": 3.473581746592069e-07,
|
||
|
|
"loss": 0.7413,
|
||
|
|
"step": 5230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9814398200224972,
|
||
|
|
"grad_norm": 0.03595612615841676,
|
||
|
|
"learning_rate": 3.1477263630033113e-07,
|
||
|
|
"loss": 0.7263,
|
||
|
|
"step": 5235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9823772028496438,
|
||
|
|
"grad_norm": 0.035577055944527154,
|
||
|
|
"learning_rate": 2.8379010647045506e-07,
|
||
|
|
"loss": 0.724,
|
||
|
|
"step": 5240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9833145856767904,
|
||
|
|
"grad_norm": 0.03939881083971358,
|
||
|
|
"learning_rate": 2.5441091696761783e-07,
|
||
|
|
"loss": 0.7345,
|
||
|
|
"step": 5245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.984251968503937,
|
||
|
|
"grad_norm": 0.03509258626104014,
|
||
|
|
"learning_rate": 2.266353824193101e-07,
|
||
|
|
"loss": 0.689,
|
||
|
|
"step": 5250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9851893513310837,
|
||
|
|
"grad_norm": 0.03642207354018445,
|
||
|
|
"learning_rate": 2.0046380027921028e-07,
|
||
|
|
"loss": 0.7582,
|
||
|
|
"step": 5255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9861267341582303,
|
||
|
|
"grad_norm": 0.03655317102724065,
|
||
|
|
"learning_rate": 1.7589645082392024e-07,
|
||
|
|
"loss": 0.728,
|
||
|
|
"step": 5260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9870641169853769,
|
||
|
|
"grad_norm": 0.03681201840565274,
|
||
|
|
"learning_rate": 1.529335971500345e-07,
|
||
|
|
"loss": 0.7029,
|
||
|
|
"step": 5265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9880014998125234,
|
||
|
|
"grad_norm": 0.034195294045265584,
|
||
|
|
"learning_rate": 1.315754851712425e-07,
|
||
|
|
"loss": 0.7106,
|
||
|
|
"step": 5270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.98893888263967,
|
||
|
|
"grad_norm": 0.033879744761242093,
|
||
|
|
"learning_rate": 1.1182234361579722e-07,
|
||
|
|
"loss": 0.732,
|
||
|
|
"step": 5275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9898762654668166,
|
||
|
|
"grad_norm": 0.036906203150992155,
|
||
|
|
"learning_rate": 9.367438402395066e-08,
|
||
|
|
"loss": 0.7243,
|
||
|
|
"step": 5280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9908136482939632,
|
||
|
|
"grad_norm": 0.03751280991426079,
|
||
|
|
"learning_rate": 7.713180074577219e-08,
|
||
|
|
"loss": 0.7429,
|
||
|
|
"step": 5285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9917510311211098,
|
||
|
|
"grad_norm": 0.03933888372810454,
|
||
|
|
"learning_rate": 6.219477093905023e-08,
|
||
|
|
"loss": 0.7368,
|
||
|
|
"step": 5290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9926884139482565,
|
||
|
|
"grad_norm": 0.03920435371912244,
|
||
|
|
"learning_rate": 4.8863454567360513e-08,
|
||
|
|
"loss": 0.7415,
|
||
|
|
"step": 5295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9936257967754031,
|
||
|
|
"grad_norm": 0.0340028517083224,
|
||
|
|
"learning_rate": 3.7137994398400705e-08,
|
||
|
|
"loss": 0.736,
|
||
|
|
"step": 5300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9945631796025497,
|
||
|
|
"grad_norm": 0.03596691936417686,
|
||
|
|
"learning_rate": 2.7018516002424996e-08,
|
||
|
|
"loss": 0.7027,
|
||
|
|
"step": 5305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9955005624296963,
|
||
|
|
"grad_norm": 0.035041534347360945,
|
||
|
|
"learning_rate": 1.850512775091184e-08,
|
||
|
|
"loss": 0.7338,
|
||
|
|
"step": 5310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9964379452568429,
|
||
|
|
"grad_norm": 0.036327963873331384,
|
||
|
|
"learning_rate": 1.1597920815414885e-08,
|
||
|
|
"loss": 0.7262,
|
||
|
|
"step": 5315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9973753280839895,
|
||
|
|
"grad_norm": 0.036279192100004604,
|
||
|
|
"learning_rate": 6.296969166580401e-09,
|
||
|
|
"loss": 0.7153,
|
||
|
|
"step": 5320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9983127109111362,
|
||
|
|
"grad_norm": 0.035186584742647825,
|
||
|
|
"learning_rate": 2.6023295733312855e-09,
|
||
|
|
"loss": 0.7068,
|
||
|
|
"step": 5325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9992500937382827,
|
||
|
|
"grad_norm": 0.036172737685700306,
|
||
|
|
"learning_rate": 5.140416022841875e-10,
|
||
|
|
"loss": 0.7243,
|
||
|
|
"step": 5330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_loss": 1.0353564023971558,
|
||
|
|
"eval_runtime": 1193.1467,
|
||
|
|
"eval_samples_per_second": 196.235,
|
||
|
|
"eval_steps_per_second": 6.133,
|
||
|
|
"step": 5334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"step": 5334,
|
||
|
|
"total_flos": 777591764811776.0,
|
||
|
|
"train_loss": 0.7984603145035815,
|
||
|
|
"train_runtime": 21845.7301,
|
||
|
|
"train_samples_per_second": 31.253,
|
||
|
|
"train_steps_per_second": 0.244
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 5334,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": false,
|
||
|
|
"should_training_stop": false
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 777591764811776.0,
|
||
|
|
"train_batch_size": 4,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|