{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.001018848700968,
  "eval_steps": 500,
  "global_step": 982,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020376974019358125,
      "grad_norm": 5.835855484008789,
      "learning_rate": 3.3783783783783786e-08,
      "loss": 0.3773,
      "step": 1
    },
    {
      "epoch": 0.004075394803871625,
      "grad_norm": 6.126637935638428,
      "learning_rate": 6.756756756756757e-08,
      "loss": 0.359,
      "step": 2
    },
    {
      "epoch": 0.006113092205807438,
      "grad_norm": 6.190367698669434,
      "learning_rate": 1.0135135135135137e-07,
      "loss": 0.3575,
      "step": 3
    },
    {
      "epoch": 0.00815078960774325,
      "grad_norm": 6.040923595428467,
      "learning_rate": 1.3513513513513515e-07,
      "loss": 0.3513,
      "step": 4
    },
    {
      "epoch": 0.010188487009679063,
      "grad_norm": 5.735762596130371,
      "learning_rate": 1.6891891891891894e-07,
      "loss": 0.3365,
      "step": 5
    },
    {
      "epoch": 0.012226184411614875,
      "grad_norm": 4.77271842956543,
      "learning_rate": 2.0270270270270273e-07,
      "loss": 0.3494,
      "step": 6
    },
    {
      "epoch": 0.014263881813550688,
      "grad_norm": 4.999124050140381,
      "learning_rate": 2.3648648648648652e-07,
      "loss": 0.3416,
      "step": 7
    },
    {
      "epoch": 0.0163015792154865,
      "grad_norm": 4.210795879364014,
      "learning_rate": 2.702702702702703e-07,
      "loss": 0.3138,
      "step": 8
    },
    {
      "epoch": 0.018339276617422313,
      "grad_norm": 2.9208009243011475,
      "learning_rate": 3.040540540540541e-07,
      "loss": 0.2978,
      "step": 9
    },
    {
      "epoch": 0.020376974019358125,
      "grad_norm": 2.9749715328216553,
      "learning_rate": 3.378378378378379e-07,
      "loss": 0.2941,
      "step": 10
    },
    {
      "epoch": 0.022414671421293938,
      "grad_norm": 2.37031888961792,
      "learning_rate": 3.716216216216217e-07,
      "loss": 0.2859,
      "step": 11
    },
    {
      "epoch": 0.02445236882322975,
      "grad_norm": 2.144174098968506,
      "learning_rate": 4.0540540540540546e-07,
      "loss": 0.2767,
      "step": 12
    },
    {
      "epoch": 0.026490066225165563,
      "grad_norm": 2.0079538822174072,
      "learning_rate": 4.3918918918918923e-07,
      "loss": 0.2718,
      "step": 13
    },
    {
      "epoch": 0.028527763627101375,
      "grad_norm": 2.1613566875457764,
      "learning_rate": 4.7297297297297305e-07,
      "loss": 0.3009,
      "step": 14
    },
    {
      "epoch": 0.030565461029037188,
      "grad_norm": 1.8342921733856201,
      "learning_rate": 5.067567567567568e-07,
      "loss": 0.2576,
      "step": 15
    },
    {
      "epoch": 0.032603158430973,
      "grad_norm": 1.9354841709136963,
      "learning_rate": 5.405405405405406e-07,
      "loss": 0.2781,
      "step": 16
    },
    {
      "epoch": 0.034640855832908816,
      "grad_norm": 1.7937740087509155,
      "learning_rate": 5.743243243243245e-07,
      "loss": 0.2851,
      "step": 17
    },
    {
      "epoch": 0.036678553234844626,
      "grad_norm": 1.8419092893600464,
      "learning_rate": 6.081081081081082e-07,
      "loss": 0.2826,
      "step": 18
    },
    {
      "epoch": 0.03871625063678044,
      "grad_norm": 1.814770221710205,
      "learning_rate": 6.418918918918919e-07,
      "loss": 0.2653,
      "step": 19
    },
    {
      "epoch": 0.04075394803871625,
      "grad_norm": 1.7113629579544067,
      "learning_rate": 6.756756756756758e-07,
      "loss": 0.2695,
      "step": 20
    },
    {
      "epoch": 0.04279164544065207,
      "grad_norm": 1.6104000806808472,
      "learning_rate": 7.094594594594595e-07,
      "loss": 0.2605,
      "step": 21
    },
    {
      "epoch": 0.044829342842587876,
      "grad_norm": 1.8066229820251465,
      "learning_rate": 7.432432432432434e-07,
      "loss": 0.2654,
      "step": 22
    },
    {
      "epoch": 0.04686704024452369,
      "grad_norm": 1.6007901430130005,
      "learning_rate": 7.770270270270271e-07,
      "loss": 0.2592,
      "step": 23
    },
    {
      "epoch": 0.0489047376464595,
      "grad_norm": 1.728143334388733,
      "learning_rate": 8.108108108108109e-07,
      "loss": 0.27,
      "step": 24
    },
    {
      "epoch": 0.05094243504839532,
      "grad_norm": 1.6227835416793823,
      "learning_rate": 8.445945945945947e-07,
      "loss": 0.2647,
      "step": 25
    },
    {
      "epoch": 0.052980132450331126,
      "grad_norm": 1.5776833295822144,
      "learning_rate": 8.783783783783785e-07,
      "loss": 0.256,
      "step": 26
    },
    {
      "epoch": 0.05501782985226694,
      "grad_norm": 1.5556491613388062,
      "learning_rate": 9.121621621621622e-07,
      "loss": 0.2611,
      "step": 27
    },
    {
      "epoch": 0.05705552725420275,
      "grad_norm": 1.6114894151687622,
      "learning_rate": 9.459459459459461e-07,
      "loss": 0.2666,
      "step": 28
    },
    {
      "epoch": 0.05909322465613857,
      "grad_norm": 1.508907675743103,
      "learning_rate": 9.797297297297298e-07,
      "loss": 0.2513,
      "step": 29
    },
    {
      "epoch": 0.061130922058074376,
      "grad_norm": 1.6146596670150757,
      "learning_rate": 1.0135135135135136e-06,
      "loss": 0.2735,
      "step": 30
    },
{
|
|
"epoch": 0.06316861946001019,
|
|
"grad_norm": 1.691273808479309,
|
|
"learning_rate": 1.0472972972972973e-06,
|
|
"loss": 0.2637,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.065206316861946,
|
|
"grad_norm": 1.5835543870925903,
|
|
"learning_rate": 1.0810810810810812e-06,
|
|
"loss": 0.2657,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.06724401426388181,
|
|
"grad_norm": 1.5753782987594604,
|
|
"learning_rate": 1.114864864864865e-06,
|
|
"loss": 0.2477,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.06928171166581763,
|
|
"grad_norm": 1.6279054880142212,
|
|
"learning_rate": 1.148648648648649e-06,
|
|
"loss": 0.2603,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.07131940906775344,
|
|
"grad_norm": 1.789923906326294,
|
|
"learning_rate": 1.1824324324324326e-06,
|
|
"loss": 0.272,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.07335710646968925,
|
|
"grad_norm": 1.9096171855926514,
|
|
"learning_rate": 1.2162162162162164e-06,
|
|
"loss": 0.2405,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.07539480387162506,
|
|
"grad_norm": 1.6934822797775269,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.2649,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.07743250127356088,
|
|
"grad_norm": 1.5827739238739014,
|
|
"learning_rate": 1.2837837837837838e-06,
|
|
"loss": 0.2433,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.07947019867549669,
|
|
"grad_norm": 1.7752106189727783,
|
|
"learning_rate": 1.3175675675675676e-06,
|
|
"loss": 0.2492,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.0815078960774325,
|
|
"grad_norm": 1.5755575895309448,
|
|
"learning_rate": 1.3513513513513515e-06,
|
|
"loss": 0.2483,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.08354559347936831,
|
|
"grad_norm": 1.5823661088943481,
|
|
"learning_rate": 1.3851351351351352e-06,
|
|
"loss": 0.2436,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.08558329088130413,
|
|
"grad_norm": 1.4753117561340332,
|
|
"learning_rate": 1.418918918918919e-06,
|
|
"loss": 0.2312,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.08762098828323994,
|
|
"grad_norm": 1.6108685731887817,
|
|
"learning_rate": 1.4527027027027027e-06,
|
|
"loss": 0.2503,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.08965868568517575,
|
|
"grad_norm": 1.6036980152130127,
|
|
"learning_rate": 1.4864864864864868e-06,
|
|
"loss": 0.2409,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.09169638308711156,
|
|
"grad_norm": 1.6441094875335693,
|
|
"learning_rate": 1.5202702702702704e-06,
|
|
"loss": 0.2486,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.09373408048904738,
|
|
"grad_norm": 1.6840177774429321,
|
|
"learning_rate": 1.5540540540540541e-06,
|
|
"loss": 0.2631,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.09577177789098319,
|
|
"grad_norm": 1.7543444633483887,
|
|
"learning_rate": 1.5878378378378378e-06,
|
|
"loss": 0.2632,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.097809475292919,
|
|
"grad_norm": 1.5591580867767334,
|
|
"learning_rate": 1.6216216216216219e-06,
|
|
"loss": 0.2382,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.09984717269485481,
|
|
"grad_norm": 1.5250903367996216,
|
|
"learning_rate": 1.6554054054054055e-06,
|
|
"loss": 0.2431,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.10188487009679063,
|
|
"grad_norm": 1.5948268175125122,
|
|
"learning_rate": 1.6891891891891894e-06,
|
|
"loss": 0.2591,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.10392256749872644,
|
|
"grad_norm": 2.0706777572631836,
|
|
"learning_rate": 1.722972972972973e-06,
|
|
"loss": 0.2536,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.10596026490066225,
|
|
"grad_norm": 1.6050032377243042,
|
|
"learning_rate": 1.756756756756757e-06,
|
|
"loss": 0.2353,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.10799796230259806,
|
|
"grad_norm": 1.572332739830017,
|
|
"learning_rate": 1.7905405405405408e-06,
|
|
"loss": 0.2586,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.11003565970453388,
|
|
"grad_norm": 1.5016673803329468,
|
|
"learning_rate": 1.8243243243243245e-06,
|
|
"loss": 0.2506,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.11207335710646969,
|
|
"grad_norm": 1.5977917909622192,
|
|
"learning_rate": 1.8581081081081081e-06,
|
|
"loss": 0.2377,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.1141110545084055,
|
|
"grad_norm": 1.4931672811508179,
|
|
"learning_rate": 1.8918918918918922e-06,
|
|
"loss": 0.2373,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.11614875191034131,
|
|
"grad_norm": 1.4913355112075806,
|
|
"learning_rate": 1.925675675675676e-06,
|
|
"loss": 0.243,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.11818644931227713,
|
|
"grad_norm": 1.3302721977233887,
|
|
"learning_rate": 1.9594594594594595e-06,
|
|
"loss": 0.2256,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.12022414671421294,
|
|
"grad_norm": 1.6652206182479858,
|
|
"learning_rate": 1.9932432432432434e-06,
|
|
"loss": 0.2444,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.12226184411614875,
|
|
"grad_norm": 1.701822280883789,
|
|
"learning_rate": 2.0270270270270273e-06,
|
|
"loss": 0.2311,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.12429954151808456,
|
|
"grad_norm": 2.096843957901001,
|
|
"learning_rate": 2.060810810810811e-06,
|
|
"loss": 0.2524,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.12633723892002038,
|
|
"grad_norm": 1.7165014743804932,
|
|
"learning_rate": 2.0945945945945946e-06,
|
|
"loss": 0.2514,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.12837493632195618,
|
|
"grad_norm": 1.5645636320114136,
|
|
"learning_rate": 2.1283783783783785e-06,
|
|
"loss": 0.2425,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.130412633723892,
|
|
"grad_norm": 1.4007450342178345,
|
|
"learning_rate": 2.1621621621621623e-06,
|
|
"loss": 0.2489,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.13245033112582782,
|
|
"grad_norm": 1.485996127128601,
|
|
"learning_rate": 2.195945945945946e-06,
|
|
"loss": 0.254,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.13448802852776362,
|
|
"grad_norm": 1.4894458055496216,
|
|
"learning_rate": 2.22972972972973e-06,
|
|
"loss": 0.2502,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.13652572592969944,
|
|
"grad_norm": 1.59180748462677,
|
|
"learning_rate": 2.2635135135135135e-06,
|
|
"loss": 0.2379,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.13856342333163527,
|
|
"grad_norm": 1.5528825521469116,
|
|
"learning_rate": 2.297297297297298e-06,
|
|
"loss": 0.2316,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.14060112073357106,
|
|
"grad_norm": 1.7990005016326904,
|
|
"learning_rate": 2.3310810810810813e-06,
|
|
"loss": 0.2553,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.14263881813550688,
|
|
"grad_norm": 1.5510270595550537,
|
|
"learning_rate": 2.364864864864865e-06,
|
|
"loss": 0.243,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.14467651553744268,
|
|
"grad_norm": 1.5694071054458618,
|
|
"learning_rate": 2.3986486486486486e-06,
|
|
"loss": 0.2471,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.1467142129393785,
|
|
"grad_norm": 1.4359402656555176,
|
|
"learning_rate": 2.432432432432433e-06,
|
|
"loss": 0.2324,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.14875191034131433,
|
|
"grad_norm": 1.372413158416748,
|
|
"learning_rate": 2.4662162162162163e-06,
|
|
"loss": 0.2455,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.15078960774325012,
|
|
"grad_norm": 1.4357329607009888,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.2332,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.15282730514518594,
|
|
"grad_norm": 1.4806591272354126,
|
|
"learning_rate": 2.533783783783784e-06,
|
|
"loss": 0.2367,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.15486500254712177,
|
|
"grad_norm": 1.5909092426300049,
|
|
"learning_rate": 2.5675675675675675e-06,
|
|
"loss": 0.2351,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.15690269994905756,
|
|
"grad_norm": 1.3923670053482056,
|
|
"learning_rate": 2.601351351351352e-06,
|
|
"loss": 0.2413,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.15894039735099338,
|
|
"grad_norm": 1.4559671878814697,
|
|
"learning_rate": 2.6351351351351353e-06,
|
|
"loss": 0.2365,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.16097809475292918,
|
|
"grad_norm": 1.5630332231521606,
|
|
"learning_rate": 2.668918918918919e-06,
|
|
"loss": 0.2431,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.163015792154865,
|
|
"grad_norm": 1.783199429512024,
|
|
"learning_rate": 2.702702702702703e-06,
|
|
"loss": 0.2497,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.16505348955680083,
|
|
"grad_norm": 1.385953664779663,
|
|
"learning_rate": 2.7364864864864865e-06,
|
|
"loss": 0.2456,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.16709118695873662,
|
|
"grad_norm": 1.5928620100021362,
|
|
"learning_rate": 2.7702702702702703e-06,
|
|
"loss": 0.2356,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.16912888436067244,
|
|
"grad_norm": 1.6051955223083496,
|
|
"learning_rate": 2.8040540540540546e-06,
|
|
"loss": 0.2443,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.17116658176260827,
|
|
"grad_norm": 1.5298365354537964,
|
|
"learning_rate": 2.837837837837838e-06,
|
|
"loss": 0.2354,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.17320427916454406,
|
|
"grad_norm": 1.4721895456314087,
|
|
"learning_rate": 2.871621621621622e-06,
|
|
"loss": 0.2271,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.17524197656647988,
|
|
"grad_norm": 1.3692090511322021,
|
|
"learning_rate": 2.9054054054054054e-06,
|
|
"loss": 0.2416,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.17727967396841568,
|
|
"grad_norm": 1.4523481130599976,
|
|
"learning_rate": 2.9391891891891893e-06,
|
|
"loss": 0.2271,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1793173713703515,
|
|
"grad_norm": 1.5602335929870605,
|
|
"learning_rate": 2.9729729729729736e-06,
|
|
"loss": 0.2498,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.18135506877228733,
|
|
"grad_norm": 1.478890061378479,
|
|
"learning_rate": 3.006756756756757e-06,
|
|
"loss": 0.2373,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.18339276617422312,
|
|
"grad_norm": 1.4697165489196777,
|
|
"learning_rate": 3.040540540540541e-06,
|
|
"loss": 0.2522,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.18543046357615894,
|
|
"grad_norm": 1.5102362632751465,
|
|
"learning_rate": 3.0743243243243248e-06,
|
|
"loss": 0.2403,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.18746816097809477,
|
|
"grad_norm": 1.3657342195510864,
|
|
"learning_rate": 3.1081081081081082e-06,
|
|
"loss": 0.2304,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.18950585838003056,
|
|
"grad_norm": 1.5056520700454712,
|
|
"learning_rate": 3.141891891891892e-06,
|
|
"loss": 0.2372,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.19154355578196638,
|
|
"grad_norm": 1.5441231727600098,
|
|
"learning_rate": 3.1756756756756755e-06,
|
|
"loss": 0.2456,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.19358125318390218,
|
|
"grad_norm": 1.472184181213379,
|
|
"learning_rate": 3.20945945945946e-06,
|
|
"loss": 0.2416,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.195618950585838,
|
|
"grad_norm": 1.4700815677642822,
|
|
"learning_rate": 3.2432432432432437e-06,
|
|
"loss": 0.2254,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.19765664798777383,
|
|
"grad_norm": 1.2121925354003906,
|
|
"learning_rate": 3.277027027027027e-06,
|
|
"loss": 0.2343,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.19969434538970962,
|
|
"grad_norm": 1.5309983491897583,
|
|
"learning_rate": 3.310810810810811e-06,
|
|
"loss": 0.233,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.20173204279164544,
|
|
"grad_norm": 1.422556757926941,
|
|
"learning_rate": 3.3445945945945953e-06,
|
|
"loss": 0.254,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.20376974019358127,
|
|
"grad_norm": 1.3337507247924805,
|
|
"learning_rate": 3.3783783783783788e-06,
|
|
"loss": 0.2443,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.20580743759551706,
|
|
"grad_norm": 1.3088750839233398,
|
|
"learning_rate": 3.4121621621621626e-06,
|
|
"loss": 0.2456,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.20784513499745289,
|
|
"grad_norm": 1.5006299018859863,
|
|
"learning_rate": 3.445945945945946e-06,
|
|
"loss": 0.2315,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.20988283239938868,
|
|
"grad_norm": 1.3762035369873047,
|
|
"learning_rate": 3.47972972972973e-06,
|
|
"loss": 0.2524,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.2119205298013245,
|
|
"grad_norm": 1.2947945594787598,
|
|
"learning_rate": 3.513513513513514e-06,
|
|
"loss": 0.2345,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.21395822720326033,
|
|
"grad_norm": 1.3257865905761719,
|
|
"learning_rate": 3.5472972972972973e-06,
|
|
"loss": 0.2275,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.21599592460519612,
|
|
"grad_norm": 1.3588801622390747,
|
|
"learning_rate": 3.5810810810810816e-06,
|
|
"loss": 0.2302,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.21803362200713194,
|
|
"grad_norm": 1.3438785076141357,
|
|
"learning_rate": 3.6148648648648655e-06,
|
|
"loss": 0.2353,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.22007131940906777,
|
|
"grad_norm": 1.415419340133667,
|
|
"learning_rate": 3.648648648648649e-06,
|
|
"loss": 0.2232,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.22210901681100356,
|
|
"grad_norm": 1.2871397733688354,
|
|
"learning_rate": 3.6824324324324328e-06,
|
|
"loss": 0.2039,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.22414671421293939,
|
|
"grad_norm": 1.4281516075134277,
|
|
"learning_rate": 3.7162162162162162e-06,
|
|
"loss": 0.2476,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.22618441161487518,
|
|
"grad_norm": 1.3740307092666626,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.2206,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.228222109016811,
|
|
"grad_norm": 1.5596915483474731,
|
|
"learning_rate": 3.7837837837837844e-06,
|
|
"loss": 0.2337,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.23025980641874683,
|
|
"grad_norm": 1.501287817955017,
|
|
"learning_rate": 3.817567567567567e-06,
|
|
"loss": 0.2348,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.23229750382068262,
|
|
"grad_norm": 1.5411885976791382,
|
|
"learning_rate": 3.851351351351352e-06,
|
|
"loss": 0.2466,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.23433520122261844,
|
|
"grad_norm": 1.5837326049804688,
|
|
"learning_rate": 3.885135135135135e-06,
|
|
"loss": 0.2329,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.23637289862455427,
|
|
"grad_norm": 1.222288966178894,
|
|
"learning_rate": 3.918918918918919e-06,
|
|
"loss": 0.2324,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.23841059602649006,
|
|
"grad_norm": 1.310904860496521,
|
|
"learning_rate": 3.952702702702703e-06,
|
|
"loss": 0.2338,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.24044829342842589,
|
|
"grad_norm": 1.4169098138809204,
|
|
"learning_rate": 3.986486486486487e-06,
|
|
"loss": 0.2461,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.24248599083036168,
|
|
"grad_norm": 1.6267861127853394,
|
|
"learning_rate": 4.020270270270271e-06,
|
|
"loss": 0.2506,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.2445236882322975,
|
|
"grad_norm": 1.396310567855835,
|
|
"learning_rate": 4.0540540540540545e-06,
|
|
"loss": 0.2169,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.24656138563423333,
|
|
"grad_norm": 1.3390737771987915,
|
|
"learning_rate": 4.087837837837838e-06,
|
|
"loss": 0.2468,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.24859908303616912,
|
|
"grad_norm": 1.418508768081665,
|
|
"learning_rate": 4.121621621621622e-06,
|
|
"loss": 0.2592,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.25063678043810494,
|
|
"grad_norm": 1.4217811822891235,
|
|
"learning_rate": 4.155405405405405e-06,
|
|
"loss": 0.2277,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.25267447784004077,
|
|
"grad_norm": 1.351367473602295,
|
|
"learning_rate": 4.189189189189189e-06,
|
|
"loss": 0.2421,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.2547121752419766,
|
|
"grad_norm": 1.4876950979232788,
|
|
"learning_rate": 4.222972972972974e-06,
|
|
"loss": 0.2503,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.25674987264391236,
|
|
"grad_norm": 1.304235577583313,
|
|
"learning_rate": 4.256756756756757e-06,
|
|
"loss": 0.2376,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.2587875700458482,
|
|
"grad_norm": 1.3415497541427612,
|
|
"learning_rate": 4.290540540540541e-06,
|
|
"loss": 0.2386,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.260825267447784,
|
|
"grad_norm": 1.6345967054367065,
|
|
"learning_rate": 4.324324324324325e-06,
|
|
"loss": 0.251,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.2628629648497198,
|
|
"grad_norm": 1.3598498106002808,
|
|
"learning_rate": 4.3581081081081085e-06,
|
|
"loss": 0.2568,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.26490066225165565,
|
|
"grad_norm": 1.2988228797912598,
|
|
"learning_rate": 4.391891891891892e-06,
|
|
"loss": 0.2423,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2669383596535914,
|
|
"grad_norm": 1.4423168897628784,
|
|
"learning_rate": 4.4256756756756754e-06,
|
|
"loss": 0.2565,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.26897605705552724,
|
|
"grad_norm": 1.4470850229263306,
|
|
"learning_rate": 4.45945945945946e-06,
|
|
"loss": 0.254,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.27101375445746306,
|
|
"grad_norm": 1.3750495910644531,
|
|
"learning_rate": 4.493243243243244e-06,
|
|
"loss": 0.2438,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.2730514518593989,
|
|
"grad_norm": 1.2969499826431274,
|
|
"learning_rate": 4.527027027027027e-06,
|
|
"loss": 0.2519,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.2750891492613347,
|
|
"grad_norm": 1.3548568487167358,
|
|
"learning_rate": 4.560810810810811e-06,
|
|
"loss": 0.2407,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.27712684666327053,
|
|
"grad_norm": 1.3551725149154663,
|
|
"learning_rate": 4.594594594594596e-06,
|
|
"loss": 0.2432,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2791645440652063,
|
|
"grad_norm": 1.3198033571243286,
|
|
"learning_rate": 4.628378378378379e-06,
|
|
"loss": 0.2349,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.2812022414671421,
|
|
"grad_norm": 1.344118595123291,
|
|
"learning_rate": 4.6621621621621625e-06,
|
|
"loss": 0.2374,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.28323993886907795,
|
|
"grad_norm": 1.4489444494247437,
|
|
"learning_rate": 4.695945945945946e-06,
|
|
"loss": 0.2452,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.28527763627101377,
|
|
"grad_norm": 1.2641702890396118,
|
|
"learning_rate": 4.72972972972973e-06,
|
|
"loss": 0.2341,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.2873153336729496,
|
|
"grad_norm": 1.227349042892456,
|
|
"learning_rate": 4.763513513513514e-06,
|
|
"loss": 0.2351,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.28935303107488536,
|
|
"grad_norm": 1.237866759300232,
|
|
"learning_rate": 4.797297297297297e-06,
|
|
"loss": 0.2416,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2913907284768212,
|
|
"grad_norm": 1.341732144355774,
|
|
"learning_rate": 4.831081081081082e-06,
|
|
"loss": 0.2273,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.293428425878757,
|
|
"grad_norm": 1.2197740077972412,
|
|
"learning_rate": 4.864864864864866e-06,
|
|
"loss": 0.2371,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.2954661232806928,
|
|
"grad_norm": 1.3351991176605225,
|
|
"learning_rate": 4.898648648648649e-06,
|
|
"loss": 0.237,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.29750382068262865,
|
|
"grad_norm": 1.364261507987976,
|
|
"learning_rate": 4.932432432432433e-06,
|
|
"loss": 0.244,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2995415180845644,
|
|
"grad_norm": 1.4714289903640747,
|
|
"learning_rate": 4.9662162162162165e-06,
|
|
"loss": 0.2566,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.30157921548650024,
|
|
"grad_norm": 1.2321275472640991,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.2305,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.30361691288843606,
|
|
"grad_norm": 1.257879614830017,
|
|
"learning_rate": 4.99622641509434e-06,
|
|
"loss": 0.2364,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.3056546102903719,
|
|
"grad_norm": 1.3090922832489014,
|
|
"learning_rate": 4.99245283018868e-06,
|
|
"loss": 0.2322,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3076923076923077,
|
|
"grad_norm": 1.2299717664718628,
|
|
"learning_rate": 4.988679245283019e-06,
|
|
"loss": 0.2267,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.30973000509424353,
|
|
"grad_norm": 1.141762375831604,
|
|
"learning_rate": 4.984905660377358e-06,
|
|
"loss": 0.2332,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.3117677024961793,
|
|
"grad_norm": 1.4402216672897339,
|
|
"learning_rate": 4.981132075471698e-06,
|
|
"loss": 0.2479,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3138053998981151,
|
|
"grad_norm": 1.4099055528640747,
|
|
"learning_rate": 4.977358490566038e-06,
|
|
"loss": 0.2379,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.31584309730005095,
|
|
"grad_norm": 1.2121434211730957,
|
|
"learning_rate": 4.973584905660378e-06,
|
|
"loss": 0.2328,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.31788079470198677,
|
|
"grad_norm": 1.2919939756393433,
|
|
"learning_rate": 4.969811320754717e-06,
|
|
"loss": 0.242,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.3199184921039226,
|
|
"grad_norm": 1.4471988677978516,
|
|
"learning_rate": 4.966037735849057e-06,
|
|
"loss": 0.2504,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.32195618950585836,
|
|
"grad_norm": 2.552502393722534,
|
|
"learning_rate": 4.962264150943397e-06,
|
|
"loss": 0.2496,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.3239938869077942,
|
|
"grad_norm": 1.2150771617889404,
|
|
"learning_rate": 4.958490566037736e-06,
|
|
"loss": 0.2385,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.32603158430973,
|
|
"grad_norm": 1.3135796785354614,
|
|
"learning_rate": 4.954716981132076e-06,
|
|
"loss": 0.2355,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.3280692817116658,
|
|
"grad_norm": 1.3980915546417236,
|
|
"learning_rate": 4.950943396226415e-06,
|
|
"loss": 0.2338,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.33010697911360165,
|
|
"grad_norm": 1.3292484283447266,
|
|
"learning_rate": 4.947169811320755e-06,
|
|
"loss": 0.243,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.3321446765155374,
|
|
"grad_norm": 1.5226550102233887,
|
|
"learning_rate": 4.943396226415095e-06,
|
|
"loss": 0.2486,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.33418237391747324,
|
|
"grad_norm": 1.2037345170974731,
|
|
"learning_rate": 4.939622641509435e-06,
|
|
"loss": 0.2528,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.33622007131940906,
|
|
"grad_norm": 1.3524994850158691,
|
|
"learning_rate": 4.935849056603774e-06,
|
|
"loss": 0.2739,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.3382577687213449,
|
|
"grad_norm": 1.1955732107162476,
|
|
"learning_rate": 4.932075471698114e-06,
|
|
"loss": 0.2257,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.3402954661232807,
|
|
"grad_norm": 1.273659348487854,
|
|
"learning_rate": 4.928301886792453e-06,
|
|
"loss": 0.2434,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.34233316352521653,
|
|
"grad_norm": 1.365476369857788,
|
|
"learning_rate": 4.924528301886793e-06,
|
|
"loss": 0.2269,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.3443708609271523,
|
|
"grad_norm": 1.3352711200714111,
|
|
"learning_rate": 4.920754716981133e-06,
|
|
"loss": 0.2413,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.3464085583290881,
|
|
"grad_norm": 1.2405195236206055,
|
|
"learning_rate": 4.916981132075473e-06,
|
|
"loss": 0.2382,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.34844625573102395,
|
|
"grad_norm": 1.4409878253936768,
|
|
"learning_rate": 4.913207547169812e-06,
|
|
"loss": 0.2379,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.35048395313295977,
|
|
"grad_norm": 1.269126534461975,
|
|
"learning_rate": 4.909433962264152e-06,
|
|
"loss": 0.2366,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.3525216505348956,
|
|
"grad_norm": 1.1738016605377197,
|
|
"learning_rate": 4.905660377358491e-06,
|
|
"loss": 0.2311,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.35455934793683136,
|
|
"grad_norm": 1.3719390630722046,
|
|
"learning_rate": 4.9018867924528306e-06,
|
|
"loss": 0.2238,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.3565970453387672,
|
|
"grad_norm": 1.301747441291809,
|
|
"learning_rate": 4.8981132075471705e-06,
|
|
"loss": 0.2376,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.358634742740703,
|
|
"grad_norm": 1.2950748205184937,
|
|
"learning_rate": 4.8943396226415095e-06,
|
|
"loss": 0.2614,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.36067244014263883,
|
|
"grad_norm": 1.3460197448730469,
|
|
"learning_rate": 4.8905660377358495e-06,
|
|
"loss": 0.247,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.36271013754457465,
|
|
"grad_norm": 1.1415988206863403,
|
|
"learning_rate": 4.886792452830189e-06,
|
|
"loss": 0.236,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.3647478349465104,
|
|
"grad_norm": 1.2343894243240356,
|
|
"learning_rate": 4.8830188679245284e-06,
|
|
"loss": 0.2338,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.36678553234844624,
|
|
"grad_norm": 1.3122376203536987,
|
|
"learning_rate": 4.879245283018868e-06,
|
|
"loss": 0.2512,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.36882322975038206,
|
|
"grad_norm": 1.1867709159851074,
|
|
"learning_rate": 4.875471698113207e-06,
|
|
"loss": 0.2319,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.3708609271523179,
|
|
"grad_norm": 1.242540955543518,
|
|
"learning_rate": 4.871698113207547e-06,
|
|
"loss": 0.2391,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.3728986245542537,
|
|
"grad_norm": 1.277694821357727,
|
|
"learning_rate": 4.867924528301887e-06,
|
|
"loss": 0.2334,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.37493632195618953,
|
|
"grad_norm": 1.341858983039856,
|
|
"learning_rate": 4.864150943396227e-06,
|
|
"loss": 0.2363,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.3769740193581253,
|
|
"grad_norm": 1.2502440214157104,
|
|
"learning_rate": 4.860377358490567e-06,
|
|
"loss": 0.2342,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.3790117167600611,
|
|
"grad_norm": 1.2432913780212402,
|
|
"learning_rate": 4.856603773584906e-06,
|
|
"loss": 0.2436,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.38104941416199695,
|
|
"grad_norm": 1.3028502464294434,
|
|
"learning_rate": 4.852830188679245e-06,
|
|
"loss": 0.2367,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.38308711156393277,
|
|
"grad_norm": 1.3056414127349854,
|
|
"learning_rate": 4.849056603773585e-06,
|
|
"loss": 0.2221,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.3851248089658686,
|
|
"grad_norm": 1.1587262153625488,
|
|
"learning_rate": 4.845283018867925e-06,
|
|
"loss": 0.2511,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.38716250636780436,
|
|
"grad_norm": 1.3277629613876343,
|
|
"learning_rate": 4.841509433962265e-06,
|
|
"loss": 0.2376,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.3892002037697402,
|
|
"grad_norm": 1.3022247552871704,
|
|
"learning_rate": 4.837735849056604e-06,
|
|
"loss": 0.2503,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.391237901171676,
|
|
"grad_norm": 1.1903053522109985,
|
|
"learning_rate": 4.833962264150944e-06,
|
|
"loss": 0.2376,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.39327559857361183,
|
|
"grad_norm": 1.3128589391708374,
|
|
"learning_rate": 4.830188679245284e-06,
|
|
"loss": 0.2393,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.39531329597554765,
|
|
"grad_norm": 2.1321053504943848,
|
|
"learning_rate": 4.826415094339623e-06,
|
|
"loss": 0.2214,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.3973509933774834,
|
|
"grad_norm": 1.255610466003418,
|
|
"learning_rate": 4.822641509433963e-06,
|
|
"loss": 0.2377,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.39938869077941924,
|
|
"grad_norm": 1.1986833810806274,
|
|
"learning_rate": 4.818867924528302e-06,
|
|
"loss": 0.2285,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.40142638818135506,
|
|
"grad_norm": 1.2554630041122437,
|
|
"learning_rate": 4.815094339622642e-06,
|
|
"loss": 0.2491,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.4034640855832909,
|
|
"grad_norm": 1.2297279834747314,
|
|
"learning_rate": 4.811320754716982e-06,
|
|
"loss": 0.2301,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.4055017829852267,
|
|
"grad_norm": 1.3254568576812744,
|
|
"learning_rate": 4.807547169811322e-06,
|
|
"loss": 0.2326,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.40753948038716253,
|
|
"grad_norm": 1.2830324172973633,
|
|
"learning_rate": 4.803773584905661e-06,
|
|
"loss": 0.2466,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.4095771777890983,
|
|
"grad_norm": 1.3986701965332031,
|
|
"learning_rate": 4.800000000000001e-06,
|
|
"loss": 0.2632,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.4116148751910341,
|
|
"grad_norm": 1.3166711330413818,
|
|
"learning_rate": 4.79622641509434e-06,
|
|
"loss": 0.2294,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.41365257259296995,
|
|
"grad_norm": 1.4140809774398804,
|
|
"learning_rate": 4.79245283018868e-06,
|
|
"loss": 0.2524,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.41569026999490577,
|
|
"grad_norm": 1.298222303390503,
|
|
"learning_rate": 4.7886792452830195e-06,
|
|
"loss": 0.2201,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.4177279673968416,
|
|
"grad_norm": 1.2514641284942627,
|
|
"learning_rate": 4.7849056603773594e-06,
|
|
"loss": 0.2458,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.41976566479877736,
|
|
"grad_norm": 1.1963963508605957,
|
|
"learning_rate": 4.7811320754716985e-06,
|
|
"loss": 0.2363,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.4218033622007132,
|
|
"grad_norm": 1.2280910015106201,
|
|
"learning_rate": 4.777358490566038e-06,
|
|
"loss": 0.249,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.423841059602649,
|
|
"grad_norm": 1.2325594425201416,
|
|
"learning_rate": 4.7735849056603775e-06,
|
|
"loss": 0.239,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.42587875700458483,
|
|
"grad_norm": 1.268089771270752,
|
|
"learning_rate": 4.769811320754717e-06,
|
|
"loss": 0.2428,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.42791645440652065,
|
|
"grad_norm": 1.147208571434021,
|
|
"learning_rate": 4.766037735849057e-06,
|
|
"loss": 0.2027,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4299541518084564,
|
|
"grad_norm": 1.293784260749817,
|
|
"learning_rate": 4.762264150943396e-06,
|
|
"loss": 0.2467,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.43199184921039224,
|
|
"grad_norm": 1.3141661882400513,
|
|
"learning_rate": 4.758490566037736e-06,
|
|
"loss": 0.235,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.43402954661232807,
|
|
"grad_norm": 1.3035210371017456,
|
|
"learning_rate": 4.754716981132076e-06,
|
|
"loss": 0.2461,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.4360672440142639,
|
|
"grad_norm": 1.26072359085083,
|
|
"learning_rate": 4.750943396226415e-06,
|
|
"loss": 0.2483,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.4381049414161997,
|
|
"grad_norm": 1.3666430711746216,
|
|
"learning_rate": 4.747169811320755e-06,
|
|
"loss": 0.2343,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.44014263881813553,
|
|
"grad_norm": 1.1508736610412598,
|
|
"learning_rate": 4.743396226415094e-06,
|
|
"loss": 0.2456,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.4421803362200713,
|
|
"grad_norm": 1.292580485343933,
|
|
"learning_rate": 4.739622641509434e-06,
|
|
"loss": 0.2244,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.4442180336220071,
|
|
"grad_norm": 1.2299766540527344,
|
|
"learning_rate": 4.735849056603774e-06,
|
|
"loss": 0.2394,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.44625573102394295,
|
|
"grad_norm": 1.316611886024475,
|
|
"learning_rate": 4.732075471698114e-06,
|
|
"loss": 0.2301,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.44829342842587877,
|
|
"grad_norm": 1.3932688236236572,
|
|
"learning_rate": 4.728301886792453e-06,
|
|
"loss": 0.2184,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4503311258278146,
|
|
"grad_norm": 1.2194689512252808,
|
|
"learning_rate": 4.724528301886793e-06,
|
|
"loss": 0.2312,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.45236882322975036,
|
|
"grad_norm": 1.1593202352523804,
|
|
"learning_rate": 4.720754716981132e-06,
|
|
"loss": 0.2122,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.4544065206316862,
|
|
"grad_norm": 1.2845839262008667,
|
|
"learning_rate": 4.716981132075472e-06,
|
|
"loss": 0.2309,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.456444218033622,
|
|
"grad_norm": 1.28933846950531,
|
|
"learning_rate": 4.713207547169812e-06,
|
|
"loss": 0.2461,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.45848191543555783,
|
|
"grad_norm": 1.2130182981491089,
|
|
"learning_rate": 4.709433962264151e-06,
|
|
"loss": 0.2276,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.46051961283749365,
|
|
"grad_norm": 1.1695858240127563,
|
|
"learning_rate": 4.705660377358491e-06,
|
|
"loss": 0.2386,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.4625573102394294,
|
|
"grad_norm": 1.1642833948135376,
|
|
"learning_rate": 4.701886792452831e-06,
|
|
"loss": 0.2234,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.46459500764136524,
|
|
"grad_norm": 1.2203588485717773,
|
|
"learning_rate": 4.69811320754717e-06,
|
|
"loss": 0.2245,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.46663270504330107,
|
|
"grad_norm": 1.5108790397644043,
|
|
"learning_rate": 4.69433962264151e-06,
|
|
"loss": 0.2587,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.4686704024452369,
|
|
"grad_norm": 2.0064799785614014,
|
|
"learning_rate": 4.690566037735849e-06,
|
|
"loss": 0.2279,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4707080998471727,
|
|
"grad_norm": 1.1252530813217163,
|
|
"learning_rate": 4.686792452830189e-06,
|
|
"loss": 0.23,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.47274579724910853,
|
|
"grad_norm": 1.359333872795105,
|
|
"learning_rate": 4.683018867924529e-06,
|
|
"loss": 0.2444,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.4747834946510443,
|
|
"grad_norm": 1.1184419393539429,
|
|
"learning_rate": 4.6792452830188686e-06,
|
|
"loss": 0.2339,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.4768211920529801,
|
|
"grad_norm": 1.2560921907424927,
|
|
"learning_rate": 4.6754716981132085e-06,
|
|
"loss": 0.2549,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.47885888945491595,
|
|
"grad_norm": 1.164919137954712,
|
|
"learning_rate": 4.6716981132075476e-06,
|
|
"loss": 0.238,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.48089658685685177,
|
|
"grad_norm": 1.3727017641067505,
|
|
"learning_rate": 4.667924528301887e-06,
|
|
"loss": 0.2434,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.4829342842587876,
|
|
"grad_norm": 1.4192495346069336,
|
|
"learning_rate": 4.6641509433962265e-06,
|
|
"loss": 0.2364,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.48497198166072336,
|
|
"grad_norm": 1.3315473794937134,
|
|
"learning_rate": 4.6603773584905665e-06,
|
|
"loss": 0.2166,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.4870096790626592,
|
|
"grad_norm": 1.140080213546753,
|
|
"learning_rate": 4.656603773584906e-06,
|
|
"loss": 0.2404,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.489047376464595,
|
|
"grad_norm": 1.3821226358413696,
|
|
"learning_rate": 4.6528301886792454e-06,
|
|
"loss": 0.2376,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.49108507386653083,
|
|
"grad_norm": 1.3087458610534668,
|
|
"learning_rate": 4.649056603773585e-06,
|
|
"loss": 0.2364,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.49312277126846665,
|
|
"grad_norm": 1.1473658084869385,
|
|
"learning_rate": 4.645283018867925e-06,
|
|
"loss": 0.2356,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.4951604686704024,
|
|
"grad_norm": 1.2189340591430664,
|
|
"learning_rate": 4.641509433962264e-06,
|
|
"loss": 0.2377,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.49719816607233824,
|
|
"grad_norm": 1.6314069032669067,
|
|
"learning_rate": 4.637735849056604e-06,
|
|
"loss": 0.2496,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.49923586347427407,
|
|
"grad_norm": 1.1271792650222778,
|
|
"learning_rate": 4.633962264150943e-06,
|
|
"loss": 0.2179,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.5012735608762099,
|
|
"grad_norm": 1.1837356090545654,
|
|
"learning_rate": 4.630188679245283e-06,
|
|
"loss": 0.2334,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.5033112582781457,
|
|
"grad_norm": 1.2714848518371582,
|
|
"learning_rate": 4.626415094339623e-06,
|
|
"loss": 0.263,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.5053489556800815,
|
|
"grad_norm": 1.235137701034546,
|
|
"learning_rate": 4.622641509433963e-06,
|
|
"loss": 0.2551,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.5073866530820174,
|
|
"grad_norm": 1.067122220993042,
|
|
"learning_rate": 4.618867924528302e-06,
|
|
"loss": 0.2397,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.5094243504839532,
|
|
"grad_norm": 1.3224409818649292,
|
|
"learning_rate": 4.615094339622642e-06,
|
|
"loss": 0.2407,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5114620478858889,
|
|
"grad_norm": 1.1779237985610962,
|
|
"learning_rate": 4.611320754716981e-06,
|
|
"loss": 0.2297,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.5134997452878247,
|
|
"grad_norm": 1.3744945526123047,
|
|
"learning_rate": 4.607547169811321e-06,
|
|
"loss": 0.241,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.5155374426897605,
|
|
"grad_norm": 1.198855996131897,
|
|
"learning_rate": 4.603773584905661e-06,
|
|
"loss": 0.2352,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.5175751400916964,
|
|
"grad_norm": 1.117774486541748,
|
|
"learning_rate": 4.600000000000001e-06,
|
|
"loss": 0.2418,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.5196128374936322,
|
|
"grad_norm": 1.248888611793518,
|
|
"learning_rate": 4.59622641509434e-06,
|
|
"loss": 0.2244,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.521650534895568,
|
|
"grad_norm": 1.2080906629562378,
|
|
"learning_rate": 4.59245283018868e-06,
|
|
"loss": 0.229,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.5236882322975038,
|
|
"grad_norm": 1.1990790367126465,
|
|
"learning_rate": 4.588679245283019e-06,
|
|
"loss": 0.2253,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.5257259296994397,
|
|
"grad_norm": 1.1284271478652954,
|
|
"learning_rate": 4.584905660377359e-06,
|
|
"loss": 0.2381,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.5277636271013755,
|
|
"grad_norm": 1.2414554357528687,
|
|
"learning_rate": 4.581132075471699e-06,
|
|
"loss": 0.2332,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.5298013245033113,
|
|
"grad_norm": 1.0976932048797607,
|
|
"learning_rate": 4.577358490566038e-06,
|
|
"loss": 0.2331,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5318390219052471,
|
|
"grad_norm": 1.2075899839401245,
|
|
"learning_rate": 4.573584905660378e-06,
|
|
"loss": 0.2413,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.5338767193071828,
|
|
"grad_norm": 1.1429880857467651,
|
|
"learning_rate": 4.569811320754718e-06,
|
|
"loss": 0.23,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.5359144167091187,
|
|
"grad_norm": 1.0777602195739746,
|
|
"learning_rate": 4.566037735849057e-06,
|
|
"loss": 0.2185,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.5379521141110545,
|
|
"grad_norm": 1.143699288368225,
|
|
"learning_rate": 4.562264150943397e-06,
|
|
"loss": 0.2324,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.5399898115129903,
|
|
"grad_norm": 1.3619898557662964,
|
|
"learning_rate": 4.558490566037736e-06,
|
|
"loss": 0.2484,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.5420275089149261,
|
|
"grad_norm": 1.2638384103775024,
|
|
"learning_rate": 4.554716981132076e-06,
|
|
"loss": 0.2349,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.544065206316862,
|
|
"grad_norm": 1.2247638702392578,
|
|
"learning_rate": 4.5509433962264155e-06,
|
|
"loss": 0.2372,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.5461029037187978,
|
|
"grad_norm": 1.2362172603607178,
|
|
"learning_rate": 4.547169811320755e-06,
|
|
"loss": 0.2333,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.5481406011207336,
|
|
"grad_norm": 1.2307566404342651,
|
|
"learning_rate": 4.543396226415095e-06,
|
|
"loss": 0.2411,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.5501782985226694,
|
|
"grad_norm": 1.2503217458724976,
|
|
"learning_rate": 4.539622641509434e-06,
|
|
"loss": 0.2459,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5522159959246052,
|
|
"grad_norm": 1.2475491762161255,
|
|
"learning_rate": 4.5358490566037735e-06,
|
|
"loss": 0.2272,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.5542536933265411,
|
|
"grad_norm": 1.105730414390564,
|
|
"learning_rate": 4.532075471698113e-06,
|
|
"loss": 0.2481,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.5562913907284768,
|
|
"grad_norm": 1.275002121925354,
|
|
"learning_rate": 4.528301886792453e-06,
|
|
"loss": 0.2445,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.5583290881304126,
|
|
"grad_norm": 1.1774675846099854,
|
|
"learning_rate": 4.524528301886793e-06,
|
|
"loss": 0.2377,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.5603667855323484,
|
|
"grad_norm": 1.329745888710022,
|
|
"learning_rate": 4.520754716981132e-06,
|
|
"loss": 0.2319,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.5624044829342842,
|
|
"grad_norm": 1.2236435413360596,
|
|
"learning_rate": 4.516981132075472e-06,
|
|
"loss": 0.2274,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.5644421803362201,
|
|
"grad_norm": 1.0417534112930298,
|
|
"learning_rate": 4.513207547169812e-06,
|
|
"loss": 0.2234,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.5664798777381559,
|
|
"grad_norm": 1.0934056043624878,
|
|
"learning_rate": 4.509433962264151e-06,
|
|
"loss": 0.2445,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.5685175751400917,
|
|
"grad_norm": 1.2551244497299194,
|
|
"learning_rate": 4.505660377358491e-06,
|
|
"loss": 0.2434,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.5705552725420275,
|
|
"grad_norm": 1.2088017463684082,
|
|
"learning_rate": 4.50188679245283e-06,
|
|
"loss": 0.2399,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5725929699439634,
|
|
"grad_norm": 1.2738829851150513,
|
|
"learning_rate": 4.49811320754717e-06,
|
|
"loss": 0.2328,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.5746306673458992,
|
|
"grad_norm": 1.312220811843872,
|
|
"learning_rate": 4.49433962264151e-06,
|
|
"loss": 0.2334,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.5766683647478349,
|
|
"grad_norm": 1.1316941976547241,
|
|
"learning_rate": 4.49056603773585e-06,
|
|
"loss": 0.225,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.5787060621497707,
|
|
"grad_norm": 1.0500327348709106,
|
|
"learning_rate": 4.486792452830189e-06,
|
|
"loss": 0.226,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.5807437595517065,
|
|
"grad_norm": 1.0962241888046265,
|
|
"learning_rate": 4.483018867924528e-06,
|
|
"loss": 0.2168,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.5827814569536424,
|
|
"grad_norm": 1.100046992301941,
|
|
"learning_rate": 4.479245283018868e-06,
|
|
"loss": 0.2267,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.5848191543555782,
|
|
"grad_norm": 1.2387049198150635,
|
|
"learning_rate": 4.475471698113208e-06,
|
|
"loss": 0.2509,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.586856851757514,
|
|
"grad_norm": 1.1974563598632812,
|
|
"learning_rate": 4.471698113207548e-06,
|
|
"loss": 0.2351,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.5888945491594498,
|
|
"grad_norm": 1.2102775573730469,
|
|
"learning_rate": 4.467924528301888e-06,
|
|
"loss": 0.2474,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.5909322465613857,
|
|
"grad_norm": 1.0824848413467407,
|
|
"learning_rate": 4.464150943396227e-06,
|
|
"loss": 0.2289,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.5929699439633215,
|
|
"grad_norm": 1.111902117729187,
|
|
"learning_rate": 4.460377358490567e-06,
|
|
"loss": 0.2363,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.5950076413652573,
|
|
"grad_norm": 1.1692800521850586,
|
|
"learning_rate": 4.456603773584906e-06,
|
|
"loss": 0.2266,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.5970453387671931,
|
|
"grad_norm": 1.160117506980896,
|
|
"learning_rate": 4.452830188679246e-06,
|
|
"loss": 0.2351,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.5990830361691288,
|
|
"grad_norm": 1.1320550441741943,
|
|
"learning_rate": 4.4490566037735856e-06,
|
|
"loss": 0.2239,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.6011207335710647,
|
|
"grad_norm": 1.1472080945968628,
|
|
"learning_rate": 4.445283018867925e-06,
|
|
"loss": 0.2157,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.6031584309730005,
|
|
"grad_norm": 1.2992992401123047,
|
|
"learning_rate": 4.4415094339622646e-06,
|
|
"loss": 0.2283,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.6051961283749363,
|
|
"grad_norm": 1.2557927370071411,
|
|
"learning_rate": 4.4377358490566045e-06,
|
|
"loss": 0.2339,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.6072338257768721,
|
|
"grad_norm": 1.0591647624969482,
|
|
"learning_rate": 4.4339622641509435e-06,
|
|
"loss": 0.2152,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.609271523178808,
|
|
"grad_norm": 1.0702134370803833,
|
|
"learning_rate": 4.4301886792452834e-06,
|
|
"loss": 0.206,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.6113092205807438,
|
|
"grad_norm": 1.2004814147949219,
|
|
"learning_rate": 4.4264150943396225e-06,
|
|
"loss": 0.2307,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6133469179826796,
|
|
"grad_norm": 1.1907483339309692,
|
|
"learning_rate": 4.4226415094339624e-06,
|
|
"loss": 0.2289,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.6153846153846154,
|
|
"grad_norm": 1.1154402494430542,
|
|
"learning_rate": 4.418867924528302e-06,
|
|
"loss": 0.2135,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.6174223127865512,
|
|
"grad_norm": 1.1816260814666748,
|
|
"learning_rate": 4.415094339622642e-06,
|
|
"loss": 0.2286,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.6194600101884871,
|
|
"grad_norm": 1.1875680685043335,
|
|
"learning_rate": 4.411320754716981e-06,
|
|
"loss": 0.2375,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.6214977075904228,
|
|
"grad_norm": 1.0976321697235107,
|
|
"learning_rate": 4.407547169811321e-06,
|
|
"loss": 0.2137,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.6235354049923586,
|
|
"grad_norm": 1.10517418384552,
|
|
"learning_rate": 4.40377358490566e-06,
|
|
"loss": 0.219,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.6255731023942944,
|
|
"grad_norm": 1.1795883178710938,
|
|
"learning_rate": 4.4e-06,
|
|
"loss": 0.2286,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.6276107997962302,
|
|
"grad_norm": 1.1178569793701172,
|
|
"learning_rate": 4.39622641509434e-06,
|
|
"loss": 0.22,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.6296484971981661,
|
|
"grad_norm": 1.1791189908981323,
|
|
"learning_rate": 4.39245283018868e-06,
|
|
"loss": 0.2474,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.6316861946001019,
|
|
"grad_norm": 1.1312475204467773,
|
|
"learning_rate": 4.388679245283019e-06,
|
|
"loss": 0.2474,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6337238920020377,
|
|
"grad_norm": 1.1903657913208008,
|
|
"learning_rate": 4.384905660377359e-06,
|
|
"loss": 0.2477,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.6357615894039735,
|
|
"grad_norm": 1.1177330017089844,
|
|
"learning_rate": 4.381132075471698e-06,
|
|
"loss": 0.2412,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.6377992868059094,
|
|
"grad_norm": 1.3050440549850464,
|
|
"learning_rate": 4.377358490566038e-06,
|
|
"loss": 0.2465,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.6398369842078452,
|
|
"grad_norm": 1.1658434867858887,
|
|
"learning_rate": 4.373584905660378e-06,
|
|
"loss": 0.2162,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.6418746816097809,
|
|
"grad_norm": 1.1645337343215942,
|
|
"learning_rate": 4.369811320754717e-06,
|
|
"loss": 0.2369,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.6439123790117167,
|
|
"grad_norm": 1.1002851724624634,
|
|
"learning_rate": 4.366037735849057e-06,
|
|
"loss": 0.1995,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.6459500764136525,
|
|
"grad_norm": 1.1481510400772095,
|
|
"learning_rate": 4.362264150943397e-06,
|
|
"loss": 0.2324,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.6479877738155884,
|
|
"grad_norm": 1.2481803894042969,
|
|
"learning_rate": 4.358490566037737e-06,
|
|
"loss": 0.2327,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.6500254712175242,
|
|
"grad_norm": 1.1513328552246094,
|
|
"learning_rate": 4.354716981132076e-06,
|
|
"loss": 0.2585,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.65206316861946,
|
|
"grad_norm": 1.1497119665145874,
|
|
"learning_rate": 4.350943396226415e-06,
|
|
"loss": 0.233,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6541008660213958,
|
|
"grad_norm": 1.1609100103378296,
|
|
"learning_rate": 4.347169811320755e-06,
|
|
"loss": 0.2405,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.6561385634233317,
|
|
"grad_norm": 1.2081102132797241,
|
|
"learning_rate": 4.343396226415095e-06,
|
|
"loss": 0.2378,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.6581762608252675,
|
|
"grad_norm": 1.1453851461410522,
|
|
"learning_rate": 4.339622641509435e-06,
|
|
"loss": 0.2298,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.6602139582272033,
|
|
"grad_norm": 1.051963210105896,
|
|
"learning_rate": 4.3358490566037745e-06,
|
|
"loss": 0.2424,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.6622516556291391,
|
|
"grad_norm": 1.122475028038025,
|
|
"learning_rate": 4.332075471698114e-06,
|
|
"loss": 0.2217,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.6642893530310748,
|
|
"grad_norm": 1.557624101638794,
|
|
"learning_rate": 4.3283018867924535e-06,
|
|
"loss": 0.2476,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.6663270504330107,
|
|
"grad_norm": 1.133476972579956,
|
|
"learning_rate": 4.324528301886793e-06,
|
|
"loss": 0.24,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.6683647478349465,
|
|
"grad_norm": 1.192134976387024,
|
|
"learning_rate": 4.3207547169811325e-06,
|
|
"loss": 0.2285,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.6704024452368823,
|
|
"grad_norm": 1.0376332998275757,
|
|
"learning_rate": 4.316981132075472e-06,
|
|
"loss": 0.2314,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.6724401426388181,
|
|
"grad_norm": 1.1142336130142212,
|
|
"learning_rate": 4.3132075471698115e-06,
|
|
"loss": 0.2213,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.674477840040754,
|
|
"grad_norm": 1.075834035873413,
|
|
"learning_rate": 4.309433962264151e-06,
|
|
"loss": 0.2541,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.6765155374426898,
|
|
"grad_norm": 1.6311166286468506,
|
|
"learning_rate": 4.305660377358491e-06,
|
|
"loss": 0.2273,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.6785532348446256,
|
|
"grad_norm": 1.2183853387832642,
|
|
"learning_rate": 4.30188679245283e-06,
|
|
"loss": 0.2235,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.6805909322465614,
|
|
"grad_norm": 1.115402340888977,
|
|
"learning_rate": 4.29811320754717e-06,
|
|
"loss": 0.2241,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.6826286296484972,
|
|
"grad_norm": 1.1034786701202393,
|
|
"learning_rate": 4.294339622641509e-06,
|
|
"loss": 0.2397,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.6846663270504331,
|
|
"grad_norm": 1.1569246053695679,
|
|
"learning_rate": 4.290566037735849e-06,
|
|
"loss": 0.231,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.6867040244523688,
|
|
"grad_norm": 1.0261273384094238,
|
|
"learning_rate": 4.286792452830189e-06,
|
|
"loss": 0.2381,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.6887417218543046,
|
|
"grad_norm": 1.1715890169143677,
|
|
"learning_rate": 4.283018867924529e-06,
|
|
"loss": 0.2271,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.6907794192562404,
|
|
"grad_norm": 1.1164259910583496,
|
|
"learning_rate": 4.279245283018868e-06,
|
|
"loss": 0.2145,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.6928171166581762,
|
|
"grad_norm": 1.1052844524383545,
|
|
"learning_rate": 4.275471698113208e-06,
|
|
"loss": 0.2303,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.6948548140601121,
|
|
"grad_norm": 1.193002700805664,
|
|
"learning_rate": 4.271698113207547e-06,
|
|
"loss": 0.2329,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.6968925114620479,
|
|
"grad_norm": 1.141808032989502,
|
|
"learning_rate": 4.267924528301887e-06,
|
|
"loss": 0.2293,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.6989302088639837,
|
|
"grad_norm": 1.0740857124328613,
|
|
"learning_rate": 4.264150943396227e-06,
|
|
"loss": 0.2336,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.7009679062659195,
|
|
"grad_norm": 1.1825228929519653,
|
|
"learning_rate": 4.260377358490567e-06,
|
|
"loss": 0.2367,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.7030056036678554,
|
|
"grad_norm": 1.0624991655349731,
|
|
"learning_rate": 4.256603773584906e-06,
|
|
"loss": 0.2246,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.7050433010697912,
|
|
"grad_norm": 1.292654037475586,
|
|
"learning_rate": 4.252830188679246e-06,
|
|
"loss": 0.2479,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.7070809984717269,
|
|
"grad_norm": 1.0635449886322021,
|
|
"learning_rate": 4.249056603773585e-06,
|
|
"loss": 0.2285,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.7091186958736627,
|
|
"grad_norm": 1.0410432815551758,
"learning_rate": 4.245283018867925e-06,
"loss": 0.203,
"step": 348
},
{
"epoch": 0.7111563932755985,
"grad_norm": 1.154789924621582,
"learning_rate": 4.241509433962264e-06,
"loss": 0.2424,
"step": 349
},
{
"epoch": 0.7131940906775344,
"grad_norm": 1.1573512554168701,
"learning_rate": 4.237735849056604e-06,
"loss": 0.2163,
"step": 350
},
{
"epoch": 0.7152317880794702,
"grad_norm": 1.0690231323242188,
"learning_rate": 4.233962264150944e-06,
"loss": 0.2204,
"step": 351
},
{
"epoch": 0.717269485481406,
"grad_norm": 1.1083498001098633,
"learning_rate": 4.230188679245284e-06,
"loss": 0.2258,
"step": 352
},
{
"epoch": 0.7193071828833418,
"grad_norm": 1.260735273361206,
"learning_rate": 4.226415094339623e-06,
"loss": 0.236,
"step": 353
},
{
"epoch": 0.7213448802852777,
"grad_norm": 1.0777976512908936,
"learning_rate": 4.222641509433963e-06,
"loss": 0.217,
"step": 354
},
{
"epoch": 0.7233825776872135,
"grad_norm": 1.0879008769989014,
"learning_rate": 4.218867924528302e-06,
"loss": 0.2163,
"step": 355
},
{
"epoch": 0.7254202750891493,
"grad_norm": 1.1055690050125122,
"learning_rate": 4.215094339622642e-06,
"loss": 0.2244,
"step": 356
},
{
"epoch": 0.7274579724910851,
"grad_norm": 1.1160818338394165,
"learning_rate": 4.2113207547169815e-06,
"loss": 0.2081,
"step": 357
},
{
"epoch": 0.7294956698930208,
"grad_norm": 1.238552212715149,
"learning_rate": 4.2075471698113215e-06,
"loss": 0.2242,
"step": 358
},
{
"epoch": 0.7315333672949567,
"grad_norm": 1.0889108180999756,
"learning_rate": 4.2037735849056605e-06,
"loss": 0.2276,
"step": 359
},
{
"epoch": 0.7335710646968925,
"grad_norm": 1.314106822013855,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.2423,
"step": 360
},
{
"epoch": 0.7356087620988283,
"grad_norm": 1.304366111755371,
"learning_rate": 4.1962264150943395e-06,
"loss": 0.2556,
"step": 361
},
{
"epoch": 0.7376464595007641,
"grad_norm": 1.227425217628479,
"learning_rate": 4.1924528301886794e-06,
"loss": 0.2275,
"step": 362
},
{
"epoch": 0.7396841569027,
"grad_norm": 1.1975058317184448,
"learning_rate": 4.188679245283019e-06,
"loss": 0.2376,
"step": 363
},
{
"epoch": 0.7417218543046358,
"grad_norm": 1.1609851121902466,
"learning_rate": 4.184905660377358e-06,
"loss": 0.2296,
"step": 364
},
{
"epoch": 0.7437595517065716,
"grad_norm": 1.1305787563323975,
"learning_rate": 4.181132075471698e-06,
"loss": 0.231,
"step": 365
},
{
"epoch": 0.7457972491085074,
"grad_norm": 1.245123267173767,
"learning_rate": 4.177358490566038e-06,
"loss": 0.2438,
"step": 366
},
{
"epoch": 0.7478349465104432,
"grad_norm": 1.2077217102050781,
"learning_rate": 4.173584905660378e-06,
"loss": 0.2331,
"step": 367
},
{
"epoch": 0.7498726439123791,
"grad_norm": 1.2838149070739746,
"learning_rate": 4.169811320754717e-06,
"loss": 0.2205,
"step": 368
},
{
"epoch": 0.7519103413143148,
"grad_norm": 1.2761950492858887,
"learning_rate": 4.166037735849056e-06,
"loss": 0.2339,
"step": 369
},
{
"epoch": 0.7539480387162506,
"grad_norm": 1.2258546352386475,
"learning_rate": 4.162264150943396e-06,
"loss": 0.2494,
"step": 370
},
{
"epoch": 0.7559857361181864,
"grad_norm": 1.0878491401672363,
"learning_rate": 4.158490566037736e-06,
"loss": 0.2337,
"step": 371
},
{
"epoch": 0.7580234335201222,
"grad_norm": 1.4389631748199463,
"learning_rate": 4.154716981132076e-06,
"loss": 0.2409,
"step": 372
},
{
"epoch": 0.7600611309220581,
"grad_norm": 1.0960638523101807,
"learning_rate": 4.150943396226416e-06,
"loss": 0.2239,
"step": 373
},
{
"epoch": 0.7620988283239939,
"grad_norm": 1.293862223625183,
"learning_rate": 4.147169811320755e-06,
"loss": 0.2438,
"step": 374
},
{
"epoch": 0.7641365257259297,
"grad_norm": 1.177188754081726,
"learning_rate": 4.143396226415095e-06,
"loss": 0.2241,
"step": 375
},
{
"epoch": 0.7661742231278655,
"grad_norm": 1.2292778491973877,
"learning_rate": 4.139622641509434e-06,
"loss": 0.2386,
"step": 376
},
{
"epoch": 0.7682119205298014,
"grad_norm": 1.1312750577926636,
"learning_rate": 4.135849056603774e-06,
"loss": 0.2331,
"step": 377
},
{
"epoch": 0.7702496179317372,
"grad_norm": 1.0975465774536133,
"learning_rate": 4.132075471698114e-06,
"loss": 0.2213,
"step": 378
},
{
"epoch": 0.7722873153336729,
"grad_norm": 1.2238826751708984,
"learning_rate": 4.128301886792453e-06,
"loss": 0.2338,
"step": 379
},
{
"epoch": 0.7743250127356087,
"grad_norm": 1.3611332178115845,
"learning_rate": 4.124528301886793e-06,
"loss": 0.2454,
"step": 380
},
{
"epoch": 0.7763627101375445,
"grad_norm": 1.3693833351135254,
"learning_rate": 4.120754716981133e-06,
"loss": 0.2434,
"step": 381
},
{
"epoch": 0.7784004075394804,
"grad_norm": 1.2046077251434326,
"learning_rate": 4.116981132075472e-06,
"loss": 0.224,
"step": 382
},
{
"epoch": 0.7804381049414162,
"grad_norm": 1.2061010599136353,
"learning_rate": 4.113207547169812e-06,
"loss": 0.2264,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.782475802343352,
|
|
"grad_norm": 1.0464826822280884,
|
|
"learning_rate": 4.109433962264151e-06,
|
|
"loss": 0.2111,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.7845134997452878,
|
|
"grad_norm": 0.9789960980415344,
|
|
"learning_rate": 4.105660377358491e-06,
|
|
"loss": 0.2082,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.7865511971472237,
|
|
"grad_norm": 1.1676138639450073,
|
|
"learning_rate": 4.101886792452831e-06,
|
|
"loss": 0.2283,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.7885888945491595,
|
|
"grad_norm": 1.179202914237976,
|
|
"learning_rate": 4.0981132075471705e-06,
|
|
"loss": 0.2329,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.7906265919510953,
|
|
"grad_norm": 1.2767287492752075,
|
|
"learning_rate": 4.09433962264151e-06,
|
|
"loss": 0.2378,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.7926642893530311,
|
|
"grad_norm": 1.1678310632705688,
|
|
"learning_rate": 4.0905660377358495e-06,
|
|
"loss": 0.2232,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.7947019867549668,
|
|
"grad_norm": 1.2610273361206055,
|
|
"learning_rate": 4.0867924528301886e-06,
|
|
"loss": 0.2384,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.7967396841569027,
|
|
"grad_norm": 1.3496994972229004,
|
|
"learning_rate": 4.0830188679245285e-06,
|
|
"loss": 0.2346,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.7987773815588385,
|
|
"grad_norm": 1.163509488105774,
|
|
"learning_rate": 4.079245283018868e-06,
|
|
"loss": 0.2234,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.8008150789607743,
|
|
"grad_norm": 1.1540744304656982,
|
|
"learning_rate": 4.075471698113208e-06,
|
|
"loss": 0.2164,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.8028527763627101,
|
|
"grad_norm": 1.158379316329956,
|
|
"learning_rate": 4.071698113207547e-06,
|
|
"loss": 0.2323,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.804890473764646,
|
|
"grad_norm": 1.1848655939102173,
|
|
"learning_rate": 4.067924528301887e-06,
|
|
"loss": 0.2448,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.8069281711665818,
|
|
"grad_norm": 1.239961862564087,
|
|
"learning_rate": 4.064150943396226e-06,
|
|
"loss": 0.2343,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.8089658685685176,
|
|
"grad_norm": 1.0600473880767822,
|
|
"learning_rate": 4.060377358490566e-06,
|
|
"loss": 0.2333,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.8110035659704534,
|
|
"grad_norm": 1.2741254568099976,
|
|
"learning_rate": 4.056603773584906e-06,
|
|
"loss": 0.237,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.8130412633723892,
|
|
"grad_norm": 1.182904839515686,
|
|
"learning_rate": 4.052830188679245e-06,
|
|
"loss": 0.2217,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.8150789607743251,
|
|
"grad_norm": 1.1751116514205933,
|
|
"learning_rate": 4.049056603773585e-06,
|
|
"loss": 0.2354,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8171166581762608,
|
|
"grad_norm": 1.134203553199768,
|
|
"learning_rate": 4.045283018867925e-06,
|
|
"loss": 0.2297,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.8191543555781966,
|
|
"grad_norm": 1.1873515844345093,
|
|
"learning_rate": 4.041509433962265e-06,
|
|
"loss": 0.2476,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.8211920529801324,
|
|
"grad_norm": 1.1874173879623413,
|
|
"learning_rate": 4.037735849056604e-06,
|
|
"loss": 0.2232,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.8232297503820682,
|
|
"grad_norm": 1.119139552116394,
|
|
"learning_rate": 4.033962264150943e-06,
|
|
"loss": 0.2348,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.8252674477840041,
|
|
"grad_norm": 1.1560324430465698,
|
|
"learning_rate": 4.030188679245283e-06,
|
|
"loss": 0.2337,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.8273051451859399,
|
|
"grad_norm": 1.1288225650787354,
|
|
"learning_rate": 4.026415094339623e-06,
|
|
"loss": 0.2319,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.8293428425878757,
|
|
"grad_norm": 1.2800090312957764,
|
|
"learning_rate": 4.022641509433963e-06,
|
|
"loss": 0.2237,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.8313805399898115,
|
|
"grad_norm": 1.2394243478775024,
|
|
"learning_rate": 4.018867924528303e-06,
|
|
"loss": 0.2358,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.8334182373917474,
|
|
"grad_norm": 1.231703758239746,
|
|
"learning_rate": 4.015094339622642e-06,
|
|
"loss": 0.2275,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.8354559347936832,
|
|
"grad_norm": 1.0887949466705322,
|
|
"learning_rate": 4.011320754716982e-06,
|
|
"loss": 0.2335,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8374936321956189,
|
|
"grad_norm": 1.2228186130523682,
|
|
"learning_rate": 4.007547169811321e-06,
|
|
"loss": 0.2307,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.8395313295975547,
|
|
"grad_norm": 1.0364912748336792,
|
|
"learning_rate": 4.003773584905661e-06,
|
|
"loss": 0.2117,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.8415690269994905,
|
|
"grad_norm": 1.0746346712112427,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.2181,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.8436067244014264,
|
|
"grad_norm": 1.0695878267288208,
|
|
"learning_rate": 3.99622641509434e-06,
|
|
"loss": 0.2497,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.8456444218033622,
|
|
"grad_norm": 1.2379292249679565,
|
|
"learning_rate": 3.99245283018868e-06,
|
|
"loss": 0.2236,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.847682119205298,
|
|
"grad_norm": 1.0842210054397583,
|
|
"learning_rate": 3.9886792452830196e-06,
|
|
"loss": 0.2268,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.8497198166072338,
|
|
"grad_norm": 1.2367124557495117,
|
|
"learning_rate": 3.984905660377359e-06,
|
|
"loss": 0.2366,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.8517575140091697,
|
|
"grad_norm": 1.2747502326965332,
|
|
"learning_rate": 3.9811320754716985e-06,
|
|
"loss": 0.2371,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.8537952114111055,
|
|
"grad_norm": 1.1272820234298706,
|
|
"learning_rate": 3.977358490566038e-06,
|
|
"loss": 0.2369,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.8558329088130413,
|
|
"grad_norm": 1.0960078239440918,
|
|
"learning_rate": 3.9735849056603775e-06,
|
|
"loss": 0.2383,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.8578706062149771,
|
|
"grad_norm": 1.1670606136322021,
|
|
"learning_rate": 3.9698113207547174e-06,
|
|
"loss": 0.2511,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.8599083036169128,
|
|
"grad_norm": 1.0942180156707764,
|
|
"learning_rate": 3.966037735849057e-06,
|
|
"loss": 0.2319,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.8619460010188487,
|
|
"grad_norm": 1.1233775615692139,
|
|
"learning_rate": 3.962264150943396e-06,
|
|
"loss": 0.2144,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.8639836984207845,
|
|
"grad_norm": 1.2059624195098877,
|
|
"learning_rate": 3.958490566037736e-06,
|
|
"loss": 0.2212,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.8660213958227203,
|
|
"grad_norm": 1.1963043212890625,
|
|
"learning_rate": 3.954716981132075e-06,
|
|
"loss": 0.2378,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.8680590932246561,
|
|
"grad_norm": 1.2415270805358887,
|
|
"learning_rate": 3.950943396226415e-06,
|
|
"loss": 0.2276,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.870096790626592,
|
|
"grad_norm": 1.3280036449432373,
|
|
"learning_rate": 3.947169811320755e-06,
|
|
"loss": 0.2395,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.8721344880285278,
|
|
"grad_norm": 1.2570695877075195,
|
|
"learning_rate": 3.943396226415095e-06,
|
|
"loss": 0.2474,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.8741721854304636,
|
|
"grad_norm": 1.1252264976501465,
|
|
"learning_rate": 3.939622641509434e-06,
|
|
"loss": 0.2265,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.8762098828323994,
|
|
"grad_norm": 1.0487228631973267,
|
|
"learning_rate": 3.935849056603774e-06,
|
|
"loss": 0.2224,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.8782475802343352,
|
|
"grad_norm": 1.0646063089370728,
|
|
"learning_rate": 3.932075471698113e-06,
|
|
"loss": 0.2232,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.8802852776362711,
|
|
"grad_norm": 1.1609469652175903,
|
|
"learning_rate": 3.928301886792453e-06,
|
|
"loss": 0.2347,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.8823229750382068,
|
|
"grad_norm": 1.0545512437820435,
|
|
"learning_rate": 3.924528301886793e-06,
|
|
"loss": 0.2251,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.8843606724401426,
|
|
"grad_norm": 1.1264142990112305,
|
|
"learning_rate": 3.920754716981132e-06,
|
|
"loss": 0.2459,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.8863983698420784,
|
|
"grad_norm": 1.1396156549453735,
|
|
"learning_rate": 3.916981132075472e-06,
|
|
"loss": 0.2385,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.8884360672440142,
|
|
"grad_norm": 1.17756187915802,
|
|
"learning_rate": 3.913207547169812e-06,
|
|
"loss": 0.2306,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.8904737646459501,
|
|
"grad_norm": 1.0548409223556519,
|
|
"learning_rate": 3.909433962264151e-06,
|
|
"loss": 0.2192,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.8925114620478859,
|
|
"grad_norm": 1.161879062652588,
|
|
"learning_rate": 3.905660377358491e-06,
|
|
"loss": 0.2264,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.8945491594498217,
|
|
"grad_norm": 1.1480745077133179,
|
|
"learning_rate": 3.90188679245283e-06,
|
|
"loss": 0.2389,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.8965868568517575,
|
|
"grad_norm": 1.0667020082473755,
|
|
"learning_rate": 3.89811320754717e-06,
|
|
"loss": 0.2312,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.8986245542536934,
|
|
"grad_norm": 1.2451261281967163,
|
|
"learning_rate": 3.89433962264151e-06,
|
|
"loss": 0.241,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.9006622516556292,
|
|
"grad_norm": 1.2452954053878784,
|
|
"learning_rate": 3.89056603773585e-06,
|
|
"loss": 0.2444,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.9026999490575649,
|
|
"grad_norm": 1.134698510169983,
|
|
"learning_rate": 3.88679245283019e-06,
|
|
"loss": 0.2132,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.9047376464595007,
|
|
"grad_norm": 1.269184947013855,
|
|
"learning_rate": 3.883018867924529e-06,
|
|
"loss": 0.2445,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.9067753438614365,
|
|
"grad_norm": 1.2156351804733276,
|
|
"learning_rate": 3.879245283018868e-06,
|
|
"loss": 0.2469,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.9088130412633724,
|
|
"grad_norm": 1.1011265516281128,
|
|
"learning_rate": 3.875471698113208e-06,
|
|
"loss": 0.2307,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.9108507386653082,
|
|
"grad_norm": 1.08492910861969,
|
|
"learning_rate": 3.871698113207548e-06,
|
|
"loss": 0.2228,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.912888436067244,
|
|
"grad_norm": 1.1414035558700562,
|
|
"learning_rate": 3.8679245283018875e-06,
|
|
"loss": 0.2191,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.9149261334691798,
|
|
"grad_norm": 1.0980679988861084,
|
|
"learning_rate": 3.8641509433962266e-06,
|
|
"loss": 0.2323,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.9169638308711157,
|
|
"grad_norm": 1.1721632480621338,
|
|
"learning_rate": 3.8603773584905665e-06,
|
|
"loss": 0.2457,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9190015282730515,
|
|
"grad_norm": 1.1284496784210205,
|
|
"learning_rate": 3.856603773584906e-06,
|
|
"loss": 0.2326,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.9210392256749873,
|
|
"grad_norm": 1.0117298364639282,
|
|
"learning_rate": 3.8528301886792455e-06,
|
|
"loss": 0.2389,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.9230769230769231,
|
|
"grad_norm": 1.173325777053833,
|
|
"learning_rate": 3.849056603773585e-06,
|
|
"loss": 0.2304,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.9251146204788588,
|
|
"grad_norm": 1.0675781965255737,
|
|
"learning_rate": 3.8452830188679245e-06,
|
|
"loss": 0.2178,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.9271523178807947,
|
|
"grad_norm": 1.0862107276916504,
|
|
"learning_rate": 3.841509433962264e-06,
|
|
"loss": 0.2293,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.9291900152827305,
|
|
"grad_norm": 1.119224190711975,
|
|
"learning_rate": 3.837735849056604e-06,
|
|
"loss": 0.2228,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.9312277126846663,
|
|
"grad_norm": 1.0795427560806274,
|
|
"learning_rate": 3.833962264150944e-06,
|
|
"loss": 0.2235,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.9332654100866021,
|
|
"grad_norm": 1.1415457725524902,
|
|
"learning_rate": 3.830188679245283e-06,
|
|
"loss": 0.2272,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.935303107488538,
|
|
"grad_norm": 1.1307644844055176,
|
|
"learning_rate": 3.826415094339623e-06,
|
|
"loss": 0.2186,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.9373408048904738,
|
|
"grad_norm": 1.1211094856262207,
|
|
"learning_rate": 3.822641509433962e-06,
|
|
"loss": 0.2233,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9393785022924096,
|
|
"grad_norm": 1.1230515241622925,
|
|
"learning_rate": 3.818867924528302e-06,
|
|
"loss": 0.2318,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.9414161996943454,
|
|
"grad_norm": 1.2053518295288086,
|
|
"learning_rate": 3.815094339622642e-06,
|
|
"loss": 0.2225,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.9434538970962812,
|
|
"grad_norm": 1.1487395763397217,
|
|
"learning_rate": 3.8113207547169816e-06,
|
|
"loss": 0.23,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.9454915944982171,
|
|
"grad_norm": 1.03309166431427,
|
|
"learning_rate": 3.807547169811321e-06,
|
|
"loss": 0.2163,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.9475292919001528,
|
|
"grad_norm": 1.2096184492111206,
|
|
"learning_rate": 3.8037735849056605e-06,
|
|
"loss": 0.2312,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.9495669893020886,
|
|
"grad_norm": 1.5864837169647217,
|
|
"learning_rate": 3.8000000000000005e-06,
|
|
"loss": 0.227,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.9516046867040244,
|
|
"grad_norm": 1.1054576635360718,
|
|
"learning_rate": 3.79622641509434e-06,
|
|
"loss": 0.2303,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.9536423841059603,
|
|
"grad_norm": 1.0742146968841553,
|
|
"learning_rate": 3.79245283018868e-06,
|
|
"loss": 0.2282,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.9556800815078961,
|
|
"grad_norm": 1.048632025718689,
|
|
"learning_rate": 3.788679245283019e-06,
|
|
"loss": 0.222,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.9577177789098319,
|
|
"grad_norm": 1.1467828750610352,
|
|
"learning_rate": 3.784905660377359e-06,
|
|
"loss": 0.2169,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9597554763117677,
|
|
"grad_norm": 1.1006637811660767,
|
|
"learning_rate": 3.7811320754716983e-06,
|
|
"loss": 0.227,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.9617931737137035,
|
|
"grad_norm": 1.4877111911773682,
|
|
"learning_rate": 3.7773584905660383e-06,
|
|
"loss": 0.2207,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.9638308711156394,
|
|
"grad_norm": 1.174248456954956,
|
|
"learning_rate": 3.7735849056603777e-06,
|
|
"loss": 0.2257,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.9658685685175752,
|
|
"grad_norm": 1.0940933227539062,
|
|
"learning_rate": 3.7698113207547172e-06,
|
|
"loss": 0.2265,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.9679062659195109,
|
|
"grad_norm": 1.0824356079101562,
|
|
"learning_rate": 3.7660377358490567e-06,
|
|
"loss": 0.2261,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.9699439633214467,
|
|
"grad_norm": 1.0655136108398438,
|
|
"learning_rate": 3.7622641509433966e-06,
|
|
"loss": 0.2148,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.9719816607233825,
|
|
"grad_norm": 1.3165481090545654,
|
|
"learning_rate": 3.758490566037736e-06,
|
|
"loss": 0.2337,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9740193581253184,
|
|
"grad_norm": 1.0988367795944214,
|
|
"learning_rate": 3.754716981132076e-06,
|
|
"loss": 0.1979,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.9760570555272542,
|
|
"grad_norm": 1.0447558164596558,
|
|
"learning_rate": 3.750943396226415e-06,
|
|
"loss": 0.2325,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.97809475292919,
|
|
"grad_norm": 1.1018916368484497,
|
|
"learning_rate": 3.747169811320755e-06,
|
|
"loss": 0.2161,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.9801324503311258,
|
|
"grad_norm": 1.2155579328536987,
|
|
"learning_rate": 3.7433962264150945e-06,
|
|
"loss": 0.2191,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.9821701477330617,
|
|
"grad_norm": 0.9788108468055725,
|
|
"learning_rate": 3.7396226415094344e-06,
|
|
"loss": 0.2282,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.9842078451349975,
|
|
"grad_norm": 1.0340372323989868,
|
|
"learning_rate": 3.7358490566037735e-06,
|
|
"loss": 0.2276,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.9862455425369333,
|
|
"grad_norm": 0.9971087574958801,
|
|
"learning_rate": 3.7320754716981134e-06,
|
|
"loss": 0.2176,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.9882832399388691,
|
|
"grad_norm": 1.0751736164093018,
|
|
"learning_rate": 3.728301886792453e-06,
|
|
"loss": 0.2143,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.9903209373408048,
|
|
"grad_norm": 1.188984751701355,
|
|
"learning_rate": 3.724528301886793e-06,
|
|
"loss": 0.2375,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.9923586347427407,
|
|
"grad_norm": 1.320594072341919,
|
|
"learning_rate": 3.7207547169811327e-06,
|
|
"loss": 0.223,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.9943963321446765,
|
|
"grad_norm": 1.1396737098693848,
|
|
"learning_rate": 3.716981132075472e-06,
|
|
"loss": 0.2413,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.9964340295466123,
|
|
"grad_norm": 1.0497945547103882,
|
|
"learning_rate": 3.7132075471698113e-06,
|
|
"loss": 0.2177,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.9984717269485481,
|
|
"grad_norm": 1.2380748987197876,
|
|
"learning_rate": 3.709433962264151e-06,
|
|
"loss": 0.2351,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.000509424350484,
|
|
"grad_norm": 0.9542668461799622,
|
|
"learning_rate": 3.705660377358491e-06,
|
|
"loss": 0.2136,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.0025471217524198,
|
|
"grad_norm": 0.9574536681175232,
|
|
"learning_rate": 3.7018867924528306e-06,
|
|
"loss": 0.1899,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.0045848191543556,
|
|
"grad_norm": 1.0352755784988403,
|
|
"learning_rate": 3.6981132075471697e-06,
|
|
"loss": 0.1816,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.0066225165562914,
|
|
"grad_norm": 1.0826165676116943,
|
|
"learning_rate": 3.6943396226415096e-06,
|
|
"loss": 0.1858,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.0086602139582272,
|
|
"grad_norm": 1.2422000169754028,
|
|
"learning_rate": 3.6905660377358495e-06,
|
|
"loss": 0.1878,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.010697911360163,
|
|
"grad_norm": 1.0961295366287231,
|
|
"learning_rate": 3.686792452830189e-06,
|
|
"loss": 0.1721,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.012735608762099,
|
|
"grad_norm": 1.2105534076690674,
|
|
"learning_rate": 3.683018867924529e-06,
|
|
"loss": 0.1682,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.0147733061640347,
|
|
"grad_norm": 1.0163434743881226,
|
|
"learning_rate": 3.679245283018868e-06,
|
|
"loss": 0.1745,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.0168110035659705,
|
|
"grad_norm": 1.1357200145721436,
|
|
"learning_rate": 3.675471698113208e-06,
|
|
"loss": 0.174,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.0188487009679064,
|
|
"grad_norm": 1.129521369934082,
|
|
"learning_rate": 3.6716981132075474e-06,
|
|
"loss": 0.1737,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.0208863983698422,
|
|
"grad_norm": 1.1067070960998535,
|
|
"learning_rate": 3.6679245283018873e-06,
|
|
"loss": 0.1715,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.0229240957717778,
|
|
"grad_norm": 1.3292362689971924,
|
|
"learning_rate": 3.664150943396227e-06,
|
|
"loss": 0.1719,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.0249617931737136,
|
|
"grad_norm": 1.184263825416565,
|
|
"learning_rate": 3.6603773584905663e-06,
|
|
"loss": 0.1772,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.0269994905756494,
|
|
"grad_norm": 1.2224076986312866,
|
|
"learning_rate": 3.6566037735849058e-06,
|
|
"loss": 0.1799,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.0290371879775853,
|
|
"grad_norm": 1.2455564737319946,
|
|
"learning_rate": 3.6528301886792457e-06,
|
|
"loss": 0.1875,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.031074885379521,
|
|
"grad_norm": 1.037973165512085,
|
|
"learning_rate": 3.649056603773585e-06,
|
|
"loss": 0.1751,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.033112582781457,
|
|
"grad_norm": 1.3113584518432617,
|
|
"learning_rate": 3.645283018867925e-06,
|
|
"loss": 0.1805,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.0351502801833927,
|
|
"grad_norm": 1.113845705986023,
|
|
"learning_rate": 3.641509433962264e-06,
|
|
"loss": 0.163,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.0371879775853285,
|
|
"grad_norm": 1.1282869577407837,
|
|
"learning_rate": 3.637735849056604e-06,
|
|
"loss": 0.1774,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.0392256749872644,
|
|
"grad_norm": 0.9915235042572021,
|
|
"learning_rate": 3.6339622641509436e-06,
|
|
"loss": 0.1676,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.0412633723892002,
|
|
"grad_norm": 1.1076091527938843,
|
|
"learning_rate": 3.6301886792452835e-06,
|
|
"loss": 0.1811,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.043301069791136,
|
|
"grad_norm": 1.4706580638885498,
|
|
"learning_rate": 3.626415094339623e-06,
|
|
"loss": 0.1749,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.0453387671930718,
|
|
"grad_norm": 1.0995841026306152,
|
|
"learning_rate": 3.6226415094339625e-06,
|
|
"loss": 0.1682,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.0473764645950077,
|
|
"grad_norm": 1.3873177766799927,
|
|
"learning_rate": 3.618867924528302e-06,
|
|
"loss": 0.1812,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.0494141619969435,
|
|
"grad_norm": 1.1935499906539917,
|
|
"learning_rate": 3.615094339622642e-06,
|
|
"loss": 0.1876,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.0514518593988793,
|
|
"grad_norm": 1.2057229280471802,
|
|
"learning_rate": 3.6113207547169814e-06,
|
|
"loss": 0.1815,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.0534895568008151,
|
|
"grad_norm": 1.1333197355270386,
|
|
"learning_rate": 3.6075471698113213e-06,
|
|
"loss": 0.1828,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.055527254202751,
|
|
"grad_norm": 1.0647273063659668,
|
|
"learning_rate": 3.6037735849056603e-06,
|
|
"loss": 0.181,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.0575649516046868,
|
|
"grad_norm": 1.204564094543457,
|
|
"learning_rate": 3.6000000000000003e-06,
|
|
"loss": 0.1825,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.0596026490066226,
|
|
"grad_norm": 1.0661295652389526,
|
|
"learning_rate": 3.5962264150943398e-06,
|
|
"loss": 0.1864,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.0616403464085584,
|
|
"grad_norm": 1.0870025157928467,
|
|
"learning_rate": 3.5924528301886797e-06,
|
|
"loss": 0.1789,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.0636780438104942,
|
|
"grad_norm": 1.0620194673538208,
|
|
"learning_rate": 3.588679245283019e-06,
|
|
"loss": 0.1863,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.06571574121243,
|
|
"grad_norm": 1.1938071250915527,
|
|
"learning_rate": 3.5849056603773586e-06,
|
|
"loss": 0.1863,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.0677534386143657,
|
|
"grad_norm": 1.2299485206604004,
|
|
"learning_rate": 3.581132075471698e-06,
|
|
"loss": 0.1881,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.0697911360163015,
|
|
"grad_norm": 1.043164610862732,
|
|
"learning_rate": 3.577358490566038e-06,
|
|
"loss": 0.1631,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.0718288334182373,
|
|
"grad_norm": 1.200393795967102,
|
|
"learning_rate": 3.5735849056603775e-06,
|
|
"loss": 0.1871,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.0738665308201731,
|
|
"grad_norm": 1.1729276180267334,
|
|
"learning_rate": 3.5698113207547175e-06,
|
|
"loss": 0.1776,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.075904228222109,
|
|
"grad_norm": 1.3533014059066772,
|
|
"learning_rate": 3.5660377358490565e-06,
|
|
"loss": 0.1924,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.0779419256240448,
|
|
"grad_norm": 1.1192210912704468,
|
|
"learning_rate": 3.5622641509433964e-06,
|
|
"loss": 0.1826,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.0799796230259806,
|
|
"grad_norm": 1.2234528064727783,
|
|
"learning_rate": 3.558490566037736e-06,
|
|
"loss": 0.1803,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.0820173204279164,
|
|
"grad_norm": 1.1349793672561646,
|
|
"learning_rate": 3.554716981132076e-06,
|
|
"loss": 0.1862,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.0840550178298523,
|
|
"grad_norm": 1.1058518886566162,
|
|
"learning_rate": 3.5509433962264158e-06,
|
|
"loss": 0.1722,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.086092715231788,
|
|
"grad_norm": 1.0707038640975952,
|
|
"learning_rate": 3.547169811320755e-06,
|
|
"loss": 0.1709,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.088130412633724,
|
|
"grad_norm": 1.2310295104980469,
|
|
"learning_rate": 3.5433962264150943e-06,
|
|
"loss": 0.187,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.0901681100356597,
|
|
"grad_norm": 1.098715901374817,
|
|
"learning_rate": 3.5396226415094342e-06,
|
|
"loss": 0.1695,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.0922058074375955,
|
|
"grad_norm": 1.1150951385498047,
|
|
"learning_rate": 3.535849056603774e-06,
|
|
"loss": 0.1717,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.0942435048395314,
|
|
"grad_norm": 1.0338242053985596,
|
|
"learning_rate": 3.5320754716981136e-06,
|
|
"loss": 0.1789,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.0962812022414672,
|
|
"grad_norm": 1.0984159708023071,
|
|
"learning_rate": 3.5283018867924527e-06,
|
|
"loss": 0.1767,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.098318899643403,
|
|
"grad_norm": 1.1370503902435303,
|
|
"learning_rate": 3.5245283018867926e-06,
|
|
"loss": 0.1863,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.1003565970453388,
|
|
"grad_norm": 1.1123195886611938,
|
|
"learning_rate": 3.5207547169811325e-06,
|
|
"loss": 0.1733,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.1023942944472747,
|
|
"grad_norm": 1.1519520282745361,
|
|
"learning_rate": 3.516981132075472e-06,
|
|
"loss": 0.1669,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.1044319918492105,
|
|
"grad_norm": 1.1219109296798706,
|
|
"learning_rate": 3.513207547169812e-06,
|
|
"loss": 0.1788,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.1064696892511463,
|
|
"grad_norm": 1.237865686416626,
|
|
"learning_rate": 3.509433962264151e-06,
|
|
"loss": 0.1776,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.108507386653082,
|
|
"grad_norm": 1.0959861278533936,
|
|
"learning_rate": 3.505660377358491e-06,
|
|
"loss": 0.1773,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.1105450840550177,
|
|
"grad_norm": 1.079746127128601,
|
|
"learning_rate": 3.5018867924528304e-06,
|
|
"loss": 0.1942,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.1125827814569536,
|
|
"grad_norm": 1.1233259439468384,
|
|
"learning_rate": 3.4981132075471703e-06,
|
|
"loss": 0.1707,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.1146204788588894,
|
|
"grad_norm": 1.2879219055175781,
|
|
"learning_rate": 3.49433962264151e-06,
|
|
"loss": 0.1746,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.1166581762608252,
|
|
"grad_norm": 1.1267422437667847,
|
|
"learning_rate": 3.4905660377358493e-06,
|
|
"loss": 0.1782,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.118695873662761,
|
|
"grad_norm": 1.397052526473999,
|
|
"learning_rate": 3.486792452830189e-06,
|
|
"loss": 0.1696,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.1207335710646968,
|
|
"grad_norm": 1.3258302211761475,
|
|
"learning_rate": 3.4830188679245287e-06,
|
|
"loss": 0.1668,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.1227712684666327,
|
|
"grad_norm": 1.225081205368042,
|
|
"learning_rate": 3.479245283018868e-06,
|
|
"loss": 0.179,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.1248089658685685,
|
|
"grad_norm": 1.187245488166809,
|
|
"learning_rate": 3.475471698113208e-06,
|
|
"loss": 0.1739,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.1268466632705043,
|
|
"grad_norm": 1.2275511026382446,
|
|
"learning_rate": 3.471698113207547e-06,
|
|
"loss": 0.1892,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.1288843606724401,
|
|
"grad_norm": 1.1659022569656372,
|
|
"learning_rate": 3.467924528301887e-06,
|
|
"loss": 0.1801,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.130922058074376,
|
|
"grad_norm": 1.3677842617034912,
|
|
"learning_rate": 3.4641509433962266e-06,
|
|
"loss": 0.1787,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.1329597554763118,
|
|
"grad_norm": 1.2617255449295044,
|
|
"learning_rate": 3.4603773584905665e-06,
|
|
"loss": 0.1792,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.1349974528782476,
|
|
"grad_norm": 1.1734035015106201,
|
|
"learning_rate": 3.456603773584906e-06,
|
|
"loss": 0.1735,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.1370351502801834,
|
|
"grad_norm": 1.3135229349136353,
|
|
"learning_rate": 3.4528301886792455e-06,
|
|
"loss": 0.1844,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.1390728476821192,
|
|
"grad_norm": 1.281538724899292,
|
|
"learning_rate": 3.449056603773585e-06,
|
|
"loss": 0.1811,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.141110545084055,
|
|
"grad_norm": 1.1368190050125122,
|
|
"learning_rate": 3.445283018867925e-06,
|
|
"loss": 0.1633,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.143148242485991,
|
|
"grad_norm": 1.0890092849731445,
|
|
"learning_rate": 3.4415094339622644e-06,
|
|
"loss": 0.178,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.1451859398879267,
|
|
"grad_norm": 1.20881986618042,
|
|
"learning_rate": 3.4377358490566043e-06,
|
|
"loss": 0.1722,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.1472236372898625,
|
|
"grad_norm": 1.1593676805496216,
|
|
"learning_rate": 3.4339622641509434e-06,
|
|
"loss": 0.1829,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.1492613346917984,
|
|
"grad_norm": 1.090755820274353,
|
|
"learning_rate": 3.4301886792452833e-06,
|
|
"loss": 0.1787,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.1512990320937342,
|
|
"grad_norm": 1.2112749814987183,
|
|
"learning_rate": 3.4264150943396228e-06,
|
|
"loss": 0.1801,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.15333672949567,
|
|
"grad_norm": 1.119545340538025,
|
|
"learning_rate": 3.4226415094339627e-06,
|
|
"loss": 0.1821,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.1553744268976056,
|
|
"grad_norm": 1.1820521354675293,
|
|
"learning_rate": 3.4188679245283026e-06,
|
|
"loss": 0.189,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.1574121242995414,
|
|
"grad_norm": 1.2243889570236206,
|
|
"learning_rate": 3.4150943396226417e-06,
|
|
"loss": 0.1838,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.1594498217014773,
|
|
"grad_norm": 1.0234663486480713,
|
|
"learning_rate": 3.411320754716981e-06,
|
|
"loss": 0.1767,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.161487519103413,
|
|
"grad_norm": 1.209953784942627,
|
|
"learning_rate": 3.407547169811321e-06,
|
|
"loss": 0.1769,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.163525216505349,
|
|
"grad_norm": 1.1745116710662842,
|
|
"learning_rate": 3.403773584905661e-06,
|
|
"loss": 0.1856,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.1655629139072847,
|
|
"grad_norm": 1.071757435798645,
|
|
"learning_rate": 3.4000000000000005e-06,
|
|
"loss": 0.1591,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.1676006113092205,
|
|
"grad_norm": 1.150458574295044,
|
|
"learning_rate": 3.3962264150943395e-06,
|
|
"loss": 0.1776,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.1696383087111564,
|
|
"grad_norm": 1.193291187286377,
|
|
"learning_rate": 3.3924528301886795e-06,
|
|
"loss": 0.175,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.1716760061130922,
|
|
"grad_norm": 1.2312043905258179,
|
|
"learning_rate": 3.3886792452830194e-06,
|
|
"loss": 0.1906,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.173713703515028,
|
|
"grad_norm": 1.1354984045028687,
|
|
"learning_rate": 3.384905660377359e-06,
|
|
"loss": 0.1753,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.1757514009169638,
|
|
"grad_norm": 1.3425500392913818,
|
|
"learning_rate": 3.3811320754716988e-06,
|
|
"loss": 0.1876,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.1777890983188997,
|
|
"grad_norm": 1.0738446712493896,
|
|
"learning_rate": 3.377358490566038e-06,
|
|
"loss": 0.1765,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.1798267957208355,
|
|
"grad_norm": 1.1612354516983032,
|
|
"learning_rate": 3.3735849056603773e-06,
|
|
"loss": 0.1712,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.1818644931227713,
|
|
"grad_norm": 1.2308764457702637,
|
|
"learning_rate": 3.3698113207547173e-06,
|
|
"loss": 0.182,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.1839021905247071,
|
|
"grad_norm": 1.3299064636230469,
|
|
"learning_rate": 3.366037735849057e-06,
|
|
"loss": 0.1812,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.185939887926643,
|
|
"grad_norm": 1.1064029932022095,
|
|
"learning_rate": 3.3622641509433967e-06,
|
|
"loss": 0.1853,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.1879775853285788,
|
|
"grad_norm": 1.131239414215088,
|
|
"learning_rate": 3.3584905660377357e-06,
|
|
"loss": 0.1827,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.1900152827305146,
|
|
"grad_norm": 1.1805070638656616,
|
|
"learning_rate": 3.3547169811320756e-06,
|
|
"loss": 0.1937,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.1920529801324504,
|
|
"grad_norm": 1.2116690874099731,
|
|
"learning_rate": 3.3509433962264156e-06,
|
|
"loss": 0.197,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.194090677534386,
|
|
"grad_norm": 1.3518807888031006,
|
|
"learning_rate": 3.347169811320755e-06,
|
|
"loss": 0.1765,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.1961283749363218,
|
|
"grad_norm": 1.2591750621795654,
|
|
"learning_rate": 3.343396226415095e-06,
|
|
"loss": 0.1782,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.1981660723382577,
|
|
"grad_norm": 1.1681146621704102,
|
|
"learning_rate": 3.339622641509434e-06,
|
|
"loss": 0.1811,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.2002037697401935,
|
|
"grad_norm": 1.2340030670166016,
|
|
"learning_rate": 3.335849056603774e-06,
|
|
"loss": 0.1732,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.2022414671421293,
|
|
"grad_norm": 1.2480478286743164,
|
|
"learning_rate": 3.3320754716981134e-06,
|
|
"loss": 0.1747,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.2042791645440651,
|
|
"grad_norm": 1.2134257555007935,
|
|
"learning_rate": 3.3283018867924534e-06,
|
|
"loss": 0.1807,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.206316861946001,
|
|
"grad_norm": 1.050817608833313,
|
|
"learning_rate": 3.324528301886793e-06,
|
|
"loss": 0.1725,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.2083545593479368,
|
|
"grad_norm": 1.2634903192520142,
|
|
"learning_rate": 3.3207547169811323e-06,
|
|
"loss": 0.1883,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.2103922567498726,
|
|
"grad_norm": 1.2350244522094727,
|
|
"learning_rate": 3.316981132075472e-06,
|
|
"loss": 0.1872,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.2124299541518084,
|
|
"grad_norm": 1.232961893081665,
|
|
"learning_rate": 3.3132075471698117e-06,
|
|
"loss": 0.1654,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.2144676515537443,
|
|
"grad_norm": 1.163649320602417,
|
|
"learning_rate": 3.3094339622641512e-06,
|
|
"loss": 0.1933,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.21650534895568,
|
|
"grad_norm": 1.21866774559021,
|
|
"learning_rate": 3.305660377358491e-06,
|
|
"loss": 0.1818,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.218543046357616,
|
|
"grad_norm": 1.1113258600234985,
|
|
"learning_rate": 3.30188679245283e-06,
|
|
"loss": 0.1755,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.2205807437595517,
|
|
"grad_norm": 1.1248152256011963,
|
|
"learning_rate": 3.29811320754717e-06,
|
|
"loss": 0.1756,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.2226184411614875,
|
|
"grad_norm": 1.1161712408065796,
|
|
"learning_rate": 3.2943396226415096e-06,
|
|
"loss": 0.1967,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.2246561385634234,
|
|
"grad_norm": 1.1488161087036133,
|
|
"learning_rate": 3.2905660377358495e-06,
|
|
"loss": 0.1791,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.2266938359653592,
|
|
"grad_norm": 1.2753115892410278,
|
|
"learning_rate": 3.286792452830189e-06,
|
|
"loss": 0.1739,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.228731533367295,
|
|
"grad_norm": 1.1130990982055664,
|
|
"learning_rate": 3.2830188679245285e-06,
|
|
"loss": 0.1682,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.2307692307692308,
|
|
"grad_norm": 1.1455460786819458,
|
|
"learning_rate": 3.279245283018868e-06,
|
|
"loss": 0.1854,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.2328069281711667,
|
|
"grad_norm": 1.1896706819534302,
|
|
"learning_rate": 3.275471698113208e-06,
|
|
"loss": 0.1901,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.2348446255731025,
|
|
"grad_norm": 1.132242202758789,
|
|
"learning_rate": 3.2716981132075474e-06,
|
|
"loss": 0.1705,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.2368823229750383,
|
|
"grad_norm": 1.1296707391738892,
|
|
"learning_rate": 3.2679245283018873e-06,
|
|
"loss": 0.1875,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.2389200203769741,
|
|
"grad_norm": 1.2837047576904297,
|
|
"learning_rate": 3.2641509433962264e-06,
|
|
"loss": 0.1862,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.24095771777891,
|
|
"grad_norm": 1.2516281604766846,
|
|
"learning_rate": 3.2603773584905663e-06,
|
|
"loss": 0.1763,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.2429954151808456,
|
|
"grad_norm": 1.2051138877868652,
|
|
"learning_rate": 3.256603773584906e-06,
|
|
"loss": 0.1681,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.2450331125827814,
|
|
"grad_norm": 1.1206097602844238,
|
|
"learning_rate": 3.2528301886792457e-06,
|
|
"loss": 0.1895,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.2470708099847172,
|
|
"grad_norm": 1.085570216178894,
|
|
"learning_rate": 3.2490566037735848e-06,
|
|
"loss": 0.1674,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.249108507386653,
|
|
"grad_norm": 1.1711559295654297,
|
|
"learning_rate": 3.2452830188679247e-06,
|
|
"loss": 0.1847,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.2511462047885888,
|
|
"grad_norm": 1.2569772005081177,
|
|
"learning_rate": 3.241509433962264e-06,
|
|
"loss": 0.185,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.2531839021905247,
|
|
"grad_norm": 1.265191912651062,
|
|
"learning_rate": 3.237735849056604e-06,
|
|
"loss": 0.188,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.2552215995924605,
|
|
"grad_norm": 1.2143467664718628,
|
|
"learning_rate": 3.233962264150944e-06,
|
|
"loss": 0.189,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.2572592969943963,
|
|
"grad_norm": 1.3829542398452759,
|
|
"learning_rate": 3.230188679245283e-06,
|
|
"loss": 0.174,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.2592969943963321,
|
|
"grad_norm": 1.2590124607086182,
|
|
"learning_rate": 3.2264150943396226e-06,
|
|
"loss": 0.1921,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.261334691798268,
|
|
"grad_norm": 1.125143051147461,
|
|
"learning_rate": 3.2226415094339625e-06,
|
|
"loss": 0.1788,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.2633723892002038,
|
|
"grad_norm": 1.136713981628418,
|
|
"learning_rate": 3.2188679245283024e-06,
|
|
"loss": 0.1665,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.2654100866021396,
|
|
"grad_norm": 1.1080840826034546,
|
|
"learning_rate": 3.215094339622642e-06,
|
|
"loss": 0.1785,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.2674477840040754,
|
|
"grad_norm": 1.0990139245986938,
|
|
"learning_rate": 3.211320754716981e-06,
|
|
"loss": 0.1759,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.2694854814060113,
|
|
"grad_norm": 1.2469940185546875,
|
|
"learning_rate": 3.207547169811321e-06,
|
|
"loss": 0.1738,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.271523178807947,
|
|
"grad_norm": 1.163061261177063,
|
|
"learning_rate": 3.2037735849056608e-06,
|
|
"loss": 0.1881,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.273560876209883,
|
|
"grad_norm": 1.1554782390594482,
|
|
"learning_rate": 3.2000000000000003e-06,
|
|
"loss": 0.1728,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.2755985736118187,
|
|
"grad_norm": 1.1074477434158325,
|
|
"learning_rate": 3.19622641509434e-06,
|
|
"loss": 0.1762,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.2776362710137543,
|
|
"grad_norm": 1.1363695859909058,
|
|
"learning_rate": 3.1924528301886793e-06,
|
|
"loss": 0.172,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.2796739684156901,
|
|
"grad_norm": 1.0740599632263184,
|
|
"learning_rate": 3.188679245283019e-06,
|
|
"loss": 0.1634,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.281711665817626,
|
|
"grad_norm": 1.0671052932739258,
|
|
"learning_rate": 3.1849056603773587e-06,
|
|
"loss": 0.1749,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.2837493632195618,
|
|
"grad_norm": 1.1366360187530518,
|
|
"learning_rate": 3.1811320754716986e-06,
|
|
"loss": 0.1685,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.2857870606214976,
|
|
"grad_norm": 1.250622272491455,
|
|
"learning_rate": 3.177358490566038e-06,
|
|
"loss": 0.1758,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.2878247580234334,
|
|
"grad_norm": 1.145407795906067,
|
|
"learning_rate": 3.1735849056603776e-06,
|
|
"loss": 0.1881,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.2898624554253693,
|
|
"grad_norm": 1.1561169624328613,
|
|
"learning_rate": 3.169811320754717e-06,
|
|
"loss": 0.1846,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.291900152827305,
|
|
"grad_norm": 1.1274852752685547,
|
|
"learning_rate": 3.166037735849057e-06,
|
|
"loss": 0.1765,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.293937850229241,
|
|
"grad_norm": 1.2915289402008057,
|
|
"learning_rate": 3.1622641509433965e-06,
|
|
"loss": 0.1767,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.2959755476311767,
|
|
"grad_norm": 1.1345237493515015,
|
|
"learning_rate": 3.1584905660377364e-06,
|
|
"loss": 0.1689,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.2980132450331126,
|
|
"grad_norm": 1.2380014657974243,
|
|
"learning_rate": 3.1547169811320754e-06,
|
|
"loss": 0.19,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.3000509424350484,
|
|
"grad_norm": 1.1787712574005127,
|
|
"learning_rate": 3.1509433962264154e-06,
|
|
"loss": 0.187,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.3020886398369842,
|
|
"grad_norm": 1.172777771949768,
|
|
"learning_rate": 3.147169811320755e-06,
|
|
"loss": 0.1951,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.30412633723892,
|
|
"grad_norm": 1.1491492986679077,
|
|
"learning_rate": 3.1433962264150948e-06,
|
|
"loss": 0.1651,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.3061640346408558,
|
|
"grad_norm": 1.1255732774734497,
|
|
"learning_rate": 3.1396226415094343e-06,
|
|
"loss": 0.1838,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.3082017320427917,
|
|
"grad_norm": 1.2315205335617065,
|
|
"learning_rate": 3.1358490566037737e-06,
|
|
"loss": 0.1785,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.3102394294447275,
|
|
"grad_norm": 1.1849606037139893,
|
|
"learning_rate": 3.1320754716981132e-06,
|
|
"loss": 0.177,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.3122771268466633,
|
|
"grad_norm": 1.1372692584991455,
|
|
"learning_rate": 3.128301886792453e-06,
|
|
"loss": 0.1747,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.3143148242485991,
|
|
"grad_norm": 1.2609679698944092,
|
|
"learning_rate": 3.1245283018867926e-06,
|
|
"loss": 0.1836,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.316352521650535,
|
|
"grad_norm": 1.179504156112671,
|
|
"learning_rate": 3.1207547169811326e-06,
|
|
"loss": 0.1771,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.3183902190524708,
|
|
"grad_norm": 1.2097948789596558,
|
|
"learning_rate": 3.1169811320754716e-06,
|
|
"loss": 0.1873,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.3204279164544066,
|
|
"grad_norm": 1.1823457479476929,
|
|
"learning_rate": 3.1132075471698115e-06,
|
|
"loss": 0.1893,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.3224656138563424,
|
|
"grad_norm": 1.1036756038665771,
|
|
"learning_rate": 3.109433962264151e-06,
|
|
"loss": 0.1651,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.3245033112582782,
|
|
"grad_norm": 1.1787657737731934,
|
|
"learning_rate": 3.105660377358491e-06,
|
|
"loss": 0.1675,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.326541008660214,
|
|
"grad_norm": 1.2578370571136475,
|
|
"learning_rate": 3.1018867924528304e-06,
|
|
"loss": 0.1854,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.32857870606215,
|
|
"grad_norm": 1.136648178100586,
|
|
"learning_rate": 3.09811320754717e-06,
|
|
"loss": 0.1744,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.3306164034640857,
|
|
"grad_norm": 1.1764239072799683,
|
|
"learning_rate": 3.0943396226415094e-06,
|
|
"loss": 0.1721,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.3326541008660213,
|
|
"grad_norm": 1.0723998546600342,
|
|
"learning_rate": 3.0905660377358493e-06,
|
|
"loss": 0.1719,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.3346917982679571,
|
|
"grad_norm": 1.1238343715667725,
|
|
"learning_rate": 3.086792452830189e-06,
|
|
"loss": 0.1752,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.336729495669893,
|
|
"grad_norm": 1.1110343933105469,
|
|
"learning_rate": 3.0830188679245287e-06,
|
|
"loss": 0.1806,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.3387671930718288,
|
|
"grad_norm": 1.1625982522964478,
|
|
"learning_rate": 3.079245283018868e-06,
|
|
"loss": 0.1919,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.3408048904737646,
|
|
"grad_norm": 1.2139103412628174,
|
|
"learning_rate": 3.0754716981132077e-06,
|
|
"loss": 0.1807,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.3428425878757004,
|
|
"grad_norm": 1.2624015808105469,
|
|
"learning_rate": 3.071698113207547e-06,
|
|
"loss": 0.1774,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.3448802852776363,
|
|
"grad_norm": 1.1833164691925049,
|
|
"learning_rate": 3.067924528301887e-06,
|
|
"loss": 0.1765,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.346917982679572,
|
|
"grad_norm": 1.3421837091445923,
|
|
"learning_rate": 3.064150943396227e-06,
|
|
"loss": 0.1861,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.348955680081508,
|
|
"grad_norm": 1.1380257606506348,
|
|
"learning_rate": 3.060377358490566e-06,
|
|
"loss": 0.1642,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.3509933774834437,
|
|
"grad_norm": 1.2193323373794556,
|
|
"learning_rate": 3.0566037735849056e-06,
|
|
"loss": 0.1804,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.3530310748853795,
|
|
"grad_norm": 1.0917553901672363,
|
|
"learning_rate": 3.0528301886792455e-06,
|
|
"loss": 0.1761,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.3550687722873154,
|
|
"grad_norm": 1.252640724182129,
|
|
"learning_rate": 3.0490566037735854e-06,
|
|
"loss": 0.1892,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.3571064696892512,
|
|
"grad_norm": 1.2436408996582031,
|
|
"learning_rate": 3.045283018867925e-06,
|
|
"loss": 0.1865,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.359144167091187,
|
|
"grad_norm": 1.0737476348876953,
|
|
"learning_rate": 3.041509433962264e-06,
|
|
"loss": 0.173,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.3611818644931228,
|
|
"grad_norm": 1.3767677545547485,
|
|
"learning_rate": 3.037735849056604e-06,
|
|
"loss": 0.1855,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.3632195618950587,
|
|
"grad_norm": 1.1147671937942505,
|
|
"learning_rate": 3.033962264150944e-06,
|
|
"loss": 0.175,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.3652572592969943,
|
|
"grad_norm": 1.2812708616256714,
|
|
"learning_rate": 3.0301886792452833e-06,
|
|
"loss": 0.1844,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.36729495669893,
|
|
"grad_norm": 1.028883695602417,
|
|
"learning_rate": 3.0264150943396232e-06,
|
|
"loss": 0.1641,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.369332654100866,
|
|
"grad_norm": 1.2508153915405273,
|
|
"learning_rate": 3.0226415094339623e-06,
|
|
"loss": 0.1884,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.3713703515028017,
|
|
"grad_norm": 1.2635626792907715,
|
|
"learning_rate": 3.018867924528302e-06,
|
|
"loss": 0.1835,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.3734080489047376,
|
|
"grad_norm": 1.1258081197738647,
|
|
"learning_rate": 3.0150943396226417e-06,
|
|
"loss": 0.1694,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.3754457463066734,
|
|
"grad_norm": 1.1584776639938354,
|
|
"learning_rate": 3.0113207547169816e-06,
|
|
"loss": 0.1928,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.3774834437086092,
|
|
"grad_norm": 1.1394814252853394,
|
|
"learning_rate": 3.007547169811321e-06,
|
|
"loss": 0.1698,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.379521141110545,
|
|
"grad_norm": 1.1019212007522583,
|
|
"learning_rate": 3.0037735849056606e-06,
|
|
"loss": 0.1604,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.3815588385124808,
|
|
"grad_norm": 1.262918472290039,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.1713,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.3835965359144167,
|
|
"grad_norm": 1.1134512424468994,
|
|
"learning_rate": 2.99622641509434e-06,
|
|
"loss": 0.1738,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.3856342333163525,
|
|
"grad_norm": 1.1910215616226196,
|
|
"learning_rate": 2.9924528301886795e-06,
|
|
"loss": 0.1854,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.3876719307182883,
|
|
"grad_norm": 1.0705041885375977,
|
|
"learning_rate": 2.9886792452830194e-06,
|
|
"loss": 0.1679,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.3897096281202241,
|
|
"grad_norm": 1.0849546194076538,
|
|
"learning_rate": 2.9849056603773585e-06,
|
|
"loss": 0.1732,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.39174732552216,
|
|
"grad_norm": 1.1088389158248901,
|
|
"learning_rate": 2.9811320754716984e-06,
|
|
"loss": 0.1832,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.3937850229240958,
|
|
"grad_norm": 1.1701173782348633,
|
|
"learning_rate": 2.977358490566038e-06,
|
|
"loss": 0.1832,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.3958227203260316,
|
|
"grad_norm": 1.1918519735336304,
|
|
"learning_rate": 2.9735849056603778e-06,
|
|
"loss": 0.1863,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.3978604177279674,
|
|
"grad_norm": 1.207116723060608,
|
|
"learning_rate": 2.9698113207547173e-06,
|
|
"loss": 0.1806,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.3998981151299033,
|
|
"grad_norm": 1.2102634906768799,
|
|
"learning_rate": 2.9660377358490568e-06,
|
|
"loss": 0.1759,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.401935812531839,
|
|
"grad_norm": 1.1316732168197632,
|
|
"learning_rate": 2.9622641509433963e-06,
|
|
"loss": 0.17,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.403973509933775,
|
|
"grad_norm": 1.204567790031433,
|
|
"learning_rate": 2.958490566037736e-06,
|
|
"loss": 0.1908,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.4060112073357107,
|
|
"grad_norm": 1.0931925773620605,
|
|
"learning_rate": 2.9547169811320757e-06,
|
|
"loss": 0.1784,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.4080489047376465,
|
|
"grad_norm": 1.2366472482681274,
|
|
"learning_rate": 2.9509433962264156e-06,
|
|
"loss": 0.2053,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.4100866021395824,
|
|
"grad_norm": 1.169756531715393,
|
|
"learning_rate": 2.9471698113207546e-06,
|
|
"loss": 0.1803,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.4121242995415182,
|
|
"grad_norm": 1.271429419517517,
|
|
"learning_rate": 2.9433962264150946e-06,
|
|
"loss": 0.1867,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.414161996943454,
|
|
"grad_norm": 1.2226650714874268,
|
|
"learning_rate": 2.939622641509434e-06,
|
|
"loss": 0.1643,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.4161996943453898,
|
|
"grad_norm": 1.2417409420013428,
|
|
"learning_rate": 2.935849056603774e-06,
|
|
"loss": 0.1897,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.4182373917473257,
|
|
"grad_norm": 1.24673593044281,
|
|
"learning_rate": 2.932075471698114e-06,
|
|
"loss": 0.1648,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.4202750891492613,
|
|
"grad_norm": 1.336515188217163,
|
|
"learning_rate": 2.928301886792453e-06,
|
|
"loss": 0.1913,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.422312786551197,
|
|
"grad_norm": 1.1495544910430908,
|
|
"learning_rate": 2.9245283018867924e-06,
|
|
"loss": 0.1825,
|
|
"step": 698
|
|
},
|
|
{
"epoch": 1.424350483953133,
"grad_norm": 1.181207537651062,
"learning_rate": 2.9207547169811324e-06,
"loss": 0.1814,
"step": 699
},
{
"epoch": 1.4263881813550687,
"grad_norm": 1.2883107662200928,
"learning_rate": 2.9169811320754723e-06,
"loss": 0.1859,
"step": 700
},
|
|
{
|
|
"epoch": 1.4284258787570046,
|
|
"grad_norm": 1.14235520362854,
|
|
"learning_rate": 2.9132075471698118e-06,
|
|
"loss": 0.1846,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.4304635761589404,
|
|
"grad_norm": 1.0994147062301636,
|
|
"learning_rate": 2.909433962264151e-06,
|
|
"loss": 0.18,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.4325012735608762,
|
|
"grad_norm": 1.2511541843414307,
|
|
"learning_rate": 2.9056603773584907e-06,
|
|
"loss": 0.1759,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.434538970962812,
|
|
"grad_norm": 1.0954980850219727,
|
|
"learning_rate": 2.9018867924528307e-06,
|
|
"loss": 0.1724,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.4365766683647478,
|
|
"grad_norm": 1.3084522485733032,
|
|
"learning_rate": 2.89811320754717e-06,
|
|
"loss": 0.179,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.4386143657666837,
|
|
"grad_norm": 1.1592984199523926,
|
|
"learning_rate": 2.89433962264151e-06,
|
|
"loss": 0.1798,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.4406520631686195,
|
|
"grad_norm": 1.1409646272659302,
|
|
"learning_rate": 2.890566037735849e-06,
|
|
"loss": 0.175,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.4426897605705553,
|
|
"grad_norm": 1.3026984930038452,
|
|
"learning_rate": 2.886792452830189e-06,
|
|
"loss": 0.1801,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.4447274579724911,
|
|
"grad_norm": 1.115729570388794,
|
|
"learning_rate": 2.8830188679245285e-06,
|
|
"loss": 0.188,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.446765155374427,
|
|
"grad_norm": 1.3142112493515015,
|
|
"learning_rate": 2.8792452830188684e-06,
|
|
"loss": 0.187,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.4488028527763628,
|
|
"grad_norm": 1.2339842319488525,
|
|
"learning_rate": 2.875471698113208e-06,
|
|
"loss": 0.1614,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.4508405501782986,
|
|
"grad_norm": 1.2981687784194946,
|
|
"learning_rate": 2.871698113207547e-06,
|
|
"loss": 0.1688,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.4528782475802342,
|
|
"grad_norm": 1.1264586448669434,
|
|
"learning_rate": 2.867924528301887e-06,
|
|
"loss": 0.1754,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.45491594498217,
|
|
"grad_norm": 1.1794300079345703,
|
|
"learning_rate": 2.864150943396227e-06,
|
|
"loss": 0.1876,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.4569536423841059,
|
|
"grad_norm": 1.0934234857559204,
|
|
"learning_rate": 2.8603773584905663e-06,
|
|
"loss": 0.18,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.4589913397860417,
|
|
"grad_norm": 1.1383419036865234,
|
|
"learning_rate": 2.8566037735849062e-06,
|
|
"loss": 0.1812,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.4610290371879775,
|
|
"grad_norm": 1.1334176063537598,
|
|
"learning_rate": 2.8528301886792453e-06,
|
|
"loss": 0.1699,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.4630667345899133,
|
|
"grad_norm": 1.2105752229690552,
|
|
"learning_rate": 2.8490566037735852e-06,
|
|
"loss": 0.1886,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.4651044319918491,
|
|
"grad_norm": 1.1222751140594482,
|
|
"learning_rate": 2.8452830188679247e-06,
|
|
"loss": 0.1677,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.467142129393785,
|
|
"grad_norm": 1.0429809093475342,
|
|
"learning_rate": 2.8415094339622646e-06,
|
|
"loss": 0.1801,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.4691798267957208,
|
|
"grad_norm": 1.1673039197921753,
|
|
"learning_rate": 2.837735849056604e-06,
|
|
"loss": 0.1824,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.4712175241976566,
|
|
"grad_norm": 1.2965126037597656,
|
|
"learning_rate": 2.8339622641509436e-06,
|
|
"loss": 0.1789,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.4732552215995924,
|
|
"grad_norm": 1.1965491771697998,
|
|
"learning_rate": 2.830188679245283e-06,
|
|
"loss": 0.1875,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.4752929190015283,
|
|
"grad_norm": 1.1529309749603271,
|
|
"learning_rate": 2.826415094339623e-06,
|
|
"loss": 0.1846,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.477330616403464,
|
|
"grad_norm": 1.1195148229599,
|
|
"learning_rate": 2.8226415094339625e-06,
|
|
"loss": 0.1655,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.4793683138054,
|
|
"grad_norm": 1.2534137964248657,
|
|
"learning_rate": 2.8188679245283024e-06,
|
|
"loss": 0.1779,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.4814060112073357,
|
|
"grad_norm": 1.1430234909057617,
|
|
"learning_rate": 2.8150943396226415e-06,
|
|
"loss": 0.1855,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.4834437086092715,
|
|
"grad_norm": 1.1733477115631104,
|
|
"learning_rate": 2.8113207547169814e-06,
|
|
"loss": 0.1818,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.4854814060112074,
|
|
"grad_norm": 1.2729791402816772,
|
|
"learning_rate": 2.807547169811321e-06,
|
|
"loss": 0.1845,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.4875191034131432,
|
|
"grad_norm": 1.2047133445739746,
|
|
"learning_rate": 2.803773584905661e-06,
|
|
"loss": 0.1853,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.489556800815079,
|
|
"grad_norm": 1.0154218673706055,
|
|
"learning_rate": 2.8000000000000003e-06,
|
|
"loss": 0.1845,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.4915944982170148,
|
|
"grad_norm": 1.0939674377441406,
|
|
"learning_rate": 2.7962264150943398e-06,
|
|
"loss": 0.1856,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.4936321956189507,
|
|
"grad_norm": 1.1324870586395264,
|
|
"learning_rate": 2.7924528301886793e-06,
|
|
"loss": 0.1755,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.4956698930208865,
|
|
"grad_norm": 1.4036580324172974,
|
|
"learning_rate": 2.788679245283019e-06,
|
|
"loss": 0.2023,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.4977075904228223,
|
|
"grad_norm": 1.1628963947296143,
|
|
"learning_rate": 2.7849056603773587e-06,
|
|
"loss": 0.1787,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.4997452878247581,
|
|
"grad_norm": 1.0612685680389404,
|
|
"learning_rate": 2.7811320754716986e-06,
|
|
"loss": 0.1709,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.501782985226694,
|
|
"grad_norm": 1.1758002042770386,
|
|
"learning_rate": 2.7773584905660377e-06,
|
|
"loss": 0.1844,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.5038206826286298,
|
|
"grad_norm": 1.1747825145721436,
|
|
"learning_rate": 2.7735849056603776e-06,
|
|
"loss": 0.1759,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.5058583800305656,
|
|
"grad_norm": 1.1918827295303345,
|
|
"learning_rate": 2.769811320754717e-06,
|
|
"loss": 0.1685,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.5078960774325014,
|
|
"grad_norm": 1.1047258377075195,
|
|
"learning_rate": 2.766037735849057e-06,
|
|
"loss": 0.1826,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.5099337748344372,
|
|
"grad_norm": 1.209409236907959,
|
|
"learning_rate": 2.762264150943397e-06,
|
|
"loss": 0.1703,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.5119714722363728,
|
|
"grad_norm": 1.1031354665756226,
|
|
"learning_rate": 2.758490566037736e-06,
|
|
"loss": 0.1766,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.5140091696383087,
|
|
"grad_norm": 1.2434014081954956,
|
|
"learning_rate": 2.7547169811320755e-06,
|
|
"loss": 0.1844,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.5160468670402445,
|
|
"grad_norm": 1.177281379699707,
|
|
"learning_rate": 2.7509433962264154e-06,
|
|
"loss": 0.186,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.5180845644421803,
|
|
"grad_norm": 1.0548818111419678,
|
|
"learning_rate": 2.7471698113207553e-06,
|
|
"loss": 0.1675,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.5201222618441161,
|
|
"grad_norm": 1.1306318044662476,
|
|
"learning_rate": 2.7433962264150944e-06,
|
|
"loss": 0.1713,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.522159959246052,
|
|
"grad_norm": 1.205263376235962,
|
|
"learning_rate": 2.739622641509434e-06,
|
|
"loss": 0.1906,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.5241976566479878,
|
|
"grad_norm": 2.5892493724823,
|
|
"learning_rate": 2.7358490566037738e-06,
|
|
"loss": 0.1757,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.5262353540499236,
|
|
"grad_norm": 1.0715084075927734,
|
|
"learning_rate": 2.7320754716981137e-06,
|
|
"loss": 0.173,
|
|
"step": 749
|
|
},
|
|
{
"epoch": 1.5282730514518594,
"grad_norm": 1.231529712677002,
"learning_rate": 2.728301886792453e-06,
"loss": 0.1701,
"step": 750
},
|
|
{
|
|
"epoch": 1.5303107488537953,
|
|
"grad_norm": 1.2592768669128418,
|
|
"learning_rate": 2.7245283018867922e-06,
|
|
"loss": 0.1774,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.532348446255731,
|
|
"grad_norm": 1.2342033386230469,
|
|
"learning_rate": 2.720754716981132e-06,
|
|
"loss": 0.17,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.5343861436576667,
|
|
"grad_norm": 1.1225703954696655,
|
|
"learning_rate": 2.716981132075472e-06,
|
|
"loss": 0.1786,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.5364238410596025,
|
|
"grad_norm": 1.204437494277954,
|
|
"learning_rate": 2.7132075471698116e-06,
|
|
"loss": 0.182,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.5384615384615383,
|
|
"grad_norm": 1.152274489402771,
|
|
"learning_rate": 2.7094339622641515e-06,
|
|
"loss": 0.1845,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.5404992358634741,
|
|
"grad_norm": 1.268399715423584,
|
|
"learning_rate": 2.7056603773584905e-06,
|
|
"loss": 0.1866,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.54253693326541,
|
|
"grad_norm": 1.3325903415679932,
|
|
"learning_rate": 2.7018867924528304e-06,
|
|
"loss": 0.1788,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.5445746306673458,
|
|
"grad_norm": 1.164884090423584,
|
|
"learning_rate": 2.69811320754717e-06,
|
|
"loss": 0.1863,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.5466123280692816,
|
|
"grad_norm": 1.1347957849502563,
|
|
"learning_rate": 2.69433962264151e-06,
|
|
"loss": 0.1902,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.5486500254712174,
|
|
"grad_norm": 1.1705092191696167,
|
|
"learning_rate": 2.6905660377358493e-06,
|
|
"loss": 0.1757,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.5506877228731533,
|
|
"grad_norm": 1.1735482215881348,
|
|
"learning_rate": 2.686792452830189e-06,
|
|
"loss": 0.1743,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.552725420275089,
|
|
"grad_norm": 1.1496127843856812,
|
|
"learning_rate": 2.6830188679245283e-06,
|
|
"loss": 0.1704,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.554763117677025,
|
|
"grad_norm": 1.1327245235443115,
|
|
"learning_rate": 2.6792452830188682e-06,
|
|
"loss": 0.1687,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.5568008150789607,
|
|
"grad_norm": 1.235737919807434,
|
|
"learning_rate": 2.6754716981132077e-06,
|
|
"loss": 0.1699,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.5588385124808966,
|
|
"grad_norm": 1.0961453914642334,
|
|
"learning_rate": 2.6716981132075476e-06,
|
|
"loss": 0.174,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.5608762098828324,
|
|
"grad_norm": 1.1706377267837524,
|
|
"learning_rate": 2.6679245283018867e-06,
|
|
"loss": 0.1679,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.5629139072847682,
|
|
"grad_norm": 1.314253330230713,
|
|
"learning_rate": 2.6641509433962266e-06,
|
|
"loss": 0.1859,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.564951604686704,
|
|
"grad_norm": 1.0271321535110474,
|
|
"learning_rate": 2.660377358490566e-06,
|
|
"loss": 0.1717,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.5669893020886398,
|
|
"grad_norm": 1.11105215549469,
|
|
"learning_rate": 2.656603773584906e-06,
|
|
"loss": 0.1699,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.5690269994905757,
|
|
"grad_norm": 1.2342256307601929,
|
|
"learning_rate": 2.6528301886792455e-06,
|
|
"loss": 0.1836,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.5710646968925115,
|
|
"grad_norm": 1.208130121231079,
|
|
"learning_rate": 2.649056603773585e-06,
|
|
"loss": 0.1708,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.5731023942944473,
|
|
"grad_norm": 1.235351324081421,
|
|
"learning_rate": 2.6452830188679245e-06,
|
|
"loss": 0.1976,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.5751400916963831,
|
|
"grad_norm": 1.0710421800613403,
|
|
"learning_rate": 2.6415094339622644e-06,
|
|
"loss": 0.1734,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.577177789098319,
|
|
"grad_norm": 0.9788026213645935,
|
|
"learning_rate": 2.637735849056604e-06,
|
|
"loss": 0.1701,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.5792154865002548,
|
|
"grad_norm": 1.1931087970733643,
|
|
"learning_rate": 2.633962264150944e-06,
|
|
"loss": 0.1667,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.5812531839021906,
|
|
"grad_norm": 1.242144227027893,
|
|
"learning_rate": 2.630188679245283e-06,
|
|
"loss": 0.1923,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.5832908813041264,
|
|
"grad_norm": 1.2944048643112183,
|
|
"learning_rate": 2.626415094339623e-06,
|
|
"loss": 0.1768,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.5853285787060623,
|
|
"grad_norm": 1.0808852910995483,
|
|
"learning_rate": 2.6226415094339623e-06,
|
|
"loss": 0.1721,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.587366276107998,
|
|
"grad_norm": 1.147532343864441,
|
|
"learning_rate": 2.6188679245283022e-06,
|
|
"loss": 0.1618,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.589403973509934,
|
|
"grad_norm": 1.2777063846588135,
|
|
"learning_rate": 2.615094339622642e-06,
|
|
"loss": 0.1831,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.5914416709118697,
|
|
"grad_norm": 1.1522384881973267,
|
|
"learning_rate": 2.611320754716981e-06,
|
|
"loss": 0.1725,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.5934793683138055,
|
|
"grad_norm": 1.1645333766937256,
|
|
"learning_rate": 2.6075471698113207e-06,
|
|
"loss": 0.1724,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.5955170657157414,
|
|
"grad_norm": 1.1945953369140625,
|
|
"learning_rate": 2.6037735849056606e-06,
|
|
"loss": 0.182,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.5975547631176772,
|
|
"grad_norm": 1.2776046991348267,
|
|
"learning_rate": 2.6e-06,
|
|
"loss": 0.1783,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.5995924605196128,
|
|
"grad_norm": 1.0407108068466187,
|
|
"learning_rate": 2.59622641509434e-06,
|
|
"loss": 0.1651,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.6016301579215486,
|
|
"grad_norm": 1.1741459369659424,
|
|
"learning_rate": 2.592452830188679e-06,
|
|
"loss": 0.1759,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.6036678553234844,
|
|
"grad_norm": 1.1339528560638428,
|
|
"learning_rate": 2.588679245283019e-06,
|
|
"loss": 0.1753,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.6057055527254203,
|
|
"grad_norm": 1.5073323249816895,
|
|
"learning_rate": 2.5849056603773585e-06,
|
|
"loss": 0.1828,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.607743250127356,
|
|
"grad_norm": 1.097970962524414,
|
|
"learning_rate": 2.5811320754716984e-06,
|
|
"loss": 0.1709,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.609780947529292,
|
|
"grad_norm": 1.0759773254394531,
|
|
"learning_rate": 2.5773584905660383e-06,
|
|
"loss": 0.1562,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.6118186449312277,
|
|
"grad_norm": 1.1199358701705933,
|
|
"learning_rate": 2.5735849056603774e-06,
|
|
"loss": 0.1751,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.6138563423331636,
|
|
"grad_norm": 1.162474513053894,
|
|
"learning_rate": 2.569811320754717e-06,
|
|
"loss": 0.1691,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.6158940397350994,
|
|
"grad_norm": 1.170835256576538,
|
|
"learning_rate": 2.5660377358490568e-06,
|
|
"loss": 0.179,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.6179317371370352,
|
|
"grad_norm": 1.087983250617981,
|
|
"learning_rate": 2.5622641509433967e-06,
|
|
"loss": 0.1736,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.6199694345389708,
|
|
"grad_norm": 1.1620844602584839,
|
|
"learning_rate": 2.558490566037736e-06,
|
|
"loss": 0.1815,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.6220071319409066,
|
|
"grad_norm": 1.1823047399520874,
|
|
"learning_rate": 2.5547169811320753e-06,
|
|
"loss": 0.1797,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.6240448293428424,
|
|
"grad_norm": 1.1422289609909058,
|
|
"learning_rate": 2.550943396226415e-06,
|
|
"loss": 0.1812,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.6260825267447783,
|
|
"grad_norm": 1.2025611400604248,
|
|
"learning_rate": 2.547169811320755e-06,
|
|
"loss": 0.1807,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.628120224146714,
|
|
"grad_norm": 1.140370488166809,
|
|
"learning_rate": 2.5433962264150946e-06,
|
|
"loss": 0.1782,
|
|
"step": 799
|
|
},
|
|
{
"epoch": 1.63015792154865,
"grad_norm": 1.1452966928482056,
"learning_rate": 2.5396226415094345e-06,
"loss": 0.1724,
"step": 800
},
|
|
{
|
|
"epoch": 1.6321956189505857,
|
|
"grad_norm": 1.217185616493225,
|
|
"learning_rate": 2.5358490566037736e-06,
|
|
"loss": 0.1807,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.6342333163525216,
|
|
"grad_norm": 1.0574156045913696,
|
|
"learning_rate": 2.5320754716981135e-06,
|
|
"loss": 0.1694,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.6362710137544574,
|
|
"grad_norm": 1.015283226966858,
|
|
"learning_rate": 2.528301886792453e-06,
|
|
"loss": 0.1713,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.6383087111563932,
|
|
"grad_norm": 1.1992040872573853,
|
|
"learning_rate": 2.524528301886793e-06,
|
|
"loss": 0.1844,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.640346408558329,
|
|
"grad_norm": 1.2918540239334106,
|
|
"learning_rate": 2.5207547169811324e-06,
|
|
"loss": 0.1813,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.6423841059602649,
|
|
"grad_norm": 1.1141362190246582,
|
|
"learning_rate": 2.516981132075472e-06,
|
|
"loss": 0.1763,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.6444218033622007,
|
|
"grad_norm": 1.0930787324905396,
|
|
"learning_rate": 2.5132075471698114e-06,
|
|
"loss": 0.184,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.6464595007641365,
|
|
"grad_norm": 1.1243940591812134,
|
|
"learning_rate": 2.5094339622641513e-06,
|
|
"loss": 0.1821,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.6484971981660723,
|
|
"grad_norm": 1.1842948198318481,
|
|
"learning_rate": 2.5056603773584908e-06,
|
|
"loss": 0.1684,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.6505348955680081,
|
|
"grad_norm": 1.2824788093566895,
|
|
"learning_rate": 2.5018867924528307e-06,
|
|
"loss": 0.182,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.652572592969944,
|
|
"grad_norm": 1.1476082801818848,
|
|
"learning_rate": 2.49811320754717e-06,
|
|
"loss": 0.1847,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.6546102903718798,
|
|
"grad_norm": 1.1569533348083496,
|
|
"learning_rate": 2.4943396226415097e-06,
|
|
"loss": 0.1815,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.6566479877738156,
|
|
"grad_norm": 1.1782304048538208,
|
|
"learning_rate": 2.490566037735849e-06,
|
|
"loss": 0.1754,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.6586856851757514,
|
|
"grad_norm": 1.1351999044418335,
|
|
"learning_rate": 2.486792452830189e-06,
|
|
"loss": 0.189,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.6607233825776873,
|
|
"grad_norm": 1.1230946779251099,
|
|
"learning_rate": 2.4830188679245285e-06,
|
|
"loss": 0.1781,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.662761079979623,
|
|
"grad_norm": 1.062568187713623,
|
|
"learning_rate": 2.479245283018868e-06,
|
|
"loss": 0.1665,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.664798777381559,
|
|
"grad_norm": 1.1602753400802612,
|
|
"learning_rate": 2.4754716981132075e-06,
|
|
"loss": 0.1738,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.6668364747834947,
|
|
"grad_norm": 1.1816747188568115,
|
|
"learning_rate": 2.4716981132075474e-06,
|
|
"loss": 0.1675,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.6688741721854305,
|
|
"grad_norm": 1.1682571172714233,
|
|
"learning_rate": 2.467924528301887e-06,
|
|
"loss": 0.1657,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.6709118695873664,
|
|
"grad_norm": 1.0787543058395386,
|
|
"learning_rate": 2.4641509433962264e-06,
|
|
"loss": 0.1717,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.6729495669893022,
|
|
"grad_norm": 1.1307450532913208,
|
|
"learning_rate": 2.4603773584905663e-06,
|
|
"loss": 0.1708,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.674987264391238,
|
|
"grad_norm": 1.1192117929458618,
|
|
"learning_rate": 2.456603773584906e-06,
|
|
"loss": 0.1799,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.6770249617931738,
|
|
"grad_norm": 1.1960910558700562,
|
|
"learning_rate": 2.4528301886792453e-06,
|
|
"loss": 0.1703,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.6790626591951097,
|
|
"grad_norm": 1.1331156492233276,
|
|
"learning_rate": 2.4490566037735852e-06,
|
|
"loss": 0.1593,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.6811003565970455,
|
|
"grad_norm": 1.2134394645690918,
|
|
"learning_rate": 2.4452830188679247e-06,
|
|
"loss": 0.1964,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.6831380539989813,
|
|
"grad_norm": 1.178653597831726,
|
|
"learning_rate": 2.4415094339622642e-06,
|
|
"loss": 0.1864,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.685175751400917,
|
|
"grad_norm": 1.0972850322723389,
|
|
"learning_rate": 2.4377358490566037e-06,
|
|
"loss": 0.1637,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.6872134488028527,
|
|
"grad_norm": 1.0110701322555542,
|
|
"learning_rate": 2.4339622641509436e-06,
|
|
"loss": 0.1758,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.6892511462047886,
|
|
"grad_norm": 1.0509426593780518,
|
|
"learning_rate": 2.4301886792452835e-06,
|
|
"loss": 0.1708,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.6912888436067244,
|
|
"grad_norm": 1.247532844543457,
|
|
"learning_rate": 2.4264150943396226e-06,
|
|
"loss": 0.1754,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.6933265410086602,
|
|
"grad_norm": 1.1235079765319824,
|
|
"learning_rate": 2.4226415094339625e-06,
|
|
"loss": 0.1699,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.695364238410596,
|
|
"grad_norm": 1.2192139625549316,
|
|
"learning_rate": 2.418867924528302e-06,
|
|
"loss": 0.1851,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.6974019358125318,
|
|
"grad_norm": 1.2487667798995972,
|
|
"learning_rate": 2.415094339622642e-06,
|
|
"loss": 0.1926,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.6994396332144677,
|
|
"grad_norm": 1.1878374814987183,
|
|
"learning_rate": 2.4113207547169814e-06,
|
|
"loss": 0.2017,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.7014773306164035,
|
|
"grad_norm": 1.027300238609314,
|
|
"learning_rate": 2.407547169811321e-06,
|
|
"loss": 0.1725,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.7035150280183393,
|
|
"grad_norm": 1.0987987518310547,
|
|
"learning_rate": 2.403773584905661e-06,
|
|
"loss": 0.1793,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.7055527254202751,
|
|
"grad_norm": 1.08310067653656,
|
|
"learning_rate": 2.4000000000000003e-06,
|
|
"loss": 0.1715,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.7075904228222107,
|
|
"grad_norm": 1.255993366241455,
|
|
"learning_rate": 2.39622641509434e-06,
|
|
"loss": 0.1769,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.7096281202241466,
|
|
"grad_norm": 1.1966819763183594,
|
|
"learning_rate": 2.3924528301886797e-06,
|
|
"loss": 0.1661,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.7116658176260824,
|
|
"grad_norm": 1.22041916847229,
|
|
"learning_rate": 2.388679245283019e-06,
|
|
"loss": 0.172,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.7137035150280182,
|
|
"grad_norm": 1.0473703145980835,
|
|
"learning_rate": 2.3849056603773587e-06,
|
|
"loss": 0.1555,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.715741212429954,
|
|
"grad_norm": 1.0921486616134644,
|
|
"learning_rate": 2.381132075471698e-06,
|
|
"loss": 0.1739,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.7177789098318899,
|
|
"grad_norm": 1.1403447389602661,
|
|
"learning_rate": 2.377358490566038e-06,
|
|
"loss": 0.1807,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.7198166072338257,
|
|
"grad_norm": 1.1131690740585327,
|
|
"learning_rate": 2.3735849056603776e-06,
|
|
"loss": 0.1874,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.7218543046357615,
|
|
"grad_norm": 1.1460295915603638,
|
|
"learning_rate": 2.369811320754717e-06,
|
|
"loss": 0.1709,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.7238920020376973,
|
|
"grad_norm": 1.1869096755981445,
|
|
"learning_rate": 2.366037735849057e-06,
|
|
"loss": 0.1883,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.7259296994396331,
|
|
"grad_norm": 1.1736819744110107,
|
|
"learning_rate": 2.3622641509433965e-06,
|
|
"loss": 0.1754,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.727967396841569,
|
|
"grad_norm": 1.213629126548767,
|
|
"learning_rate": 2.358490566037736e-06,
|
|
"loss": 0.1677,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.7300050942435048,
|
|
"grad_norm": 1.0772464275360107,
|
|
"learning_rate": 2.3547169811320755e-06,
|
|
"loss": 0.1769,
|
|
"step": 849
|
|
},
|
|
{
"epoch": 1.7320427916454406,
"grad_norm": 1.1553244590759277,
"learning_rate": 2.3509433962264154e-06,
"loss": 0.1817,
"step": 850
},
|
|
{
|
|
"epoch": 1.7340804890473764,
|
|
"grad_norm": 1.0742902755737305,
|
|
"learning_rate": 2.347169811320755e-06,
|
|
"loss": 0.1635,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.7361181864493123,
|
|
"grad_norm": 1.1944258213043213,
|
|
"learning_rate": 2.3433962264150944e-06,
|
|
"loss": 0.1757,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.738155883851248,
|
|
"grad_norm": 1.1923333406448364,
|
|
"learning_rate": 2.3396226415094343e-06,
|
|
"loss": 0.1665,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.740193581253184,
|
|
"grad_norm": 1.086665153503418,
|
|
"learning_rate": 2.3358490566037738e-06,
|
|
"loss": 0.1803,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.7422312786551197,
|
|
"grad_norm": 1.0686219930648804,
|
|
"learning_rate": 2.3320754716981133e-06,
|
|
"loss": 0.1757,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.7442689760570556,
|
|
"grad_norm": 1.6613824367523193,
|
|
"learning_rate": 2.328301886792453e-06,
|
|
"loss": 0.1927,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.7463066734589914,
|
|
"grad_norm": 1.305106282234192,
|
|
"learning_rate": 2.3245283018867927e-06,
|
|
"loss": 0.1882,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.7483443708609272,
|
|
"grad_norm": 1.091124176979065,
|
|
"learning_rate": 2.320754716981132e-06,
|
|
"loss": 0.1707,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.750382068262863,
|
|
"grad_norm": 1.073729157447815,
|
|
"learning_rate": 2.3169811320754717e-06,
|
|
"loss": 0.1891,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.7524197656647988,
|
|
"grad_norm": 1.2147339582443237,
|
|
"learning_rate": 2.3132075471698116e-06,
|
|
"loss": 0.1734,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.7544574630667347,
|
|
"grad_norm": 1.085634708404541,
|
|
"learning_rate": 2.309433962264151e-06,
|
|
"loss": 0.1577,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.7564951604686705,
|
|
"grad_norm": 1.220919132232666,
|
|
"learning_rate": 2.3056603773584906e-06,
|
|
"loss": 0.1763,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.7585328578706063,
|
|
"grad_norm": 1.3067682981491089,
|
|
"learning_rate": 2.3018867924528305e-06,
|
|
"loss": 0.1805,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.7605705552725421,
|
|
"grad_norm": 1.3163460493087769,
|
|
"learning_rate": 2.29811320754717e-06,
|
|
"loss": 0.1737,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.762608252674478,
|
|
"grad_norm": 1.1450026035308838,
|
|
"learning_rate": 2.2943396226415095e-06,
|
|
"loss": 0.1727,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.7646459500764138,
|
|
"grad_norm": 1.0936638116836548,
|
|
"learning_rate": 2.2905660377358494e-06,
|
|
"loss": 0.1772,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.7666836474783496,
|
|
"grad_norm": 1.2066489458084106,
|
|
"learning_rate": 2.286792452830189e-06,
|
|
"loss": 0.1777,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.7687213448802854,
|
|
"grad_norm": 1.2631739377975464,
|
|
"learning_rate": 2.2830188679245283e-06,
|
|
"loss": 0.1765,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.7707590422822213,
|
|
"grad_norm": 1.1708970069885254,
|
|
"learning_rate": 2.279245283018868e-06,
|
|
"loss": 0.1834,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.7727967396841569,
|
|
"grad_norm": 1.0745712518692017,
|
|
"learning_rate": 2.2754716981132078e-06,
|
|
"loss": 0.1659,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.7748344370860927,
|
|
"grad_norm": 1.243639588356018,
|
|
"learning_rate": 2.2716981132075477e-06,
|
|
"loss": 0.1683,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.7768721344880285,
|
|
"grad_norm": 1.2835688591003418,
|
|
"learning_rate": 2.2679245283018867e-06,
|
|
"loss": 0.1749,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.7789098318899643,
|
|
"grad_norm": 1.3315813541412354,
|
|
"learning_rate": 2.2641509433962266e-06,
|
|
"loss": 0.187,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.7809475292919001,
|
|
"grad_norm": 1.525321125984192,
|
|
"learning_rate": 2.260377358490566e-06,
|
|
"loss": 0.1838,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.782985226693836,
|
|
"grad_norm": 1.1951662302017212,
|
|
"learning_rate": 2.256603773584906e-06,
|
|
"loss": 0.1902,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.7850229240957718,
|
|
"grad_norm": 1.2421764135360718,
|
|
"learning_rate": 2.2528301886792455e-06,
|
|
"loss": 0.1825,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.7870606214977076,
|
|
"grad_norm": 1.1425124406814575,
|
|
"learning_rate": 2.249056603773585e-06,
|
|
"loss": 0.1647,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.7890983188996434,
|
|
"grad_norm": 1.6294941902160645,
|
|
"learning_rate": 2.245283018867925e-06,
|
|
"loss": 0.1848,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.7911360163015793,
|
|
"grad_norm": 1.0840221643447876,
|
|
"learning_rate": 2.241509433962264e-06,
|
|
"loss": 0.178,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.793173713703515,
|
|
"grad_norm": 1.1134402751922607,
|
|
"learning_rate": 2.237735849056604e-06,
|
|
"loss": 0.1563,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.7952114111054507,
|
|
"grad_norm": 1.2107329368591309,
|
|
"learning_rate": 2.233962264150944e-06,
|
|
"loss": 0.1756,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.7972491085073865,
|
|
"grad_norm": 1.2982094287872314,
|
|
"learning_rate": 2.2301886792452833e-06,
|
|
"loss": 0.1793,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.7992868059093223,
|
|
"grad_norm": 1.2917886972427368,
|
|
"learning_rate": 2.226415094339623e-06,
|
|
"loss": 0.1666,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.8013245033112582,
|
|
"grad_norm": 1.23494553565979,
|
|
"learning_rate": 2.2226415094339623e-06,
|
|
"loss": 0.1707,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.803362200713194,
|
|
"grad_norm": 1.1923739910125732,
|
|
"learning_rate": 2.2188679245283022e-06,
|
|
"loss": 0.1767,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.8053998981151298,
|
|
"grad_norm": 1.1137254238128662,
|
|
"learning_rate": 2.2150943396226417e-06,
|
|
"loss": 0.1795,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.8074375955170656,
|
|
"grad_norm": 1.1190637350082397,
|
|
"learning_rate": 2.2113207547169812e-06,
|
|
"loss": 0.1766,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.8094752929190014,
|
|
"grad_norm": 1.1797064542770386,
|
|
"learning_rate": 2.207547169811321e-06,
|
|
"loss": 0.1713,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.8115129903209373,
|
|
"grad_norm": 1.1107820272445679,
|
|
"learning_rate": 2.2037735849056606e-06,
|
|
"loss": 0.1826,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.813550687722873,
|
|
"grad_norm": 1.1796709299087524,
|
|
"learning_rate": 2.2e-06,
|
|
"loss": 0.1771,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.815588385124809,
|
|
"grad_norm": 1.0448757410049438,
|
|
"learning_rate": 2.19622641509434e-06,
|
|
"loss": 0.1673,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.8176260825267447,
|
|
"grad_norm": 1.1002962589263916,
|
|
"learning_rate": 2.1924528301886795e-06,
|
|
"loss": 0.1612,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.8196637799286806,
|
|
"grad_norm": 1.2181810140609741,
|
|
"learning_rate": 2.188679245283019e-06,
|
|
"loss": 0.1752,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.8217014773306164,
|
|
"grad_norm": 1.2177342176437378,
|
|
"learning_rate": 2.1849056603773585e-06,
|
|
"loss": 0.1713,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.8237391747325522,
|
|
"grad_norm": 1.070660948753357,
|
|
"learning_rate": 2.1811320754716984e-06,
|
|
"loss": 0.1707,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.825776872134488,
|
|
"grad_norm": 1.083571434020996,
|
|
"learning_rate": 2.177358490566038e-06,
|
|
"loss": 0.1743,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.8278145695364238,
|
|
"grad_norm": 1.2324374914169312,
|
|
"learning_rate": 2.1735849056603774e-06,
|
|
"loss": 0.1876,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.8298522669383597,
|
|
"grad_norm": 1.1662664413452148,
|
|
"learning_rate": 2.1698113207547173e-06,
|
|
"loss": 0.172,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.8318899643402955,
|
|
"grad_norm": 1.0966416597366333,
|
|
"learning_rate": 2.166037735849057e-06,
|
|
"loss": 0.1784,
|
|
"step": 899
|
|
},
|
|
{
"epoch": 1.8339276617422313,
"grad_norm": 1.0962932109832764,
"learning_rate": 2.1622641509433963e-06,
"loss": 0.1791,
"step": 900
},
|
|
{
|
|
"epoch": 1.8359653591441671,
|
|
"grad_norm": 1.1369909048080444,
|
|
"learning_rate": 2.158490566037736e-06,
|
|
"loss": 0.1797,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.838003056546103,
|
|
"grad_norm": 1.1816999912261963,
|
|
"learning_rate": 2.1547169811320757e-06,
|
|
"loss": 0.169,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.8400407539480388,
|
|
"grad_norm": 1.1556625366210938,
|
|
"learning_rate": 2.150943396226415e-06,
|
|
"loss": 0.17,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.8420784513499746,
|
|
"grad_norm": 1.0892881155014038,
|
|
"learning_rate": 2.1471698113207547e-06,
|
|
"loss": 0.1652,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.8441161487519104,
|
|
"grad_norm": 1.3906255960464478,
|
|
"learning_rate": 2.1433962264150946e-06,
|
|
"loss": 0.1839,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.8461538461538463,
|
|
"grad_norm": 1.0891425609588623,
|
|
"learning_rate": 2.139622641509434e-06,
|
|
"loss": 0.1773,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.848191543555782,
|
|
"grad_norm": 1.1463353633880615,
|
|
"learning_rate": 2.1358490566037736e-06,
|
|
"loss": 0.1808,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.850229240957718,
|
|
"grad_norm": 1.086715579032898,
|
|
"learning_rate": 2.1320754716981135e-06,
|
|
"loss": 0.1804,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.8522669383596537,
|
|
"grad_norm": 1.102216124534607,
|
|
"learning_rate": 2.128301886792453e-06,
|
|
"loss": 0.1729,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.8543046357615895,
|
|
"grad_norm": 1.2313193082809448,
|
|
"learning_rate": 2.1245283018867925e-06,
|
|
"loss": 0.1906,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.8563423331635254,
|
|
"grad_norm": 1.3457517623901367,
|
|
"learning_rate": 2.120754716981132e-06,
|
|
"loss": 0.1675,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.8583800305654612,
|
|
"grad_norm": 1.1635335683822632,
|
|
"learning_rate": 2.116981132075472e-06,
|
|
"loss": 0.1775,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.8604177279673968,
|
|
"grad_norm": 1.2560811042785645,
|
|
"learning_rate": 2.1132075471698114e-06,
|
|
"loss": 0.1694,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.8624554253693326,
|
|
"grad_norm": 1.1669859886169434,
|
|
"learning_rate": 2.109433962264151e-06,
|
|
"loss": 0.1724,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.8644931227712684,
|
|
"grad_norm": 1.1948050260543823,
|
|
"learning_rate": 2.1056603773584908e-06,
|
|
"loss": 0.1858,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.8665308201732043,
|
|
"grad_norm": 1.201643705368042,
|
|
"learning_rate": 2.1018867924528303e-06,
|
|
"loss": 0.1636,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.86856851757514,
|
|
"grad_norm": 1.0382106304168701,
|
|
"learning_rate": 2.0981132075471698e-06,
|
|
"loss": 0.1664,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.870606214977076,
|
|
"grad_norm": 1.1447466611862183,
|
|
"learning_rate": 2.0943396226415097e-06,
|
|
"loss": 0.1805,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.8726439123790117,
|
|
"grad_norm": 1.0567753314971924,
|
|
"learning_rate": 2.090566037735849e-06,
|
|
"loss": 0.1847,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.8746816097809476,
|
|
"grad_norm": 1.1714054346084595,
|
|
"learning_rate": 2.086792452830189e-06,
|
|
"loss": 0.1699,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.8767193071828834,
|
|
"grad_norm": 1.112230658531189,
|
|
"learning_rate": 2.083018867924528e-06,
|
|
"loss": 0.1709,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.8787570045848192,
|
|
"grad_norm": 1.248382329940796,
|
|
"learning_rate": 2.079245283018868e-06,
|
|
"loss": 0.1854,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.8807947019867548,
|
|
"grad_norm": 1.0857242345809937,
|
|
"learning_rate": 2.075471698113208e-06,
|
|
"loss": 0.1647,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.8828323993886906,
|
|
"grad_norm": 1.1596136093139648,
|
|
"learning_rate": 2.0716981132075475e-06,
|
|
"loss": 0.1884,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.8848700967906264,
|
|
"grad_norm": 1.0483487844467163,
|
|
"learning_rate": 2.067924528301887e-06,
|
|
"loss": 0.1825,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.8869077941925623,
|
|
"grad_norm": 1.1504698991775513,
|
|
"learning_rate": 2.0641509433962264e-06,
|
|
"loss": 0.1788,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.888945491594498,
|
|
"grad_norm": 1.0937446355819702,
|
|
"learning_rate": 2.0603773584905664e-06,
|
|
"loss": 0.1714,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.890983188996434,
|
|
"grad_norm": 1.2522544860839844,
|
|
"learning_rate": 2.056603773584906e-06,
|
|
"loss": 0.1895,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.8930208863983697,
|
|
"grad_norm": 1.0965933799743652,
|
|
"learning_rate": 2.0528301886792453e-06,
|
|
"loss": 0.1735,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.8950585838003056,
|
|
"grad_norm": 1.3030322790145874,
|
|
"learning_rate": 2.0490566037735853e-06,
|
|
"loss": 0.1705,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.8970962812022414,
|
|
"grad_norm": 1.1427980661392212,
|
|
"learning_rate": 2.0452830188679247e-06,
|
|
"loss": 0.1791,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.8991339786041772,
|
|
"grad_norm": 1.1021360158920288,
|
|
"learning_rate": 2.0415094339622642e-06,
|
|
"loss": 0.1626,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.901171676006113,
|
|
"grad_norm": 1.225327968597412,
|
|
"learning_rate": 2.037735849056604e-06,
|
|
"loss": 0.1746,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.9032093734080489,
|
|
"grad_norm": 1.4384862184524536,
|
|
"learning_rate": 2.0339622641509436e-06,
|
|
"loss": 0.1811,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.9052470708099847,
|
|
"grad_norm": 1.1396024227142334,
|
|
"learning_rate": 2.030188679245283e-06,
|
|
"loss": 0.1645,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.9072847682119205,
|
|
"grad_norm": 1.1487840414047241,
|
|
"learning_rate": 2.0264150943396226e-06,
|
|
"loss": 0.1727,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.9093224656138563,
|
|
"grad_norm": 1.137575387954712,
|
|
"learning_rate": 2.0226415094339625e-06,
|
|
"loss": 0.1673,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.9113601630157921,
|
|
"grad_norm": 1.1038920879364014,
|
|
"learning_rate": 2.018867924528302e-06,
|
|
"loss": 0.1758,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.913397860417728,
|
|
"grad_norm": 1.162651777267456,
|
|
"learning_rate": 2.0150943396226415e-06,
|
|
"loss": 0.1658,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.9154355578196638,
|
|
"grad_norm": 1.0977519750595093,
|
|
"learning_rate": 2.0113207547169814e-06,
|
|
"loss": 0.179,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.9174732552215996,
|
|
"grad_norm": 1.3130261898040771,
|
|
"learning_rate": 2.007547169811321e-06,
|
|
"loss": 0.1701,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.9195109526235354,
|
|
"grad_norm": 1.1742639541625977,
|
|
"learning_rate": 2.0037735849056604e-06,
|
|
"loss": 0.1674,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.9215486500254713,
|
|
"grad_norm": 1.160561203956604,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.1765,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.923586347427407,
|
|
"grad_norm": 1.3576925992965698,
|
|
"learning_rate": 1.99622641509434e-06,
|
|
"loss": 0.1718,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.925624044829343,
|
|
"grad_norm": 1.101428747177124,
|
|
"learning_rate": 1.9924528301886793e-06,
|
|
"loss": 0.1843,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.9276617422312787,
|
|
"grad_norm": 1.271204948425293,
|
|
"learning_rate": 1.988679245283019e-06,
|
|
"loss": 0.1696,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.9296994396332146,
|
|
"grad_norm": 1.2334147691726685,
|
|
"learning_rate": 1.9849056603773587e-06,
|
|
"loss": 0.1713,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.9317371370351504,
|
|
"grad_norm": 1.2214933633804321,
|
|
"learning_rate": 1.981132075471698e-06,
|
|
"loss": 0.1819,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.9337748344370862,
|
|
"grad_norm": 1.1319000720977783,
|
|
"learning_rate": 1.9773584905660377e-06,
|
|
"loss": 0.1763,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.935812531839022,
|
|
"grad_norm": 1.0747102499008179,
|
|
"learning_rate": 1.9735849056603776e-06,
|
|
"loss": 0.179,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.9378502292409578,
|
|
"grad_norm": 1.1016148328781128,
|
|
"learning_rate": 1.969811320754717e-06,
|
|
"loss": 0.1723,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.9398879266428937,
|
|
"grad_norm": 1.3073527812957764,
|
|
"learning_rate": 1.9660377358490566e-06,
|
|
"loss": 0.18,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.9419256240448295,
|
|
"grad_norm": 1.2624202966690063,
|
|
"learning_rate": 1.9622641509433965e-06,
|
|
"loss": 0.1945,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.9439633214467653,
|
|
"grad_norm": 1.3091782331466675,
|
|
"learning_rate": 1.958490566037736e-06,
|
|
"loss": 0.1938,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.946001018848701,
|
|
"grad_norm": 1.136667251586914,
|
|
"learning_rate": 1.9547169811320755e-06,
|
|
"loss": 0.1808,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.9480387162506367,
|
|
"grad_norm": 1.1663713455200195,
|
|
"learning_rate": 1.950943396226415e-06,
|
|
"loss": 0.1735,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.9500764136525726,
|
|
"grad_norm": 1.2320809364318848,
|
|
"learning_rate": 1.947169811320755e-06,
|
|
"loss": 0.1757,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.9521141110545084,
|
|
"grad_norm": 1.174214243888855,
|
|
"learning_rate": 1.943396226415095e-06,
|
|
"loss": 0.1776,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.9541518084564442,
|
|
"grad_norm": 1.2423794269561768,
|
|
"learning_rate": 1.939622641509434e-06,
|
|
"loss": 0.1921,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.95618950585838,
|
|
"grad_norm": 1.1554875373840332,
|
|
"learning_rate": 1.935849056603774e-06,
|
|
"loss": 0.1698,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.9582272032603159,
|
|
"grad_norm": 1.1640571355819702,
|
|
"learning_rate": 1.9320754716981133e-06,
|
|
"loss": 0.1838,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.9602649006622517,
|
|
"grad_norm": 1.1926047801971436,
|
|
"learning_rate": 1.928301886792453e-06,
|
|
"loss": 0.1775,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.9623025980641875,
|
|
"grad_norm": 1.2760028839111328,
|
|
"learning_rate": 1.9245283018867927e-06,
|
|
"loss": 0.178,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.9643402954661233,
|
|
"grad_norm": 1.1897207498550415,
|
|
"learning_rate": 1.920754716981132e-06,
|
|
"loss": 0.1675,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.9663779928680591,
|
|
"grad_norm": 1.0810887813568115,
|
|
"learning_rate": 1.916981132075472e-06,
|
|
"loss": 0.1631,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.9684156902699947,
|
|
"grad_norm": 1.1327540874481201,
|
|
"learning_rate": 1.9132075471698116e-06,
|
|
"loss": 0.1834,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.9704533876719306,
|
|
"grad_norm": 1.0766308307647705,
|
|
"learning_rate": 1.909433962264151e-06,
|
|
"loss": 0.1756,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.9724910850738664,
|
|
"grad_norm": 1.1231815814971924,
|
|
"learning_rate": 1.9056603773584908e-06,
|
|
"loss": 0.1736,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.9745287824758022,
|
|
"grad_norm": 1.10451078414917,
|
|
"learning_rate": 1.9018867924528303e-06,
|
|
"loss": 0.1848,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.976566479877738,
|
|
"grad_norm": 1.114749789237976,
|
|
"learning_rate": 1.89811320754717e-06,
|
|
"loss": 0.1685,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.9786041772796739,
|
|
"grad_norm": 1.1218091249465942,
|
|
"learning_rate": 1.8943396226415095e-06,
|
|
"loss": 0.1644,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.9806418746816097,
|
|
"grad_norm": 1.1256656646728516,
|
|
"learning_rate": 1.8905660377358492e-06,
|
|
"loss": 0.1686,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.9826795720835455,
|
|
"grad_norm": 1.2012169361114502,
|
|
"learning_rate": 1.8867924528301889e-06,
|
|
"loss": 0.1906,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.9847172694854813,
|
|
"grad_norm": 1.1859033107757568,
|
|
"learning_rate": 1.8830188679245284e-06,
|
|
"loss": 0.1818,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.9867549668874172,
|
|
"grad_norm": 1.1662039756774902,
|
|
"learning_rate": 1.879245283018868e-06,
|
|
"loss": 0.1928,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.988792664289353,
|
|
"grad_norm": 1.1107443571090698,
|
|
"learning_rate": 1.8754716981132076e-06,
|
|
"loss": 0.1701,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.9908303616912888,
|
|
"grad_norm": 1.1272541284561157,
|
|
"learning_rate": 1.8716981132075473e-06,
|
|
"loss": 0.1676,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.9928680590932246,
|
|
"grad_norm": 1.158721923828125,
|
|
"learning_rate": 1.8679245283018868e-06,
|
|
"loss": 0.1642,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.9949057564951604,
|
|
"grad_norm": 1.1401432752609253,
|
|
"learning_rate": 1.8641509433962265e-06,
|
|
"loss": 0.1674,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.9969434538970963,
|
|
"grad_norm": 1.1835023164749146,
|
|
"learning_rate": 1.8603773584905664e-06,
|
|
"loss": 0.1812,
|
|
"step": 980
|
|
},
{
"epoch": 1.998981151299032,
"grad_norm": 1.2545579671859741,
"learning_rate": 1.8566037735849056e-06,
"loss": 0.1858,
"step": 981
},
{
"epoch": 2.001018848700968,
"grad_norm": 1.0314708948135376,
"learning_rate": 1.8528301886792456e-06,
"loss": 0.1435,
"step": 982
}
],
"logging_steps": 1.0,
"max_steps": 1473,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 491,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.3189159645267624e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}