22224 lines
538 KiB
JSON
22224 lines
538 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 9.997639653815893,
|
|
"eval_steps": 500,
|
|
"global_step": 3170,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.003147128245476003,
|
|
"grad_norm": 2.0566761052521856,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": 1.0248,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.006294256490952006,
|
|
"grad_norm": 2.078116756187581,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 1.0183,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.00944138473642801,
|
|
"grad_norm": 2.055186894692077,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 1.0165,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.012588512981904013,
|
|
"grad_norm": 2.0254172121373073,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 1.0132,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.015735641227380016,
|
|
"grad_norm": 1.979378254901161,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": 1.0338,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.01888276947285602,
|
|
"grad_norm": 1.6978990047138407,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 1.0147,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.022029897718332022,
|
|
"grad_norm": 1.5934719348056317,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": 1.0025,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.025177025963808025,
|
|
"grad_norm": 1.1983077219680367,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.9763,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.02832415420928403,
|
|
"grad_norm": 1.0943483464567008,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.9673,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.03147128245476003,
|
|
"grad_norm": 1.0795799352113267,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": 0.9625,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.03461841070023604,
|
|
"grad_norm": 1.2586895987651956,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": 0.9443,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.03776553894571204,
|
|
"grad_norm": 1.2360462710902367,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.952,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.040912667191188044,
|
|
"grad_norm": 1.158712657793634,
|
|
"learning_rate": 5.416666666666667e-06,
|
|
"loss": 0.9186,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.044059795436664044,
|
|
"grad_norm": 0.9342993351221153,
|
|
"learning_rate": 5.833333333333334e-06,
|
|
"loss": 0.8874,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.04720692368214005,
|
|
"grad_norm": 1.0504874222027794,
|
|
"learning_rate": 6.25e-06,
|
|
"loss": 0.878,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.05035405192761605,
|
|
"grad_norm": 0.8975104331120672,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.8668,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.05350118017309206,
|
|
"grad_norm": 0.6476430875482199,
|
|
"learning_rate": 7.083333333333335e-06,
|
|
"loss": 0.8655,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.05664830841856806,
|
|
"grad_norm": 0.49682103011953394,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 0.8502,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.05979543666404406,
|
|
"grad_norm": 0.5685849690063021,
|
|
"learning_rate": 7.916666666666667e-06,
|
|
"loss": 0.8249,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.06294256490952006,
|
|
"grad_norm": 0.7286039018171099,
|
|
"learning_rate": 8.333333333333334e-06,
|
|
"loss": 0.8183,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06608969315499606,
|
|
"grad_norm": 0.650325267587393,
|
|
"learning_rate": 8.750000000000001e-06,
|
|
"loss": 0.8078,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.06923682140047208,
|
|
"grad_norm": 0.524249002042332,
|
|
"learning_rate": 9.166666666666666e-06,
|
|
"loss": 0.7968,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.07238394964594808,
|
|
"grad_norm": 0.42266135038030506,
|
|
"learning_rate": 9.583333333333335e-06,
|
|
"loss": 0.793,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.07553107789142408,
|
|
"grad_norm": 0.45652357144630545,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.786,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.07867820613690008,
|
|
"grad_norm": 0.48851305915388266,
|
|
"learning_rate": 1.0416666666666668e-05,
|
|
"loss": 0.7842,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.08182533438237609,
|
|
"grad_norm": 0.47219631692611636,
|
|
"learning_rate": 1.0833333333333334e-05,
|
|
"loss": 0.7726,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.08497246262785209,
|
|
"grad_norm": 0.39201363065054773,
|
|
"learning_rate": 1.125e-05,
|
|
"loss": 0.768,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.08811959087332809,
|
|
"grad_norm": 0.3342565416438565,
|
|
"learning_rate": 1.1666666666666668e-05,
|
|
"loss": 0.7589,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.09126671911880409,
|
|
"grad_norm": 0.35827703185804977,
|
|
"learning_rate": 1.2083333333333333e-05,
|
|
"loss": 0.7723,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.0944138473642801,
|
|
"grad_norm": 0.3625916446194259,
|
|
"learning_rate": 1.25e-05,
|
|
"loss": 0.7524,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0975609756097561,
|
|
"grad_norm": 0.32147227722705174,
|
|
"learning_rate": 1.2916666666666668e-05,
|
|
"loss": 0.7462,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.1007081038552321,
|
|
"grad_norm": 0.3105919347762339,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.7432,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.1038552321007081,
|
|
"grad_norm": 0.2941313278165609,
|
|
"learning_rate": 1.375e-05,
|
|
"loss": 0.7487,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.10700236034618411,
|
|
"grad_norm": 0.2847875994844311,
|
|
"learning_rate": 1.416666666666667e-05,
|
|
"loss": 0.7279,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.11014948859166011,
|
|
"grad_norm": 0.29110050664950804,
|
|
"learning_rate": 1.4583333333333333e-05,
|
|
"loss": 0.7264,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.11329661683713611,
|
|
"grad_norm": 0.2758326744258242,
|
|
"learning_rate": 1.5000000000000002e-05,
|
|
"loss": 0.7289,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.11644374508261211,
|
|
"grad_norm": 0.25172506909717546,
|
|
"learning_rate": 1.5416666666666668e-05,
|
|
"loss": 0.7233,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.11959087332808813,
|
|
"grad_norm": 0.2472744394585722,
|
|
"learning_rate": 1.5833333333333333e-05,
|
|
"loss": 0.729,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.12273800157356413,
|
|
"grad_norm": 0.2646648296393675,
|
|
"learning_rate": 1.6250000000000002e-05,
|
|
"loss": 0.7279,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.12588512981904013,
|
|
"grad_norm": 0.24358773217689053,
|
|
"learning_rate": 1.6666666666666667e-05,
|
|
"loss": 0.7184,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.12903225806451613,
|
|
"grad_norm": 0.2393241155630341,
|
|
"learning_rate": 1.7083333333333333e-05,
|
|
"loss": 0.7136,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.13217938630999213,
|
|
"grad_norm": 0.2330003474602153,
|
|
"learning_rate": 1.7500000000000002e-05,
|
|
"loss": 0.711,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.13532651455546812,
|
|
"grad_norm": 0.22294095752365714,
|
|
"learning_rate": 1.7916666666666667e-05,
|
|
"loss": 0.7126,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.13847364280094415,
|
|
"grad_norm": 0.23816885540234745,
|
|
"learning_rate": 1.8333333333333333e-05,
|
|
"loss": 0.7188,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.14162077104642015,
|
|
"grad_norm": 0.2257889298086421,
|
|
"learning_rate": 1.8750000000000002e-05,
|
|
"loss": 0.6991,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.14476789929189615,
|
|
"grad_norm": 0.20099324635222396,
|
|
"learning_rate": 1.916666666666667e-05,
|
|
"loss": 0.7006,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.14791502753737215,
|
|
"grad_norm": 0.25186139333784574,
|
|
"learning_rate": 1.9583333333333333e-05,
|
|
"loss": 0.7087,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.15106215578284815,
|
|
"grad_norm": 0.2232374205375328,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.6971,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.15420928402832415,
|
|
"grad_norm": 0.21825531385293007,
|
|
"learning_rate": 2.0416666666666667e-05,
|
|
"loss": 0.697,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.15735641227380015,
|
|
"grad_norm": 0.21596204587349424,
|
|
"learning_rate": 2.0833333333333336e-05,
|
|
"loss": 0.6887,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16050354051927615,
|
|
"grad_norm": 0.23116942027734438,
|
|
"learning_rate": 2.125e-05,
|
|
"loss": 0.6885,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.16365066876475218,
|
|
"grad_norm": 0.21015812381257615,
|
|
"learning_rate": 2.1666666666666667e-05,
|
|
"loss": 0.6866,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.16679779701022818,
|
|
"grad_norm": 0.19996909500963955,
|
|
"learning_rate": 2.2083333333333336e-05,
|
|
"loss": 0.6898,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.16994492525570418,
|
|
"grad_norm": 0.20997251324092625,
|
|
"learning_rate": 2.25e-05,
|
|
"loss": 0.6836,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.17309205350118018,
|
|
"grad_norm": 0.20108945450898513,
|
|
"learning_rate": 2.2916666666666667e-05,
|
|
"loss": 0.6868,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.17623918174665618,
|
|
"grad_norm": 0.2035143838254788,
|
|
"learning_rate": 2.3333333333333336e-05,
|
|
"loss": 0.6936,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.17938630999213218,
|
|
"grad_norm": 0.2004298904967849,
|
|
"learning_rate": 2.375e-05,
|
|
"loss": 0.6746,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.18253343823760818,
|
|
"grad_norm": 0.20059328010088198,
|
|
"learning_rate": 2.4166666666666667e-05,
|
|
"loss": 0.682,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.18568056648308418,
|
|
"grad_norm": 0.21755269002083433,
|
|
"learning_rate": 2.4583333333333336e-05,
|
|
"loss": 0.6735,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.1888276947285602,
|
|
"grad_norm": 0.2129373116359228,
|
|
"learning_rate": 2.5e-05,
|
|
"loss": 0.6663,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.1919748229740362,
|
|
"grad_norm": 0.1995735536259152,
|
|
"learning_rate": 2.5416666666666667e-05,
|
|
"loss": 0.6787,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.1951219512195122,
|
|
"grad_norm": 0.23037748881825523,
|
|
"learning_rate": 2.5833333333333336e-05,
|
|
"loss": 0.6703,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.1982690794649882,
|
|
"grad_norm": 0.18391751461207972,
|
|
"learning_rate": 2.625e-05,
|
|
"loss": 0.6764,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.2014162077104642,
|
|
"grad_norm": 0.2123421226257098,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.6714,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.2045633359559402,
|
|
"grad_norm": 0.20183153602864587,
|
|
"learning_rate": 2.7083333333333335e-05,
|
|
"loss": 0.6656,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.2077104642014162,
|
|
"grad_norm": 0.19119357792446254,
|
|
"learning_rate": 2.75e-05,
|
|
"loss": 0.6684,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.2108575924468922,
|
|
"grad_norm": 0.20177148219300692,
|
|
"learning_rate": 2.7916666666666666e-05,
|
|
"loss": 0.6458,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.21400472069236823,
|
|
"grad_norm": 0.22326018847799878,
|
|
"learning_rate": 2.833333333333334e-05,
|
|
"loss": 0.6659,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.21715184893784423,
|
|
"grad_norm": 0.22960589662619602,
|
|
"learning_rate": 2.875e-05,
|
|
"loss": 0.6814,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.22029897718332023,
|
|
"grad_norm": 0.20556408160244669,
|
|
"learning_rate": 2.9166666666666666e-05,
|
|
"loss": 0.6651,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.22344610542879623,
|
|
"grad_norm": 0.23091408485344644,
|
|
"learning_rate": 2.958333333333334e-05,
|
|
"loss": 0.6587,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.22659323367427223,
|
|
"grad_norm": 0.24593395754345967,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.6559,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.22974036191974823,
|
|
"grad_norm": 0.2941140735112936,
|
|
"learning_rate": 3.0416666666666666e-05,
|
|
"loss": 0.6523,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.23288749016522423,
|
|
"grad_norm": 0.2726093572840182,
|
|
"learning_rate": 3.0833333333333335e-05,
|
|
"loss": 0.6685,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.23603461841070023,
|
|
"grad_norm": 0.28195810887905565,
|
|
"learning_rate": 3.125e-05,
|
|
"loss": 0.6614,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.23918174665617625,
|
|
"grad_norm": 0.24022069913129832,
|
|
"learning_rate": 3.1666666666666666e-05,
|
|
"loss": 0.6593,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.24232887490165225,
|
|
"grad_norm": 0.25139586251835144,
|
|
"learning_rate": 3.208333333333334e-05,
|
|
"loss": 0.659,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.24547600314712825,
|
|
"grad_norm": 0.2566709925175564,
|
|
"learning_rate": 3.2500000000000004e-05,
|
|
"loss": 0.6542,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.24862313139260425,
|
|
"grad_norm": 0.2883214448935213,
|
|
"learning_rate": 3.291666666666667e-05,
|
|
"loss": 0.6471,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.25177025963808025,
|
|
"grad_norm": 0.30668415687027056,
|
|
"learning_rate": 3.3333333333333335e-05,
|
|
"loss": 0.6439,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.2549173878835563,
|
|
"grad_norm": 0.29042450307830464,
|
|
"learning_rate": 3.375e-05,
|
|
"loss": 0.651,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.25806451612903225,
|
|
"grad_norm": 0.2736791002695721,
|
|
"learning_rate": 3.4166666666666666e-05,
|
|
"loss": 0.6467,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.2612116443745083,
|
|
"grad_norm": 0.265465779092424,
|
|
"learning_rate": 3.458333333333334e-05,
|
|
"loss": 0.6412,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.26435877261998425,
|
|
"grad_norm": 0.2968535790814613,
|
|
"learning_rate": 3.5000000000000004e-05,
|
|
"loss": 0.6574,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.2675059008654603,
|
|
"grad_norm": 0.43190214956783235,
|
|
"learning_rate": 3.541666666666667e-05,
|
|
"loss": 0.6495,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.27065302911093625,
|
|
"grad_norm": 0.632308039014685,
|
|
"learning_rate": 3.5833333333333335e-05,
|
|
"loss": 0.6515,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.2738001573564123,
|
|
"grad_norm": 0.7849780285031561,
|
|
"learning_rate": 3.625e-05,
|
|
"loss": 0.6546,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.2769472856018883,
|
|
"grad_norm": 0.7233136246737597,
|
|
"learning_rate": 3.6666666666666666e-05,
|
|
"loss": 0.6468,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.2800944138473643,
|
|
"grad_norm": 0.5392685671512011,
|
|
"learning_rate": 3.708333333333334e-05,
|
|
"loss": 0.6366,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.2832415420928403,
|
|
"grad_norm": 0.6548726987366142,
|
|
"learning_rate": 3.7500000000000003e-05,
|
|
"loss": 0.6433,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.2863886703383163,
|
|
"grad_norm": 0.74072769066675,
|
|
"learning_rate": 3.791666666666667e-05,
|
|
"loss": 0.6421,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.2895357985837923,
|
|
"grad_norm": 0.37857830042275176,
|
|
"learning_rate": 3.833333333333334e-05,
|
|
"loss": 0.65,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.2926829268292683,
|
|
"grad_norm": 0.5508648164496704,
|
|
"learning_rate": 3.875e-05,
|
|
"loss": 0.6493,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.2958300550747443,
|
|
"grad_norm": 0.5478861545420471,
|
|
"learning_rate": 3.9166666666666665e-05,
|
|
"loss": 0.6469,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.2989771833202203,
|
|
"grad_norm": 0.3468090216652617,
|
|
"learning_rate": 3.958333333333334e-05,
|
|
"loss": 0.6514,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.3021243115656963,
|
|
"grad_norm": 0.6547220457148604,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.649,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.30527143981117233,
|
|
"grad_norm": 0.46761393726738054,
|
|
"learning_rate": 3.999999059985635e-05,
|
|
"loss": 0.6408,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.3084185680566483,
|
|
"grad_norm": 0.39367909064047446,
|
|
"learning_rate": 3.99999623994352e-05,
|
|
"loss": 0.6365,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.31156569630212433,
|
|
"grad_norm": 0.5946767742649087,
|
|
"learning_rate": 3.9999915398766006e-05,
|
|
"loss": 0.6366,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.3147128245476003,
|
|
"grad_norm": 0.31375774268214407,
|
|
"learning_rate": 3.999984959789786e-05,
|
|
"loss": 0.6389,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.31785995279307633,
|
|
"grad_norm": 0.5057217370873666,
|
|
"learning_rate": 3.9999764996899494e-05,
|
|
"loss": 0.6457,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.3210070810385523,
|
|
"grad_norm": 0.35265559358910226,
|
|
"learning_rate": 3.9999661595859275e-05,
|
|
"loss": 0.6438,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.3241542092840283,
|
|
"grad_norm": 0.341984180495186,
|
|
"learning_rate": 3.9999539394885177e-05,
|
|
"loss": 0.6275,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.32730133752950435,
|
|
"grad_norm": 0.3862289663549392,
|
|
"learning_rate": 3.999939839410486e-05,
|
|
"loss": 0.6279,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.3304484657749803,
|
|
"grad_norm": 0.30610508770190564,
|
|
"learning_rate": 3.999923859366557e-05,
|
|
"loss": 0.6335,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.33359559402045635,
|
|
"grad_norm": 0.39738483622597887,
|
|
"learning_rate": 3.999905999373424e-05,
|
|
"loss": 0.6275,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.3367427222659323,
|
|
"grad_norm": 0.34695466353973403,
|
|
"learning_rate": 3.9998862594497396e-05,
|
|
"loss": 0.634,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.33988985051140835,
|
|
"grad_norm": 0.4434518808465586,
|
|
"learning_rate": 3.999864639616121e-05,
|
|
"loss": 0.6374,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.3430369787568843,
|
|
"grad_norm": 0.33772070770009105,
|
|
"learning_rate": 3.99984113989515e-05,
|
|
"loss": 0.6266,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.34618410700236035,
|
|
"grad_norm": 0.2584585866122632,
|
|
"learning_rate": 3.99981576031137e-05,
|
|
"loss": 0.6292,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.3493312352478363,
|
|
"grad_norm": 0.3611261393186681,
|
|
"learning_rate": 3.9997885008912905e-05,
|
|
"loss": 0.6361,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.35247836349331235,
|
|
"grad_norm": 0.3023341429429724,
|
|
"learning_rate": 3.999759361663381e-05,
|
|
"loss": 0.6325,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.3556254917387884,
|
|
"grad_norm": 0.30908333541351135,
|
|
"learning_rate": 3.999728342658079e-05,
|
|
"loss": 0.6368,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.35877261998426435,
|
|
"grad_norm": 0.265928899655407,
|
|
"learning_rate": 3.999695443907781e-05,
|
|
"loss": 0.6303,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.3619197482297404,
|
|
"grad_norm": 0.27333927680685793,
|
|
"learning_rate": 3.9996606654468476e-05,
|
|
"loss": 0.6277,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.36506687647521635,
|
|
"grad_norm": 0.2744818724487684,
|
|
"learning_rate": 3.9996240073116044e-05,
|
|
"loss": 0.6272,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.3682140047206924,
|
|
"grad_norm": 0.2869505492537586,
|
|
"learning_rate": 3.99958546954034e-05,
|
|
"loss": 0.6165,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.37136113296616835,
|
|
"grad_norm": 0.26133884085799125,
|
|
"learning_rate": 3.9995450521733044e-05,
|
|
"loss": 0.6303,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.3745082612116444,
|
|
"grad_norm": 0.28364779766814496,
|
|
"learning_rate": 3.9995027552527126e-05,
|
|
"loss": 0.6355,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.3776553894571204,
|
|
"grad_norm": 0.26991948715363395,
|
|
"learning_rate": 3.9994585788227425e-05,
|
|
"loss": 0.6353,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.3808025177025964,
|
|
"grad_norm": 0.29168641093859365,
|
|
"learning_rate": 3.9994125229295335e-05,
|
|
"loss": 0.6347,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.3839496459480724,
|
|
"grad_norm": 0.2778112649452421,
|
|
"learning_rate": 3.999364587621189e-05,
|
|
"loss": 0.6314,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.3870967741935484,
|
|
"grad_norm": 0.25501755874765036,
|
|
"learning_rate": 3.9993147729477775e-05,
|
|
"loss": 0.6287,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.3902439024390244,
|
|
"grad_norm": 0.34414954964963435,
|
|
"learning_rate": 3.999263078961327e-05,
|
|
"loss": 0.6278,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.3933910306845004,
|
|
"grad_norm": 0.34610957165859696,
|
|
"learning_rate": 3.9992095057158304e-05,
|
|
"loss": 0.6216,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.3965381589299764,
|
|
"grad_norm": 0.28674531389146546,
|
|
"learning_rate": 3.999154053267242e-05,
|
|
"loss": 0.638,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.3996852871754524,
|
|
"grad_norm": 0.25576505127419086,
|
|
"learning_rate": 3.99909672167348e-05,
|
|
"loss": 0.6259,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.4028324154209284,
|
|
"grad_norm": 0.31068566841934725,
|
|
"learning_rate": 3.9990375109944254e-05,
|
|
"loss": 0.6266,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.40597954366640443,
|
|
"grad_norm": 0.43495770039787945,
|
|
"learning_rate": 3.998976421291921e-05,
|
|
"loss": 0.6194,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.4091266719118804,
|
|
"grad_norm": 0.40876214950723583,
|
|
"learning_rate": 3.998913452629773e-05,
|
|
"loss": 0.6261,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.41227380015735643,
|
|
"grad_norm": 0.26678746806822895,
|
|
"learning_rate": 3.998848605073749e-05,
|
|
"loss": 0.63,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.4154209284028324,
|
|
"grad_norm": 0.2878327006301991,
|
|
"learning_rate": 3.9987818786915807e-05,
|
|
"loss": 0.6204,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.41856805664830843,
|
|
"grad_norm": 0.3111848739668028,
|
|
"learning_rate": 3.9987132735529594e-05,
|
|
"loss": 0.6297,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.4217151848937844,
|
|
"grad_norm": 0.2609480696864346,
|
|
"learning_rate": 3.998642789729543e-05,
|
|
"loss": 0.6231,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.42486231313926043,
|
|
"grad_norm": 0.2811609267853307,
|
|
"learning_rate": 3.998570427294947e-05,
|
|
"loss": 0.6187,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.42800944138473646,
|
|
"grad_norm": 0.3489218624854075,
|
|
"learning_rate": 3.998496186324753e-05,
|
|
"loss": 0.6286,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.4311565696302124,
|
|
"grad_norm": 0.3209965825815324,
|
|
"learning_rate": 3.9984200668965e-05,
|
|
"loss": 0.6146,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.43430369787568845,
|
|
"grad_norm": 0.2527692210447605,
|
|
"learning_rate": 3.998342069089694e-05,
|
|
"loss": 0.6203,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.4374508261211644,
|
|
"grad_norm": 0.2863723927860823,
|
|
"learning_rate": 3.9982621929857994e-05,
|
|
"loss": 0.6186,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.44059795436664045,
|
|
"grad_norm": 0.3362120402580175,
|
|
"learning_rate": 3.998180438668244e-05,
|
|
"loss": 0.6173,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.4437450826121164,
|
|
"grad_norm": 0.29944731618068043,
|
|
"learning_rate": 3.998096806222417e-05,
|
|
"loss": 0.6079,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.44689221085759245,
|
|
"grad_norm": 0.2521452559671069,
|
|
"learning_rate": 3.9980112957356705e-05,
|
|
"loss": 0.6249,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.4500393391030684,
|
|
"grad_norm": 0.2545938988617545,
|
|
"learning_rate": 3.997923907297315e-05,
|
|
"loss": 0.6083,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.45318646734854445,
|
|
"grad_norm": 0.25898746551692964,
|
|
"learning_rate": 3.997834640998624e-05,
|
|
"loss": 0.6146,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.4563335955940205,
|
|
"grad_norm": 0.2771475593887788,
|
|
"learning_rate": 3.9977434969328344e-05,
|
|
"loss": 0.6155,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.45948072383949645,
|
|
"grad_norm": 0.2715220470047786,
|
|
"learning_rate": 3.9976504751951415e-05,
|
|
"loss": 0.6139,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.4626278520849725,
|
|
"grad_norm": 0.262357157875343,
|
|
"learning_rate": 3.997555575882702e-05,
|
|
"loss": 0.6109,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.46577498033044845,
|
|
"grad_norm": 0.2656139774401674,
|
|
"learning_rate": 3.9974587990946365e-05,
|
|
"loss": 0.6195,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.4689221085759245,
|
|
"grad_norm": 0.27484911731602474,
|
|
"learning_rate": 3.997360144932023e-05,
|
|
"loss": 0.6167,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.47206923682140045,
|
|
"grad_norm": 0.27057626890655806,
|
|
"learning_rate": 3.997259613497902e-05,
|
|
"loss": 0.6268,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.4752163650668765,
|
|
"grad_norm": 0.22502846697134835,
|
|
"learning_rate": 3.9971572048972754e-05,
|
|
"loss": 0.6159,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.4783634933123525,
|
|
"grad_norm": 0.2812778266326769,
|
|
"learning_rate": 3.997052919237105e-05,
|
|
"loss": 0.621,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.4815106215578285,
|
|
"grad_norm": 0.28278818974675024,
|
|
"learning_rate": 3.9969467566263115e-05,
|
|
"loss": 0.6238,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.4846577498033045,
|
|
"grad_norm": 0.3420285701015734,
|
|
"learning_rate": 3.996838717175779e-05,
|
|
"loss": 0.6118,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.4878048780487805,
|
|
"grad_norm": 0.35539942258827856,
|
|
"learning_rate": 3.9967288009983496e-05,
|
|
"loss": 0.6168,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.4909520062942565,
|
|
"grad_norm": 0.29368139728658277,
|
|
"learning_rate": 3.996617008208827e-05,
|
|
"loss": 0.6049,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.4940991345397325,
|
|
"grad_norm": 0.27690796066202816,
|
|
"learning_rate": 3.996503338923974e-05,
|
|
"loss": 0.6151,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.4972462627852085,
|
|
"grad_norm": 0.41334059184826466,
|
|
"learning_rate": 3.9963877932625134e-05,
|
|
"loss": 0.6184,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.5003933910306845,
|
|
"grad_norm": 0.42636029329489616,
|
|
"learning_rate": 3.996270371345129e-05,
|
|
"loss": 0.6144,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.5035405192761605,
|
|
"grad_norm": 0.2810420864283484,
|
|
"learning_rate": 3.9961510732944624e-05,
|
|
"loss": 0.6185,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.5066876475216365,
|
|
"grad_norm": 0.3032979670400302,
|
|
"learning_rate": 3.996029899235116e-05,
|
|
"loss": 0.6009,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.5098347757671126,
|
|
"grad_norm": 0.4124798386817749,
|
|
"learning_rate": 3.9959068492936517e-05,
|
|
"loss": 0.608,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.5129819040125885,
|
|
"grad_norm": 0.46292941599151394,
|
|
"learning_rate": 3.99578192359859e-05,
|
|
"loss": 0.6303,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.5161290322580645,
|
|
"grad_norm": 0.4249023229216657,
|
|
"learning_rate": 3.99565512228041e-05,
|
|
"loss": 0.6158,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.5192761605035405,
|
|
"grad_norm": 0.3081385660055859,
|
|
"learning_rate": 3.9955264454715524e-05,
|
|
"loss": 0.604,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.5224232887490166,
|
|
"grad_norm": 0.3115598725592295,
|
|
"learning_rate": 3.995395893306414e-05,
|
|
"loss": 0.6132,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.5255704169944925,
|
|
"grad_norm": 0.423339283046509,
|
|
"learning_rate": 3.995263465921351e-05,
|
|
"loss": 0.6133,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.5287175452399685,
|
|
"grad_norm": 0.38579662867406184,
|
|
"learning_rate": 3.9951291634546784e-05,
|
|
"loss": 0.6046,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.5318646734854445,
|
|
"grad_norm": 0.2646661405581634,
|
|
"learning_rate": 3.9949929860466715e-05,
|
|
"loss": 0.6065,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.5350118017309206,
|
|
"grad_norm": 0.29299500671952294,
|
|
"learning_rate": 3.994854933839561e-05,
|
|
"loss": 0.597,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.5381589299763966,
|
|
"grad_norm": 0.3801312863346971,
|
|
"learning_rate": 3.994715006977536e-05,
|
|
"loss": 0.609,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.5413060582218725,
|
|
"grad_norm": 0.2636893704090895,
|
|
"learning_rate": 3.994573205606747e-05,
|
|
"loss": 0.6059,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.5444531864673485,
|
|
"grad_norm": 0.2587055020132136,
|
|
"learning_rate": 3.994429529875298e-05,
|
|
"loss": 0.5968,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.5476003147128246,
|
|
"grad_norm": 0.3153597828303425,
|
|
"learning_rate": 3.994283979933254e-05,
|
|
"loss": 0.6133,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.5507474429583006,
|
|
"grad_norm": 0.26775970858600634,
|
|
"learning_rate": 3.994136555932635e-05,
|
|
"loss": 0.6045,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.5538945712037766,
|
|
"grad_norm": 0.3094235529607136,
|
|
"learning_rate": 3.993987258027419e-05,
|
|
"loss": 0.6089,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.5570416994492525,
|
|
"grad_norm": 0.28763348823077006,
|
|
"learning_rate": 3.9938360863735435e-05,
|
|
"loss": 0.609,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.5601888276947286,
|
|
"grad_norm": 0.2177547953667043,
|
|
"learning_rate": 3.9936830411289e-05,
|
|
"loss": 0.6154,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.5633359559402046,
|
|
"grad_norm": 0.2817386027141762,
|
|
"learning_rate": 3.993528122453339e-05,
|
|
"loss": 0.6119,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.5664830841856806,
|
|
"grad_norm": 0.2844686972678976,
|
|
"learning_rate": 3.993371330508666e-05,
|
|
"loss": 0.5981,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.5696302124311565,
|
|
"grad_norm": 0.2448670332544363,
|
|
"learning_rate": 3.9932126654586446e-05,
|
|
"loss": 0.5915,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.5727773406766326,
|
|
"grad_norm": 0.2597177617957836,
|
|
"learning_rate": 3.993052127468994e-05,
|
|
"loss": 0.5928,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.5759244689221086,
|
|
"grad_norm": 0.2215768221036163,
|
|
"learning_rate": 3.99288971670739e-05,
|
|
"loss": 0.6161,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.5790715971675846,
|
|
"grad_norm": 0.2699731828037381,
|
|
"learning_rate": 3.9927254333434656e-05,
|
|
"loss": 0.5921,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.5822187254130606,
|
|
"grad_norm": 0.29227902549722135,
|
|
"learning_rate": 3.9925592775488046e-05,
|
|
"loss": 0.5976,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.5853658536585366,
|
|
"grad_norm": 0.2541877399622803,
|
|
"learning_rate": 3.9923912494969536e-05,
|
|
"loss": 0.6102,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.5885129819040126,
|
|
"grad_norm": 0.3043738596400513,
|
|
"learning_rate": 3.9922213493634096e-05,
|
|
"loss": 0.611,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.5916601101494886,
|
|
"grad_norm": 0.2769382151889082,
|
|
"learning_rate": 3.992049577325627e-05,
|
|
"loss": 0.609,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.5948072383949646,
|
|
"grad_norm": 0.23411388895804314,
|
|
"learning_rate": 3.991875933563014e-05,
|
|
"loss": 0.5983,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.5979543666404405,
|
|
"grad_norm": 0.31989694156952164,
|
|
"learning_rate": 3.991700418256936e-05,
|
|
"loss": 0.6045,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.6011014948859166,
|
|
"grad_norm": 0.38404257854635715,
|
|
"learning_rate": 3.991523031590711e-05,
|
|
"loss": 0.6063,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.6042486231313926,
|
|
"grad_norm": 0.33761081359143924,
|
|
"learning_rate": 3.9913437737496135e-05,
|
|
"loss": 0.5951,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.6073957513768686,
|
|
"grad_norm": 0.2381342715991919,
|
|
"learning_rate": 3.9911626449208694e-05,
|
|
"loss": 0.601,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.6105428796223447,
|
|
"grad_norm": 0.31880643686538623,
|
|
"learning_rate": 3.9909796452936616e-05,
|
|
"loss": 0.6009,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.6136900078678206,
|
|
"grad_norm": 0.3563025725018504,
|
|
"learning_rate": 3.990794775059126e-05,
|
|
"loss": 0.6009,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.6168371361132966,
|
|
"grad_norm": 0.3033415317564058,
|
|
"learning_rate": 3.9906080344103516e-05,
|
|
"loss": 0.5992,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.6199842643587726,
|
|
"grad_norm": 0.2775053050931378,
|
|
"learning_rate": 3.990419423542383e-05,
|
|
"loss": 0.5987,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.6231313926042487,
|
|
"grad_norm": 0.2614901374015711,
|
|
"learning_rate": 3.990228942652215e-05,
|
|
"loss": 0.5918,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.6262785208497246,
|
|
"grad_norm": 0.2977635557001149,
|
|
"learning_rate": 3.9900365919387985e-05,
|
|
"loss": 0.6046,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.6294256490952006,
|
|
"grad_norm": 0.30438529335477493,
|
|
"learning_rate": 3.9898423716030364e-05,
|
|
"loss": 0.5966,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.6325727773406766,
|
|
"grad_norm": 0.28279872927198246,
|
|
"learning_rate": 3.989646281847783e-05,
|
|
"loss": 0.5943,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.6357199055861527,
|
|
"grad_norm": 0.25795220306495825,
|
|
"learning_rate": 3.989448322877848e-05,
|
|
"loss": 0.5989,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.6388670338316287,
|
|
"grad_norm": 0.280857506484411,
|
|
"learning_rate": 3.98924849489999e-05,
|
|
"loss": 0.595,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.6420141620771046,
|
|
"grad_norm": 0.28147222245655734,
|
|
"learning_rate": 3.989046798122922e-05,
|
|
"loss": 0.5968,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.6451612903225806,
|
|
"grad_norm": 0.22772337446133548,
|
|
"learning_rate": 3.988843232757308e-05,
|
|
"loss": 0.5895,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.6483084185680567,
|
|
"grad_norm": 0.24725422874191516,
|
|
"learning_rate": 3.9886377990157645e-05,
|
|
"loss": 0.5915,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.6514555468135327,
|
|
"grad_norm": 0.2478883381265632,
|
|
"learning_rate": 3.988430497112859e-05,
|
|
"loss": 0.5946,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.6546026750590087,
|
|
"grad_norm": 0.20803618305397573,
|
|
"learning_rate": 3.988221327265111e-05,
|
|
"loss": 0.6081,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.6577498033044846,
|
|
"grad_norm": 0.25475933367836384,
|
|
"learning_rate": 3.988010289690987e-05,
|
|
"loss": 0.6017,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.6608969315499607,
|
|
"grad_norm": 0.24824235295137567,
|
|
"learning_rate": 3.987797384610911e-05,
|
|
"loss": 0.6028,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.6640440597954367,
|
|
"grad_norm": 0.23024676822564225,
|
|
"learning_rate": 3.9875826122472514e-05,
|
|
"loss": 0.5947,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.6671911880409127,
|
|
"grad_norm": 0.27973892618861823,
|
|
"learning_rate": 3.987365972824331e-05,
|
|
"loss": 0.5977,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.6703383162863886,
|
|
"grad_norm": 0.21516896519325748,
|
|
"learning_rate": 3.98714746656842e-05,
|
|
"loss": 0.601,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.6734854445318647,
|
|
"grad_norm": 0.20803886239420252,
|
|
"learning_rate": 3.98692709370774e-05,
|
|
"loss": 0.5969,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.6766325727773407,
|
|
"grad_norm": 0.238940879654807,
|
|
"learning_rate": 3.986704854472462e-05,
|
|
"loss": 0.5985,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.6797797010228167,
|
|
"grad_norm": 0.24651655899332123,
|
|
"learning_rate": 3.9864807490947056e-05,
|
|
"loss": 0.5984,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.6829268292682927,
|
|
"grad_norm": 0.26084076415464624,
|
|
"learning_rate": 3.98625477780854e-05,
|
|
"loss": 0.5932,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.6860739575137687,
|
|
"grad_norm": 0.2386108054460097,
|
|
"learning_rate": 3.9860269408499844e-05,
|
|
"loss": 0.5842,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.6892210857592447,
|
|
"grad_norm": 0.2844335881193996,
|
|
"learning_rate": 3.9857972384570035e-05,
|
|
"loss": 0.595,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.6923682140047207,
|
|
"grad_norm": 0.2700792716329994,
|
|
"learning_rate": 3.985565670869513e-05,
|
|
"loss": 0.5965,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.6955153422501967,
|
|
"grad_norm": 0.25960095481333906,
|
|
"learning_rate": 3.985332238329378e-05,
|
|
"loss": 0.5916,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.6986624704956726,
|
|
"grad_norm": 0.28288643057296725,
|
|
"learning_rate": 3.9850969410804065e-05,
|
|
"loss": 0.5995,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.7018095987411487,
|
|
"grad_norm": 0.24327917475329708,
|
|
"learning_rate": 3.98485977936836e-05,
|
|
"loss": 0.5959,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.7049567269866247,
|
|
"grad_norm": 0.2721668752895481,
|
|
"learning_rate": 3.984620753440943e-05,
|
|
"loss": 0.5994,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.7081038552321007,
|
|
"grad_norm": 0.2607076361644052,
|
|
"learning_rate": 3.984379863547808e-05,
|
|
"loss": 0.5943,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.7112509834775768,
|
|
"grad_norm": 0.32932315230284676,
|
|
"learning_rate": 3.984137109940556e-05,
|
|
"loss": 0.5918,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.7143981117230527,
|
|
"grad_norm": 0.2696383359131296,
|
|
"learning_rate": 3.983892492872733e-05,
|
|
"loss": 0.5906,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.7175452399685287,
|
|
"grad_norm": 0.3585597053241887,
|
|
"learning_rate": 3.9836460125998334e-05,
|
|
"loss": 0.5948,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.7206923682140047,
|
|
"grad_norm": 0.2909815414499778,
|
|
"learning_rate": 3.9833976693792937e-05,
|
|
"loss": 0.5967,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.7238394964594808,
|
|
"grad_norm": 0.26425876255452163,
|
|
"learning_rate": 3.9831474634705005e-05,
|
|
"loss": 0.5935,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.7269866247049567,
|
|
"grad_norm": 0.2425151164990489,
|
|
"learning_rate": 3.982895395134782e-05,
|
|
"loss": 0.589,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.7301337529504327,
|
|
"grad_norm": 0.2616193102994722,
|
|
"learning_rate": 3.982641464635416e-05,
|
|
"loss": 0.6018,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.7332808811959087,
|
|
"grad_norm": 0.24405998282776664,
|
|
"learning_rate": 3.982385672237621e-05,
|
|
"loss": 0.5784,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.7364280094413848,
|
|
"grad_norm": 0.21832468260666305,
|
|
"learning_rate": 3.9821280182085625e-05,
|
|
"loss": 0.6015,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.7395751376868608,
|
|
"grad_norm": 0.25650060654854345,
|
|
"learning_rate": 3.98186850281735e-05,
|
|
"loss": 0.5913,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.7427222659323367,
|
|
"grad_norm": 0.27580046393197283,
|
|
"learning_rate": 3.981607126335038e-05,
|
|
"loss": 0.5895,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.7458693941778127,
|
|
"grad_norm": 0.2565257806459118,
|
|
"learning_rate": 3.981343889034622e-05,
|
|
"loss": 0.5919,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.7490165224232888,
|
|
"grad_norm": 0.28129400118590575,
|
|
"learning_rate": 3.981078791191044e-05,
|
|
"loss": 0.5824,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.7521636506687648,
|
|
"grad_norm": 0.27891204249277274,
|
|
"learning_rate": 3.980811833081189e-05,
|
|
"loss": 0.592,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.7553107789142408,
|
|
"grad_norm": 0.23957189182523364,
|
|
"learning_rate": 3.9805430149838826e-05,
|
|
"loss": 0.5923,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.7584579071597167,
|
|
"grad_norm": 0.267527485388114,
|
|
"learning_rate": 3.980272337179895e-05,
|
|
"loss": 0.5915,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.7616050354051928,
|
|
"grad_norm": 0.2893661929586173,
|
|
"learning_rate": 3.97999979995194e-05,
|
|
"loss": 0.5911,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.7647521636506688,
|
|
"grad_norm": 0.28637666161476577,
|
|
"learning_rate": 3.97972540358467e-05,
|
|
"loss": 0.5752,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.7678992918961448,
|
|
"grad_norm": 0.22704240396877157,
|
|
"learning_rate": 3.979449148364682e-05,
|
|
"loss": 0.5755,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.7710464201416207,
|
|
"grad_norm": 0.25773571819059654,
|
|
"learning_rate": 3.979171034580514e-05,
|
|
"loss": 0.5983,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.7741935483870968,
|
|
"grad_norm": 0.3238069584967583,
|
|
"learning_rate": 3.9788910625226435e-05,
|
|
"loss": 0.5841,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.7773406766325728,
|
|
"grad_norm": 0.3222043091055347,
|
|
"learning_rate": 3.978609232483491e-05,
|
|
"loss": 0.59,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.7804878048780488,
|
|
"grad_norm": 0.23894790835594737,
|
|
"learning_rate": 3.978325544757419e-05,
|
|
"loss": 0.5855,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.7836349331235248,
|
|
"grad_norm": 0.3065610131625477,
|
|
"learning_rate": 3.9780399996407235e-05,
|
|
"loss": 0.5872,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.7867820613690008,
|
|
"grad_norm": 0.41439567088342205,
|
|
"learning_rate": 3.977752597431649e-05,
|
|
"loss": 0.5922,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.7899291896144768,
|
|
"grad_norm": 0.33164301867172113,
|
|
"learning_rate": 3.977463338430375e-05,
|
|
"loss": 0.5966,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.7930763178599528,
|
|
"grad_norm": 0.338521625558078,
|
|
"learning_rate": 3.977172222939019e-05,
|
|
"loss": 0.5907,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.7962234461054288,
|
|
"grad_norm": 0.4350142509786572,
|
|
"learning_rate": 3.976879251261641e-05,
|
|
"loss": 0.5841,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.7993705743509048,
|
|
"grad_norm": 0.36993049984822834,
|
|
"learning_rate": 3.9765844237042385e-05,
|
|
"loss": 0.5864,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.8025177025963808,
|
|
"grad_norm": 0.313011441001989,
|
|
"learning_rate": 3.976287740574748e-05,
|
|
"loss": 0.5955,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.8056648308418568,
|
|
"grad_norm": 0.3540292456063131,
|
|
"learning_rate": 3.975989202183041e-05,
|
|
"loss": 0.5957,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.8088119590873328,
|
|
"grad_norm": 0.33646403434651423,
|
|
"learning_rate": 3.9756888088409314e-05,
|
|
"loss": 0.5847,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.8119590873328089,
|
|
"grad_norm": 0.26982248963868355,
|
|
"learning_rate": 3.975386560862166e-05,
|
|
"loss": 0.5885,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.8151062155782848,
|
|
"grad_norm": 0.29403731858846366,
|
|
"learning_rate": 3.975082458562433e-05,
|
|
"loss": 0.5897,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.8182533438237608,
|
|
"grad_norm": 0.4082653715355324,
|
|
"learning_rate": 3.974776502259354e-05,
|
|
"loss": 0.5791,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.8214004720692368,
|
|
"grad_norm": 0.3510739201797744,
|
|
"learning_rate": 3.9744686922724876e-05,
|
|
"loss": 0.593,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.8245476003147129,
|
|
"grad_norm": 0.3513487612479217,
|
|
"learning_rate": 3.97415902892333e-05,
|
|
"loss": 0.5836,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.8276947285601888,
|
|
"grad_norm": 0.28586935653924184,
|
|
"learning_rate": 3.973847512535313e-05,
|
|
"loss": 0.5826,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.8308418568056648,
|
|
"grad_norm": 0.2318470192789233,
|
|
"learning_rate": 3.973534143433802e-05,
|
|
"loss": 0.5814,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.8339889850511408,
|
|
"grad_norm": 0.29154683610806104,
|
|
"learning_rate": 3.9732189219460994e-05,
|
|
"loss": 0.5797,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.8371361132966169,
|
|
"grad_norm": 0.31587273712728664,
|
|
"learning_rate": 3.972901848401441e-05,
|
|
"loss": 0.5831,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.8402832415420929,
|
|
"grad_norm": 0.23591831976720817,
|
|
"learning_rate": 3.972582923130998e-05,
|
|
"loss": 0.5737,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.8434303697875688,
|
|
"grad_norm": 0.25691795262588335,
|
|
"learning_rate": 3.972262146467874e-05,
|
|
"loss": 0.5786,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.8465774980330448,
|
|
"grad_norm": 0.32432381849212155,
|
|
"learning_rate": 3.971939518747109e-05,
|
|
"loss": 0.593,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.8497246262785209,
|
|
"grad_norm": 0.28695101570482007,
|
|
"learning_rate": 3.9716150403056746e-05,
|
|
"loss": 0.5796,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.8528717545239969,
|
|
"grad_norm": 0.2724926989579401,
|
|
"learning_rate": 3.971288711482476e-05,
|
|
"loss": 0.5741,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.8560188827694729,
|
|
"grad_norm": 0.2806703214608174,
|
|
"learning_rate": 3.970960532618349e-05,
|
|
"loss": 0.5836,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.8591660110149488,
|
|
"grad_norm": 0.24677303795151184,
|
|
"learning_rate": 3.9706305040560644e-05,
|
|
"loss": 0.5818,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.8623131392604249,
|
|
"grad_norm": 0.2771238866050482,
|
|
"learning_rate": 3.9702986261403255e-05,
|
|
"loss": 0.5781,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.8654602675059009,
|
|
"grad_norm": 0.2924337657823486,
|
|
"learning_rate": 3.9699648992177626e-05,
|
|
"loss": 0.5756,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.8686073957513769,
|
|
"grad_norm": 0.25885016469830363,
|
|
"learning_rate": 3.969629323636944e-05,
|
|
"loss": 0.5844,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.8717545239968528,
|
|
"grad_norm": 0.23987091757624832,
|
|
"learning_rate": 3.9692918997483614e-05,
|
|
"loss": 0.5733,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.8749016522423289,
|
|
"grad_norm": 0.3337090298700013,
|
|
"learning_rate": 3.968952627904443e-05,
|
|
"loss": 0.571,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.8780487804878049,
|
|
"grad_norm": 0.29538904985776865,
|
|
"learning_rate": 3.9686115084595444e-05,
|
|
"loss": 0.5801,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.8811959087332809,
|
|
"grad_norm": 0.2312585310388234,
|
|
"learning_rate": 3.968268541769951e-05,
|
|
"loss": 0.5835,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.8843430369787569,
|
|
"grad_norm": 0.24512094230884698,
|
|
"learning_rate": 3.967923728193878e-05,
|
|
"loss": 0.5854,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.8874901652242329,
|
|
"grad_norm": 0.2903110478982194,
|
|
"learning_rate": 3.96757706809147e-05,
|
|
"loss": 0.5837,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.8906372934697089,
|
|
"grad_norm": 0.34977008813511384,
|
|
"learning_rate": 3.967228561824798e-05,
|
|
"loss": 0.5767,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.8937844217151849,
|
|
"grad_norm": 0.25337253280718625,
|
|
"learning_rate": 3.9668782097578656e-05,
|
|
"loss": 0.5839,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.8969315499606609,
|
|
"grad_norm": 0.21636074579356032,
|
|
"learning_rate": 3.9665260122566e-05,
|
|
"loss": 0.5804,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.9000786782061369,
|
|
"grad_norm": 0.33233711739245153,
|
|
"learning_rate": 3.966171969688858e-05,
|
|
"loss": 0.5737,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.9032258064516129,
|
|
"grad_norm": 0.2680586022952418,
|
|
"learning_rate": 3.965816082424423e-05,
|
|
"loss": 0.5729,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.9063729346970889,
|
|
"grad_norm": 0.24065778232025806,
|
|
"learning_rate": 3.965458350835005e-05,
|
|
"loss": 0.5786,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.9095200629425649,
|
|
"grad_norm": 0.24359799626407486,
|
|
"learning_rate": 3.965098775294241e-05,
|
|
"loss": 0.5743,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.912667191188041,
|
|
"grad_norm": 0.21669535169553875,
|
|
"learning_rate": 3.964737356177692e-05,
|
|
"loss": 0.5798,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.9158143194335169,
|
|
"grad_norm": 0.24527385896938736,
|
|
"learning_rate": 3.9643740938628485e-05,
|
|
"loss": 0.5771,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.9189614476789929,
|
|
"grad_norm": 0.24211076465986903,
|
|
"learning_rate": 3.964008988729121e-05,
|
|
"loss": 0.5733,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.9221085759244689,
|
|
"grad_norm": 0.2589502093646541,
|
|
"learning_rate": 3.9636420411578486e-05,
|
|
"loss": 0.5755,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.925255704169945,
|
|
"grad_norm": 0.25126849259271594,
|
|
"learning_rate": 3.963273251532294e-05,
|
|
"loss": 0.5866,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.9284028324154209,
|
|
"grad_norm": 0.28516274664724456,
|
|
"learning_rate": 3.962902620237642e-05,
|
|
"loss": 0.5803,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.9315499606608969,
|
|
"grad_norm": 0.23946824983891027,
|
|
"learning_rate": 3.9625301476610035e-05,
|
|
"loss": 0.588,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.9346970889063729,
|
|
"grad_norm": 0.25073581626718944,
|
|
"learning_rate": 3.9621558341914104e-05,
|
|
"loss": 0.5811,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.937844217151849,
|
|
"grad_norm": 0.22421588771080747,
|
|
"learning_rate": 3.9617796802198193e-05,
|
|
"loss": 0.5809,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.940991345397325,
|
|
"grad_norm": 0.28680785833388234,
|
|
"learning_rate": 3.961401686139108e-05,
|
|
"loss": 0.5762,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.9441384736428009,
|
|
"grad_norm": 0.2912561340919721,
|
|
"learning_rate": 3.961021852344075e-05,
|
|
"loss": 0.5685,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.9472856018882769,
|
|
"grad_norm": 0.25530982724652573,
|
|
"learning_rate": 3.960640179231443e-05,
|
|
"loss": 0.571,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.950432730133753,
|
|
"grad_norm": 0.23274023004495994,
|
|
"learning_rate": 3.960256667199854e-05,
|
|
"loss": 0.5766,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.953579858379229,
|
|
"grad_norm": 0.2526565624008317,
|
|
"learning_rate": 3.959871316649872e-05,
|
|
"loss": 0.5811,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.956726986624705,
|
|
"grad_norm": 0.2669242519610837,
|
|
"learning_rate": 3.959484127983979e-05,
|
|
"loss": 0.5786,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.9598741148701809,
|
|
"grad_norm": 0.2272805823487035,
|
|
"learning_rate": 3.959095101606579e-05,
|
|
"loss": 0.5804,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.963021243115657,
|
|
"grad_norm": 0.32599108271772875,
|
|
"learning_rate": 3.958704237923994e-05,
|
|
"loss": 0.565,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.966168371361133,
|
|
"grad_norm": 0.31736583791904754,
|
|
"learning_rate": 3.958311537344467e-05,
|
|
"loss": 0.5744,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.969315499606609,
|
|
"grad_norm": 0.25057991032117954,
|
|
"learning_rate": 3.957917000278156e-05,
|
|
"loss": 0.5744,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.9724626278520849,
|
|
"grad_norm": 0.3171592672476318,
|
|
"learning_rate": 3.9575206271371416e-05,
|
|
"loss": 0.5727,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.975609756097561,
|
|
"grad_norm": 0.2698770129342592,
|
|
"learning_rate": 3.957122418335419e-05,
|
|
"loss": 0.5684,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.978756884343037,
|
|
"grad_norm": 0.22946805293885594,
|
|
"learning_rate": 3.956722374288902e-05,
|
|
"loss": 0.5675,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.981904012588513,
|
|
"grad_norm": 0.3173852201061959,
|
|
"learning_rate": 3.9563204954154194e-05,
|
|
"loss": 0.5691,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.985051140833989,
|
|
"grad_norm": 0.3705950113218817,
|
|
"learning_rate": 3.955916782134719e-05,
|
|
"loss": 0.5841,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.988198269079465,
|
|
"grad_norm": 0.24571947086560714,
|
|
"learning_rate": 3.9555112348684626e-05,
|
|
"loss": 0.5763,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.991345397324941,
|
|
"grad_norm": 0.3095154791392979,
|
|
"learning_rate": 3.955103854040228e-05,
|
|
"loss": 0.5672,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.994492525570417,
|
|
"grad_norm": 0.3430882274581819,
|
|
"learning_rate": 3.9546946400755104e-05,
|
|
"loss": 0.5623,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.997639653815893,
|
|
"grad_norm": 0.21959520260949345,
|
|
"learning_rate": 3.954283593401715e-05,
|
|
"loss": 0.5667,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 1.003147128245476,
|
|
"grad_norm": 0.6182457687415346,
|
|
"learning_rate": 3.9538707144481656e-05,
|
|
"loss": 1.1053,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 1.006294256490952,
|
|
"grad_norm": 0.8091197174119846,
|
|
"learning_rate": 3.953456003646097e-05,
|
|
"loss": 0.5479,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 1.009441384736428,
|
|
"grad_norm": 0.5734582312017469,
|
|
"learning_rate": 3.953039461428659e-05,
|
|
"loss": 0.5401,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 1.012588512981904,
|
|
"grad_norm": 0.5553450254239277,
|
|
"learning_rate": 3.952621088230912e-05,
|
|
"loss": 0.5444,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 1.01573564122738,
|
|
"grad_norm": 0.715209019372237,
|
|
"learning_rate": 3.9522008844898316e-05,
|
|
"loss": 0.5482,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 1.018882769472856,
|
|
"grad_norm": 0.3194143113644003,
|
|
"learning_rate": 3.9517788506443036e-05,
|
|
"loss": 0.5486,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 1.022029897718332,
|
|
"grad_norm": 0.5794083060853625,
|
|
"learning_rate": 3.9513549871351244e-05,
|
|
"loss": 0.5387,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 1.025177025963808,
|
|
"grad_norm": 0.4549904471790254,
|
|
"learning_rate": 3.950929294405005e-05,
|
|
"loss": 0.5417,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 1.028324154209284,
|
|
"grad_norm": 0.39940752965584325,
|
|
"learning_rate": 3.950501772898563e-05,
|
|
"loss": 0.5383,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 1.03147128245476,
|
|
"grad_norm": 0.37376657983185657,
|
|
"learning_rate": 3.9500724230623285e-05,
|
|
"loss": 0.5438,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 1.034618410700236,
|
|
"grad_norm": 0.3249939876794576,
|
|
"learning_rate": 3.9496412453447396e-05,
|
|
"loss": 0.5423,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 1.037765538945712,
|
|
"grad_norm": 0.362363957521138,
|
|
"learning_rate": 3.949208240196145e-05,
|
|
"loss": 0.5341,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 1.0409126671911881,
|
|
"grad_norm": 0.3241126445768653,
|
|
"learning_rate": 3.948773408068801e-05,
|
|
"loss": 0.5377,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 1.044059795436664,
|
|
"grad_norm": 0.3368256917676624,
|
|
"learning_rate": 3.948336749416873e-05,
|
|
"loss": 0.55,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 1.04720692368214,
|
|
"grad_norm": 0.32581092992360716,
|
|
"learning_rate": 3.947898264696433e-05,
|
|
"loss": 0.5427,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 1.050354051927616,
|
|
"grad_norm": 0.31016030261198896,
|
|
"learning_rate": 3.947457954365461e-05,
|
|
"loss": 0.5355,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 1.053501180173092,
|
|
"grad_norm": 0.3133942156383092,
|
|
"learning_rate": 3.947015818883845e-05,
|
|
"loss": 0.5337,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 1.056648308418568,
|
|
"grad_norm": 0.3012762044302009,
|
|
"learning_rate": 3.946571858713376e-05,
|
|
"loss": 0.5451,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 1.059795436664044,
|
|
"grad_norm": 0.32025563813718905,
|
|
"learning_rate": 3.946126074317755e-05,
|
|
"loss": 0.5263,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 1.06294256490952,
|
|
"grad_norm": 0.31757922834182667,
|
|
"learning_rate": 3.9456784661625845e-05,
|
|
"loss": 0.5407,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 1.0660896931549961,
|
|
"grad_norm": 0.2724133341234047,
|
|
"learning_rate": 3.945229034715374e-05,
|
|
"loss": 0.5451,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 1.0692368214004722,
|
|
"grad_norm": 0.31611533863952757,
|
|
"learning_rate": 3.944777780445537e-05,
|
|
"loss": 0.5314,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 1.072383949645948,
|
|
"grad_norm": 0.264029405971174,
|
|
"learning_rate": 3.94432470382439e-05,
|
|
"loss": 0.5395,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 1.075531077891424,
|
|
"grad_norm": 0.2403700990983929,
|
|
"learning_rate": 3.9438698053251545e-05,
|
|
"loss": 0.5454,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 1.0786782061369,
|
|
"grad_norm": 0.26014062877023636,
|
|
"learning_rate": 3.943413085422954e-05,
|
|
"loss": 0.5303,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 1.081825334382376,
|
|
"grad_norm": 0.24331842687849348,
|
|
"learning_rate": 3.942954544594814e-05,
|
|
"loss": 0.5393,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 1.084972462627852,
|
|
"grad_norm": 0.2246092745659522,
|
|
"learning_rate": 3.942494183319662e-05,
|
|
"loss": 0.5388,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 1.088119590873328,
|
|
"grad_norm": 0.26647954408169594,
|
|
"learning_rate": 3.942032002078326e-05,
|
|
"loss": 0.5412,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 1.0912667191188041,
|
|
"grad_norm": 0.22770410942233646,
|
|
"learning_rate": 3.941568001353539e-05,
|
|
"loss": 0.5319,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 1.0944138473642802,
|
|
"grad_norm": 0.2554265454591065,
|
|
"learning_rate": 3.94110218162993e-05,
|
|
"loss": 0.5356,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 1.0975609756097562,
|
|
"grad_norm": 0.29466263774038076,
|
|
"learning_rate": 3.9406345433940284e-05,
|
|
"loss": 0.5375,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 1.100708103855232,
|
|
"grad_norm": 0.25834878315712795,
|
|
"learning_rate": 3.940165087134264e-05,
|
|
"loss": 0.5379,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 1.103855232100708,
|
|
"grad_norm": 0.25134217558928273,
|
|
"learning_rate": 3.939693813340966e-05,
|
|
"loss": 0.5249,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 1.107002360346184,
|
|
"grad_norm": 0.29107674814907275,
|
|
"learning_rate": 3.939220722506361e-05,
|
|
"loss": 0.5397,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 1.11014948859166,
|
|
"grad_norm": 0.22038795971485264,
|
|
"learning_rate": 3.938745815124574e-05,
|
|
"loss": 0.5178,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 1.113296616837136,
|
|
"grad_norm": 0.20162768321484986,
|
|
"learning_rate": 3.938269091691626e-05,
|
|
"loss": 0.5424,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 1.1164437450826121,
|
|
"grad_norm": 0.22798636250647,
|
|
"learning_rate": 3.937790552705437e-05,
|
|
"loss": 0.5401,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 1.1195908733280882,
|
|
"grad_norm": 0.24904854499387244,
|
|
"learning_rate": 3.9373101986658204e-05,
|
|
"loss": 0.5405,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 1.1227380015735642,
|
|
"grad_norm": 0.21942469794432654,
|
|
"learning_rate": 3.936828030074488e-05,
|
|
"loss": 0.5375,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 1.1258851298190402,
|
|
"grad_norm": 0.2072161315607419,
|
|
"learning_rate": 3.936344047435046e-05,
|
|
"loss": 0.5324,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 1.129032258064516,
|
|
"grad_norm": 0.22290097704007697,
|
|
"learning_rate": 3.935858251252994e-05,
|
|
"loss": 0.5408,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 1.132179386309992,
|
|
"grad_norm": 0.2194104787972607,
|
|
"learning_rate": 3.935370642035729e-05,
|
|
"loss": 0.5295,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 1.135326514555468,
|
|
"grad_norm": 0.22998068727153642,
|
|
"learning_rate": 3.9348812202925375e-05,
|
|
"loss": 0.5299,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 1.138473642800944,
|
|
"grad_norm": 0.20288897468564898,
|
|
"learning_rate": 3.9343899865346015e-05,
|
|
"loss": 0.5181,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 1.1416207710464201,
|
|
"grad_norm": 0.2533470728298271,
|
|
"learning_rate": 3.933896941274996e-05,
|
|
"loss": 0.5403,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 1.1447678992918962,
|
|
"grad_norm": 0.24735853692572296,
|
|
"learning_rate": 3.933402085028687e-05,
|
|
"loss": 0.5275,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 1.1479150275373722,
|
|
"grad_norm": 0.21213304673435288,
|
|
"learning_rate": 3.932905418312531e-05,
|
|
"loss": 0.5299,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 1.1510621557828482,
|
|
"grad_norm": 0.2570391773369148,
|
|
"learning_rate": 3.932406941645278e-05,
|
|
"loss": 0.5346,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 1.1542092840283242,
|
|
"grad_norm": 0.20961822348437115,
|
|
"learning_rate": 3.931906655547568e-05,
|
|
"loss": 0.5329,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 1.1573564122738,
|
|
"grad_norm": 0.19241742932101327,
|
|
"learning_rate": 3.9314045605419286e-05,
|
|
"loss": 0.5161,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 1.160503540519276,
|
|
"grad_norm": 0.1981209025761205,
|
|
"learning_rate": 3.930900657152777e-05,
|
|
"loss": 0.5285,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 1.163650668764752,
|
|
"grad_norm": 0.20579170791942883,
|
|
"learning_rate": 3.930394945906423e-05,
|
|
"loss": 0.5337,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 1.1667977970102281,
|
|
"grad_norm": 0.23447783712452946,
|
|
"learning_rate": 3.929887427331061e-05,
|
|
"loss": 0.5319,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 1.1699449252557041,
|
|
"grad_norm": 0.23240141396634467,
|
|
"learning_rate": 3.9293781019567736e-05,
|
|
"loss": 0.5216,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 1.1730920535011802,
|
|
"grad_norm": 0.20717972878459204,
|
|
"learning_rate": 3.9288669703155305e-05,
|
|
"loss": 0.5285,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 1.1762391817466562,
|
|
"grad_norm": 0.24921261028100655,
|
|
"learning_rate": 3.92835403294119e-05,
|
|
"loss": 0.5307,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 1.1793863099921322,
|
|
"grad_norm": 0.2541092436236979,
|
|
"learning_rate": 3.927839290369494e-05,
|
|
"loss": 0.529,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 1.1825334382376083,
|
|
"grad_norm": 0.20270164744890468,
|
|
"learning_rate": 3.927322743138071e-05,
|
|
"loss": 0.526,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 1.185680566483084,
|
|
"grad_norm": 0.2399584297754137,
|
|
"learning_rate": 3.926804391786433e-05,
|
|
"loss": 0.5365,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 1.1888276947285603,
|
|
"grad_norm": 0.24063071154269497,
|
|
"learning_rate": 3.926284236855979e-05,
|
|
"loss": 0.5359,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 1.1919748229740361,
|
|
"grad_norm": 0.2147952601728759,
|
|
"learning_rate": 3.92576227888999e-05,
|
|
"loss": 0.5369,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 1.1951219512195121,
|
|
"grad_norm": 0.23183738138121565,
|
|
"learning_rate": 3.925238518433629e-05,
|
|
"loss": 0.5406,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 1.1982690794649882,
|
|
"grad_norm": 0.24958601341879394,
|
|
"learning_rate": 3.924712956033945e-05,
|
|
"loss": 0.5257,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 1.2014162077104642,
|
|
"grad_norm": 0.2107193249163421,
|
|
"learning_rate": 3.9241855922398664e-05,
|
|
"loss": 0.5265,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 1.2045633359559402,
|
|
"grad_norm": 0.21211894634268036,
|
|
"learning_rate": 3.923656427602203e-05,
|
|
"loss": 0.5209,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 1.2077104642014163,
|
|
"grad_norm": 0.19007716139862285,
|
|
"learning_rate": 3.9231254626736475e-05,
|
|
"loss": 0.5248,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 1.2108575924468923,
|
|
"grad_norm": 0.2106932996483277,
|
|
"learning_rate": 3.922592698008771e-05,
|
|
"loss": 0.5255,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 1.2140047206923683,
|
|
"grad_norm": 0.20944881028089596,
|
|
"learning_rate": 3.922058134164025e-05,
|
|
"loss": 0.5383,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 1.2171518489378443,
|
|
"grad_norm": 0.19091968823309646,
|
|
"learning_rate": 3.9215217716977405e-05,
|
|
"loss": 0.5321,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 1.2202989771833201,
|
|
"grad_norm": 0.24949906631514807,
|
|
"learning_rate": 3.9209836111701274e-05,
|
|
"loss": 0.5337,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 1.2234461054287962,
|
|
"grad_norm": 0.24811252472670564,
|
|
"learning_rate": 3.9204436531432725e-05,
|
|
"loss": 0.5305,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 1.2265932336742722,
|
|
"grad_norm": 0.21473975761364125,
|
|
"learning_rate": 3.9199018981811405e-05,
|
|
"loss": 0.5203,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 1.2297403619197482,
|
|
"grad_norm": 0.21533576750411015,
|
|
"learning_rate": 3.919358346849573e-05,
|
|
"loss": 0.5433,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.2328874901652243,
|
|
"grad_norm": 0.21369216327652596,
|
|
"learning_rate": 3.918812999716288e-05,
|
|
"loss": 0.5305,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 1.2360346184107003,
|
|
"grad_norm": 0.21462424108637662,
|
|
"learning_rate": 3.918265857350879e-05,
|
|
"loss": 0.5359,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 1.2391817466561763,
|
|
"grad_norm": 0.24722325499788872,
|
|
"learning_rate": 3.917716920324815e-05,
|
|
"loss": 0.528,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 1.2423288749016523,
|
|
"grad_norm": 0.2011641071003589,
|
|
"learning_rate": 3.917166189211438e-05,
|
|
"loss": 0.5314,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 1.2454760031471284,
|
|
"grad_norm": 0.2129227867593859,
|
|
"learning_rate": 3.916613664585966e-05,
|
|
"loss": 0.536,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 1.2486231313926042,
|
|
"grad_norm": 0.22745233172578577,
|
|
"learning_rate": 3.9160593470254884e-05,
|
|
"loss": 0.5313,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 1.2517702596380802,
|
|
"grad_norm": 0.21406731944348806,
|
|
"learning_rate": 3.915503237108967e-05,
|
|
"loss": 0.5225,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 1.2549173878835562,
|
|
"grad_norm": 0.23441772239476547,
|
|
"learning_rate": 3.9149453354172387e-05,
|
|
"loss": 0.5288,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 1.2580645161290323,
|
|
"grad_norm": 0.23499009277537974,
|
|
"learning_rate": 3.914385642533008e-05,
|
|
"loss": 0.5334,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 1.2612116443745083,
|
|
"grad_norm": 0.19728137418451414,
|
|
"learning_rate": 3.913824159040853e-05,
|
|
"loss": 0.529,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.2643587726199843,
|
|
"grad_norm": 0.24085359726093897,
|
|
"learning_rate": 3.913260885527221e-05,
|
|
"loss": 0.5276,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 1.2675059008654603,
|
|
"grad_norm": 0.25321890170169037,
|
|
"learning_rate": 3.912695822580428e-05,
|
|
"loss": 0.5317,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 1.2706530291109361,
|
|
"grad_norm": 0.21705384607485445,
|
|
"learning_rate": 3.912128970790659e-05,
|
|
"loss": 0.5232,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 1.2738001573564124,
|
|
"grad_norm": 0.20299880408423301,
|
|
"learning_rate": 3.911560330749971e-05,
|
|
"loss": 0.5366,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 1.2769472856018882,
|
|
"grad_norm": 0.22689509092035035,
|
|
"learning_rate": 3.9109899030522846e-05,
|
|
"loss": 0.5304,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 1.2800944138473642,
|
|
"grad_norm": 0.20734100246332385,
|
|
"learning_rate": 3.910417688293389e-05,
|
|
"loss": 0.5308,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 1.2832415420928402,
|
|
"grad_norm": 0.2079086103791129,
|
|
"learning_rate": 3.909843687070939e-05,
|
|
"loss": 0.533,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 1.2863886703383163,
|
|
"grad_norm": 0.2383801744359338,
|
|
"learning_rate": 3.9092678999844575e-05,
|
|
"loss": 0.5261,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 1.2895357985837923,
|
|
"grad_norm": 0.26103176241188575,
|
|
"learning_rate": 3.90869032763533e-05,
|
|
"loss": 0.525,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 1.2926829268292683,
|
|
"grad_norm": 0.23498553551570836,
|
|
"learning_rate": 3.90811097062681e-05,
|
|
"loss": 0.5257,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.2958300550747444,
|
|
"grad_norm": 0.22833679759966044,
|
|
"learning_rate": 3.90752982956401e-05,
|
|
"loss": 0.5328,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 1.2989771833202202,
|
|
"grad_norm": 0.25544426763785694,
|
|
"learning_rate": 3.906946905053912e-05,
|
|
"loss": 0.5238,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 1.3021243115656964,
|
|
"grad_norm": 0.2672450987154701,
|
|
"learning_rate": 3.906362197705355e-05,
|
|
"loss": 0.5389,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 1.3052714398111722,
|
|
"grad_norm": 0.2619780531405743,
|
|
"learning_rate": 3.905775708129045e-05,
|
|
"loss": 0.5289,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 1.3084185680566482,
|
|
"grad_norm": 0.25026525055288934,
|
|
"learning_rate": 3.905187436937545e-05,
|
|
"loss": 0.5251,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 1.3115656963021243,
|
|
"grad_norm": 0.2615761605038083,
|
|
"learning_rate": 3.904597384745282e-05,
|
|
"loss": 0.5232,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 1.3147128245476003,
|
|
"grad_norm": 0.2209468332287666,
|
|
"learning_rate": 3.904005552168541e-05,
|
|
"loss": 0.5247,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 1.3178599527930763,
|
|
"grad_norm": 0.27687648105272317,
|
|
"learning_rate": 3.9034119398254703e-05,
|
|
"loss": 0.5394,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 1.3210070810385524,
|
|
"grad_norm": 0.264646195521817,
|
|
"learning_rate": 3.902816548336072e-05,
|
|
"loss": 0.5195,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 1.3241542092840284,
|
|
"grad_norm": 0.24319496232874022,
|
|
"learning_rate": 3.90221937832221e-05,
|
|
"loss": 0.5297,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.3273013375295044,
|
|
"grad_norm": 0.2682903371006935,
|
|
"learning_rate": 3.901620430407605e-05,
|
|
"loss": 0.5288,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 1.3304484657749804,
|
|
"grad_norm": 0.25696926111301793,
|
|
"learning_rate": 3.9010197052178334e-05,
|
|
"loss": 0.5321,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 1.3335955940204562,
|
|
"grad_norm": 0.21737804255664558,
|
|
"learning_rate": 3.9004172033803294e-05,
|
|
"loss": 0.527,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 1.3367427222659323,
|
|
"grad_norm": 0.25367064453625227,
|
|
"learning_rate": 3.899812925524382e-05,
|
|
"loss": 0.5294,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 1.3398898505114083,
|
|
"grad_norm": 0.23247403805521405,
|
|
"learning_rate": 3.8992068722811366e-05,
|
|
"loss": 0.5268,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 1.3430369787568843,
|
|
"grad_norm": 0.20969217116963346,
|
|
"learning_rate": 3.89859904428359e-05,
|
|
"loss": 0.525,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 1.3461841070023604,
|
|
"grad_norm": 0.21944665805836538,
|
|
"learning_rate": 3.897989442166597e-05,
|
|
"loss": 0.5303,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 1.3493312352478364,
|
|
"grad_norm": 0.20387874014114413,
|
|
"learning_rate": 3.89737806656686e-05,
|
|
"loss": 0.5294,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 1.3524783634933124,
|
|
"grad_norm": 0.23174913286284687,
|
|
"learning_rate": 3.8967649181229384e-05,
|
|
"loss": 0.5279,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 1.3556254917387884,
|
|
"grad_norm": 0.2435719017940357,
|
|
"learning_rate": 3.896149997475241e-05,
|
|
"loss": 0.5246,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.3587726199842645,
|
|
"grad_norm": 0.22427280573004413,
|
|
"learning_rate": 3.895533305266029e-05,
|
|
"loss": 0.5175,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 1.3619197482297403,
|
|
"grad_norm": 0.23620945890539627,
|
|
"learning_rate": 3.894914842139411e-05,
|
|
"loss": 0.5263,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 1.3650668764752163,
|
|
"grad_norm": 0.3013716827125622,
|
|
"learning_rate": 3.894294608741349e-05,
|
|
"loss": 0.5301,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 1.3682140047206923,
|
|
"grad_norm": 0.2147253729034818,
|
|
"learning_rate": 3.893672605719651e-05,
|
|
"loss": 0.5324,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 1.3713611329661684,
|
|
"grad_norm": 0.2818860617617866,
|
|
"learning_rate": 3.893048833723976e-05,
|
|
"loss": 0.5257,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 1.3745082612116444,
|
|
"grad_norm": 0.29077510474579804,
|
|
"learning_rate": 3.892423293405828e-05,
|
|
"loss": 0.5329,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 1.3776553894571204,
|
|
"grad_norm": 0.24939201266271596,
|
|
"learning_rate": 3.891795985418559e-05,
|
|
"loss": 0.5249,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 1.3808025177025964,
|
|
"grad_norm": 0.2715447424635029,
|
|
"learning_rate": 3.891166910417368e-05,
|
|
"loss": 0.5176,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 1.3839496459480725,
|
|
"grad_norm": 0.31564837515854355,
|
|
"learning_rate": 3.890536069059299e-05,
|
|
"loss": 0.5247,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 1.3870967741935485,
|
|
"grad_norm": 0.28321629275081395,
|
|
"learning_rate": 3.88990346200324e-05,
|
|
"loss": 0.5292,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.3902439024390243,
|
|
"grad_norm": 0.255553001892679,
|
|
"learning_rate": 3.889269089909924e-05,
|
|
"loss": 0.5274,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 1.3933910306845003,
|
|
"grad_norm": 0.22898176345383003,
|
|
"learning_rate": 3.888632953441929e-05,
|
|
"loss": 0.5278,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 1.3965381589299763,
|
|
"grad_norm": 0.25102215567626407,
|
|
"learning_rate": 3.887995053263673e-05,
|
|
"loss": 0.5263,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 1.3996852871754524,
|
|
"grad_norm": 0.20971240214227582,
|
|
"learning_rate": 3.887355390041418e-05,
|
|
"loss": 0.5226,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 1.4028324154209284,
|
|
"grad_norm": 0.20633909928885785,
|
|
"learning_rate": 3.886713964443266e-05,
|
|
"loss": 0.5289,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 1.4059795436664044,
|
|
"grad_norm": 0.2574749675063496,
|
|
"learning_rate": 3.886070777139163e-05,
|
|
"loss": 0.5312,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 1.4091266719118805,
|
|
"grad_norm": 0.261674083730878,
|
|
"learning_rate": 3.88542582880089e-05,
|
|
"loss": 0.536,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 1.4122738001573565,
|
|
"grad_norm": 0.23684316877305508,
|
|
"learning_rate": 3.884779120102071e-05,
|
|
"loss": 0.5305,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 1.4154209284028325,
|
|
"grad_norm": 0.2641611884928782,
|
|
"learning_rate": 3.884130651718168e-05,
|
|
"loss": 0.5242,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 1.4185680566483083,
|
|
"grad_norm": 0.26330433265613506,
|
|
"learning_rate": 3.883480424326481e-05,
|
|
"loss": 0.5336,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.4217151848937843,
|
|
"grad_norm": 0.24912300302600038,
|
|
"learning_rate": 3.882828438606145e-05,
|
|
"loss": 0.5349,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 1.4248623131392604,
|
|
"grad_norm": 0.20605940059141603,
|
|
"learning_rate": 3.882174695238135e-05,
|
|
"loss": 0.5287,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 1.4280094413847364,
|
|
"grad_norm": 0.2693403536580001,
|
|
"learning_rate": 3.8815191949052586e-05,
|
|
"loss": 0.5367,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 1.4311565696302124,
|
|
"grad_norm": 0.314210855668854,
|
|
"learning_rate": 3.880861938292162e-05,
|
|
"loss": 0.523,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 1.4343036978756885,
|
|
"grad_norm": 0.24090278288346206,
|
|
"learning_rate": 3.880202926085321e-05,
|
|
"loss": 0.5224,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 1.4374508261211645,
|
|
"grad_norm": 0.30398502103831454,
|
|
"learning_rate": 3.87954215897305e-05,
|
|
"loss": 0.5302,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 1.4405979543666405,
|
|
"grad_norm": 0.3216734104886104,
|
|
"learning_rate": 3.8788796376454936e-05,
|
|
"loss": 0.5274,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 1.4437450826121165,
|
|
"grad_norm": 0.22333114204077223,
|
|
"learning_rate": 3.878215362794628e-05,
|
|
"loss": 0.5239,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 1.4468922108575923,
|
|
"grad_norm": 0.23789190915199318,
|
|
"learning_rate": 3.877549335114263e-05,
|
|
"loss": 0.5209,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 1.4500393391030684,
|
|
"grad_norm": 0.1890051988727759,
|
|
"learning_rate": 3.8768815553000376e-05,
|
|
"loss": 0.5269,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.4531864673485444,
|
|
"grad_norm": 0.22963562375613875,
|
|
"learning_rate": 3.8762120240494223e-05,
|
|
"loss": 0.529,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 1.4563335955940204,
|
|
"grad_norm": 0.24014125362476896,
|
|
"learning_rate": 3.875540742061715e-05,
|
|
"loss": 0.5249,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 1.4594807238394965,
|
|
"grad_norm": 0.25487459533999646,
|
|
"learning_rate": 3.874867710038044e-05,
|
|
"loss": 0.5238,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 1.4626278520849725,
|
|
"grad_norm": 0.2329939674520019,
|
|
"learning_rate": 3.874192928681364e-05,
|
|
"loss": 0.5245,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 1.4657749803304485,
|
|
"grad_norm": 0.2375922581202063,
|
|
"learning_rate": 3.873516398696457e-05,
|
|
"loss": 0.5253,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 1.4689221085759245,
|
|
"grad_norm": 0.22120136053113018,
|
|
"learning_rate": 3.8728381207899326e-05,
|
|
"loss": 0.5322,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 1.4720692368214006,
|
|
"grad_norm": 0.2575168541935438,
|
|
"learning_rate": 3.872158095670225e-05,
|
|
"loss": 0.5163,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 1.4752163650668764,
|
|
"grad_norm": 0.24759034856123785,
|
|
"learning_rate": 3.871476324047593e-05,
|
|
"loss": 0.5219,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 1.4783634933123526,
|
|
"grad_norm": 0.2122545341024054,
|
|
"learning_rate": 3.870792806634121e-05,
|
|
"loss": 0.5219,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 1.4815106215578284,
|
|
"grad_norm": 0.21548255412844264,
|
|
"learning_rate": 3.8701075441437156e-05,
|
|
"loss": 0.5139,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.4846577498033044,
|
|
"grad_norm": 0.23209201852334332,
|
|
"learning_rate": 3.8694205372921054e-05,
|
|
"loss": 0.5255,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 1.4878048780487805,
|
|
"grad_norm": 0.22357478851651288,
|
|
"learning_rate": 3.868731786796843e-05,
|
|
"loss": 0.5173,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 1.4909520062942565,
|
|
"grad_norm": 0.23628716934333172,
|
|
"learning_rate": 3.8680412933773007e-05,
|
|
"loss": 0.5166,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 1.4940991345397325,
|
|
"grad_norm": 0.2840109021374993,
|
|
"learning_rate": 3.867349057754671e-05,
|
|
"loss": 0.5248,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 1.4972462627852086,
|
|
"grad_norm": 0.19780778754861236,
|
|
"learning_rate": 3.8666550806519676e-05,
|
|
"loss": 0.5309,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.5003933910306846,
|
|
"grad_norm": 0.23377357292240997,
|
|
"learning_rate": 3.8659593627940204e-05,
|
|
"loss": 0.5242,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.5035405192761604,
|
|
"grad_norm": 0.27389049583059905,
|
|
"learning_rate": 3.8652619049074814e-05,
|
|
"loss": 0.5326,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.5066876475216366,
|
|
"grad_norm": 0.2155931645603816,
|
|
"learning_rate": 3.8645627077208166e-05,
|
|
"loss": 0.5195,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.5098347757671124,
|
|
"grad_norm": 0.22156798104402176,
|
|
"learning_rate": 3.8638617719643095e-05,
|
|
"loss": 0.5171,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.5129819040125885,
|
|
"grad_norm": 0.23067806483053538,
|
|
"learning_rate": 3.8631590983700606e-05,
|
|
"loss": 0.5152,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.5161290322580645,
|
|
"grad_norm": 0.1921108745110929,
|
|
"learning_rate": 3.8624546876719834e-05,
|
|
"loss": 0.5283,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.5192761605035405,
|
|
"grad_norm": 0.21072974454084317,
|
|
"learning_rate": 3.861748540605808e-05,
|
|
"loss": 0.5171,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.5224232887490166,
|
|
"grad_norm": 0.21970389437346471,
|
|
"learning_rate": 3.8610406579090766e-05,
|
|
"loss": 0.5219,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.5255704169944924,
|
|
"grad_norm": 0.20711315573580885,
|
|
"learning_rate": 3.860331040321145e-05,
|
|
"loss": 0.5253,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.5287175452399686,
|
|
"grad_norm": 0.24879575847966118,
|
|
"learning_rate": 3.8596196885831804e-05,
|
|
"loss": 0.5302,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.5318646734854444,
|
|
"grad_norm": 0.28066766221968237,
|
|
"learning_rate": 3.858906603438161e-05,
|
|
"loss": 0.5372,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.5350118017309207,
|
|
"grad_norm": 0.23712162618272606,
|
|
"learning_rate": 3.8581917856308775e-05,
|
|
"loss": 0.53,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.5381589299763965,
|
|
"grad_norm": 0.24813061042956056,
|
|
"learning_rate": 3.857475235907928e-05,
|
|
"loss": 0.5204,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.5413060582218725,
|
|
"grad_norm": 0.20722206925566874,
|
|
"learning_rate": 3.8567569550177195e-05,
|
|
"loss": 0.5318,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.5444531864673485,
|
|
"grad_norm": 0.2282801127025407,
|
|
"learning_rate": 3.856036943710469e-05,
|
|
"loss": 0.5238,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.5476003147128246,
|
|
"grad_norm": 0.25422893533824964,
|
|
"learning_rate": 3.8553152027382e-05,
|
|
"loss": 0.5204,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.5507474429583006,
|
|
"grad_norm": 0.2259564938535116,
|
|
"learning_rate": 3.854591732854741e-05,
|
|
"loss": 0.5257,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.5538945712037766,
|
|
"grad_norm": 0.23136885066637908,
|
|
"learning_rate": 3.853866534815728e-05,
|
|
"loss": 0.5253,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.5570416994492526,
|
|
"grad_norm": 0.25141412317565787,
|
|
"learning_rate": 3.853139609378603e-05,
|
|
"loss": 0.5215,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.5601888276947284,
|
|
"grad_norm": 0.216386761256824,
|
|
"learning_rate": 3.85241095730261e-05,
|
|
"loss": 0.5175,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.5633359559402047,
|
|
"grad_norm": 0.24355134099428055,
|
|
"learning_rate": 3.8516805793487974e-05,
|
|
"loss": 0.519,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.5664830841856805,
|
|
"grad_norm": 0.19505711198842687,
|
|
"learning_rate": 3.850948476280015e-05,
|
|
"loss": 0.5327,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.5696302124311565,
|
|
"grad_norm": 0.2447172400294907,
|
|
"learning_rate": 3.8502146488609164e-05,
|
|
"loss": 0.5212,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.5727773406766326,
|
|
"grad_norm": 0.19962243706421512,
|
|
"learning_rate": 3.8494790978579565e-05,
|
|
"loss": 0.5142,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.5759244689221086,
|
|
"grad_norm": 0.2841517257055549,
|
|
"learning_rate": 3.848741824039386e-05,
|
|
"loss": 0.5178,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.5790715971675846,
|
|
"grad_norm": 0.20724334543587292,
|
|
"learning_rate": 3.8480028281752615e-05,
|
|
"loss": 0.5249,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.5822187254130606,
|
|
"grad_norm": 0.28838849111673964,
|
|
"learning_rate": 3.8472621110374335e-05,
|
|
"loss": 0.5173,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.5853658536585367,
|
|
"grad_norm": 0.24186826741838155,
|
|
"learning_rate": 3.8465196733995514e-05,
|
|
"loss": 0.5154,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.5885129819040125,
|
|
"grad_norm": 0.1983223561298523,
|
|
"learning_rate": 3.8457755160370625e-05,
|
|
"loss": 0.509,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.5916601101494887,
|
|
"grad_norm": 0.21649035102901398,
|
|
"learning_rate": 3.8450296397272095e-05,
|
|
"loss": 0.5321,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.5948072383949645,
|
|
"grad_norm": 0.25388593478131405,
|
|
"learning_rate": 3.8442820452490305e-05,
|
|
"loss": 0.5249,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.5979543666404405,
|
|
"grad_norm": 0.2642843241520677,
|
|
"learning_rate": 3.843532733383358e-05,
|
|
"loss": 0.5256,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.6011014948859166,
|
|
"grad_norm": 0.23572275621033986,
|
|
"learning_rate": 3.8427817049128194e-05,
|
|
"loss": 0.5216,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.6042486231313926,
|
|
"grad_norm": 0.2110591043789785,
|
|
"learning_rate": 3.842028960621834e-05,
|
|
"loss": 0.5149,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.6073957513768686,
|
|
"grad_norm": 0.22877095520766147,
|
|
"learning_rate": 3.841274501296613e-05,
|
|
"loss": 0.5235,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.6105428796223447,
|
|
"grad_norm": 0.24251420163715415,
|
|
"learning_rate": 3.84051832772516e-05,
|
|
"loss": 0.5144,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.6136900078678207,
|
|
"grad_norm": 0.20511713060127806,
|
|
"learning_rate": 3.839760440697268e-05,
|
|
"loss": 0.5258,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.6168371361132965,
|
|
"grad_norm": 0.23501746426992737,
|
|
"learning_rate": 3.83900084100452e-05,
|
|
"loss": 0.5218,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.6199842643587727,
|
|
"grad_norm": 0.24300834825222642,
|
|
"learning_rate": 3.838239529440287e-05,
|
|
"loss": 0.5201,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.6231313926042485,
|
|
"grad_norm": 0.2415867797392267,
|
|
"learning_rate": 3.83747650679973e-05,
|
|
"loss": 0.5214,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.6262785208497246,
|
|
"grad_norm": 0.24575403339954768,
|
|
"learning_rate": 3.836711773879795e-05,
|
|
"loss": 0.5262,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.6294256490952006,
|
|
"grad_norm": 0.19675787482506665,
|
|
"learning_rate": 3.835945331479216e-05,
|
|
"loss": 0.5144,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.6325727773406766,
|
|
"grad_norm": 0.26577039634688837,
|
|
"learning_rate": 3.8351771803985115e-05,
|
|
"loss": 0.5192,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.6357199055861527,
|
|
"grad_norm": 0.286665435100784,
|
|
"learning_rate": 3.8344073214399845e-05,
|
|
"loss": 0.5291,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.6388670338316287,
|
|
"grad_norm": 0.18836938646912646,
|
|
"learning_rate": 3.833635755407723e-05,
|
|
"loss": 0.5109,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.6420141620771047,
|
|
"grad_norm": 0.2650168026961069,
|
|
"learning_rate": 3.832862483107597e-05,
|
|
"loss": 0.5221,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.6451612903225805,
|
|
"grad_norm": 0.2361325083482264,
|
|
"learning_rate": 3.832087505347257e-05,
|
|
"loss": 0.5219,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.6483084185680568,
|
|
"grad_norm": 0.19034304753715936,
|
|
"learning_rate": 3.831310822936139e-05,
|
|
"loss": 0.5249,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.6514555468135326,
|
|
"grad_norm": 0.22876829423081257,
|
|
"learning_rate": 3.830532436685457e-05,
|
|
"loss": 0.5144,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.6546026750590088,
|
|
"grad_norm": 0.19602966120535223,
|
|
"learning_rate": 3.829752347408202e-05,
|
|
"loss": 0.5137,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.6577498033044846,
|
|
"grad_norm": 0.19990915987444982,
|
|
"learning_rate": 3.8289705559191495e-05,
|
|
"loss": 0.5188,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.6608969315499607,
|
|
"grad_norm": 0.22044066914833604,
|
|
"learning_rate": 3.8281870630348483e-05,
|
|
"loss": 0.5147,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.6640440597954367,
|
|
"grad_norm": 0.21061663961165006,
|
|
"learning_rate": 3.827401869573626e-05,
|
|
"loss": 0.5231,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.6671911880409127,
|
|
"grad_norm": 0.21712763474322638,
|
|
"learning_rate": 3.826614976355584e-05,
|
|
"loss": 0.5276,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.6703383162863887,
|
|
"grad_norm": 0.23698263316551577,
|
|
"learning_rate": 3.825826384202604e-05,
|
|
"loss": 0.512,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.6734854445318645,
|
|
"grad_norm": 0.26433812000215734,
|
|
"learning_rate": 3.8250360939383384e-05,
|
|
"loss": 0.5205,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.6766325727773408,
|
|
"grad_norm": 0.23388260377969083,
|
|
"learning_rate": 3.8242441063882145e-05,
|
|
"loss": 0.5158,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.6797797010228166,
|
|
"grad_norm": 0.23031333677705712,
|
|
"learning_rate": 3.82345042237943e-05,
|
|
"loss": 0.5211,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.6829268292682928,
|
|
"grad_norm": 0.2548042458770785,
|
|
"learning_rate": 3.822655042740959e-05,
|
|
"loss": 0.5198,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.6860739575137687,
|
|
"grad_norm": 0.22638295828893965,
|
|
"learning_rate": 3.8218579683035425e-05,
|
|
"loss": 0.5238,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.6892210857592447,
|
|
"grad_norm": 0.2262970560082153,
|
|
"learning_rate": 3.8210591998996924e-05,
|
|
"loss": 0.5202,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.6923682140047207,
|
|
"grad_norm": 0.21411782792791356,
|
|
"learning_rate": 3.8202587383636926e-05,
|
|
"loss": 0.5222,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.6955153422501967,
|
|
"grad_norm": 0.20447045372047343,
|
|
"learning_rate": 3.8194565845315936e-05,
|
|
"loss": 0.5173,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.6986624704956728,
|
|
"grad_norm": 0.2162189855266448,
|
|
"learning_rate": 3.818652739241211e-05,
|
|
"loss": 0.5144,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.7018095987411486,
|
|
"grad_norm": 0.23817213025322223,
|
|
"learning_rate": 3.817847203332131e-05,
|
|
"loss": 0.5239,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.7049567269866248,
|
|
"grad_norm": 0.2451599843139077,
|
|
"learning_rate": 3.8170399776457044e-05,
|
|
"loss": 0.5252,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.7081038552321006,
|
|
"grad_norm": 0.21173486339403857,
|
|
"learning_rate": 3.816231063025045e-05,
|
|
"loss": 0.5144,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.7112509834775769,
|
|
"grad_norm": 0.22798045746972564,
|
|
"learning_rate": 3.8154204603150334e-05,
|
|
"loss": 0.5246,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.7143981117230527,
|
|
"grad_norm": 0.22874807203605335,
|
|
"learning_rate": 3.814608170362311e-05,
|
|
"loss": 0.5171,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.7175452399685287,
|
|
"grad_norm": 0.20891847321188745,
|
|
"learning_rate": 3.8137941940152834e-05,
|
|
"loss": 0.5196,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.7206923682140047,
|
|
"grad_norm": 0.21664552315581692,
|
|
"learning_rate": 3.812978532124116e-05,
|
|
"loss": 0.5074,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.7238394964594808,
|
|
"grad_norm": 0.20341701815710325,
|
|
"learning_rate": 3.812161185540736e-05,
|
|
"loss": 0.5167,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.7269866247049568,
|
|
"grad_norm": 0.21511450945343744,
|
|
"learning_rate": 3.811342155118829e-05,
|
|
"loss": 0.5192,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.7301337529504326,
|
|
"grad_norm": 0.2194147912509461,
|
|
"learning_rate": 3.81052144171384e-05,
|
|
"loss": 0.5225,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.7332808811959088,
|
|
"grad_norm": 0.22863514788872993,
|
|
"learning_rate": 3.809699046182972e-05,
|
|
"loss": 0.5081,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.7364280094413846,
|
|
"grad_norm": 0.207709622035488,
|
|
"learning_rate": 3.808874969385184e-05,
|
|
"loss": 0.5089,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.739575137686861,
|
|
"grad_norm": 0.21852716207508774,
|
|
"learning_rate": 3.808049212181192e-05,
|
|
"loss": 0.5198,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.7427222659323367,
|
|
"grad_norm": 0.22298890603933133,
|
|
"learning_rate": 3.8072217754334655e-05,
|
|
"loss": 0.52,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.7458693941778127,
|
|
"grad_norm": 0.2532878106195187,
|
|
"learning_rate": 3.8063926600062315e-05,
|
|
"loss": 0.5145,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.7490165224232888,
|
|
"grad_norm": 0.24360819970385728,
|
|
"learning_rate": 3.805561866765467e-05,
|
|
"loss": 0.5141,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.7521636506687648,
|
|
"grad_norm": 0.20861292145147736,
|
|
"learning_rate": 3.8047293965789025e-05,
|
|
"loss": 0.5196,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.7553107789142408,
|
|
"grad_norm": 0.21285677902378522,
|
|
"learning_rate": 3.803895250316021e-05,
|
|
"loss": 0.5121,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.7584579071597166,
|
|
"grad_norm": 0.2123709291037103,
|
|
"learning_rate": 3.803059428848054e-05,
|
|
"loss": 0.5176,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.7616050354051929,
|
|
"grad_norm": 0.20536678541188122,
|
|
"learning_rate": 3.8022219330479854e-05,
|
|
"loss": 0.5209,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.7647521636506687,
|
|
"grad_norm": 0.21563446035753572,
|
|
"learning_rate": 3.801382763790546e-05,
|
|
"loss": 0.5206,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.767899291896145,
|
|
"grad_norm": 0.1986468939526788,
|
|
"learning_rate": 3.800541921952213e-05,
|
|
"loss": 0.5208,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.7710464201416207,
|
|
"grad_norm": 0.1966987426971003,
|
|
"learning_rate": 3.799699408411215e-05,
|
|
"loss": 0.5073,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.7741935483870968,
|
|
"grad_norm": 0.216109363155467,
|
|
"learning_rate": 3.7988552240475235e-05,
|
|
"loss": 0.5148,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.7773406766325728,
|
|
"grad_norm": 0.1879949670400624,
|
|
"learning_rate": 3.7980093697428545e-05,
|
|
"loss": 0.5253,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.7804878048780488,
|
|
"grad_norm": 0.21164171435059104,
|
|
"learning_rate": 3.797161846380669e-05,
|
|
"loss": 0.5131,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.7836349331235248,
|
|
"grad_norm": 0.21236700030393402,
|
|
"learning_rate": 3.796312654846174e-05,
|
|
"loss": 0.5262,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.7867820613690006,
|
|
"grad_norm": 0.23772908828608705,
|
|
"learning_rate": 3.795461796026314e-05,
|
|
"loss": 0.5161,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.789929189614477,
|
|
"grad_norm": 0.1989690141679018,
|
|
"learning_rate": 3.794609270809779e-05,
|
|
"loss": 0.5148,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.7930763178599527,
|
|
"grad_norm": 0.19386929212613396,
|
|
"learning_rate": 3.793755080086997e-05,
|
|
"loss": 0.5136,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.796223446105429,
|
|
"grad_norm": 0.20873828711474318,
|
|
"learning_rate": 3.792899224750136e-05,
|
|
"loss": 0.5285,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.7993705743509048,
|
|
"grad_norm": 0.21110002963411045,
|
|
"learning_rate": 3.7920417056931046e-05,
|
|
"loss": 0.5261,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.8025177025963808,
|
|
"grad_norm": 0.1720427492785605,
|
|
"learning_rate": 3.791182523811545e-05,
|
|
"loss": 0.5144,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.8056648308418568,
|
|
"grad_norm": 0.2174774642242585,
|
|
"learning_rate": 3.7903216800028416e-05,
|
|
"loss": 0.5106,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.8088119590873328,
|
|
"grad_norm": 0.19514122415387664,
|
|
"learning_rate": 3.789459175166109e-05,
|
|
"loss": 0.5228,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.8119590873328089,
|
|
"grad_norm": 0.18369904706820767,
|
|
"learning_rate": 3.7885950102022014e-05,
|
|
"loss": 0.5135,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.8151062155782847,
|
|
"grad_norm": 0.20445427723961576,
|
|
"learning_rate": 3.787729186013704e-05,
|
|
"loss": 0.5148,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.818253343823761,
|
|
"grad_norm": 0.1933783032598251,
|
|
"learning_rate": 3.786861703504936e-05,
|
|
"loss": 0.5215,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.8214004720692367,
|
|
"grad_norm": 0.18342847430157735,
|
|
"learning_rate": 3.7859925635819476e-05,
|
|
"loss": 0.5128,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.824547600314713,
|
|
"grad_norm": 0.2226340450308752,
|
|
"learning_rate": 3.785121767152523e-05,
|
|
"loss": 0.5283,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.8276947285601888,
|
|
"grad_norm": 0.19369436637649629,
|
|
"learning_rate": 3.784249315126173e-05,
|
|
"loss": 0.5148,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.8308418568056648,
|
|
"grad_norm": 0.19807163830925228,
|
|
"learning_rate": 3.783375208414139e-05,
|
|
"loss": 0.5151,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.8339889850511408,
|
|
"grad_norm": 0.18328220410897705,
|
|
"learning_rate": 3.782499447929392e-05,
|
|
"loss": 0.514,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.8371361132966169,
|
|
"grad_norm": 0.1839242669305662,
|
|
"learning_rate": 3.7816220345866294e-05,
|
|
"loss": 0.5251,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.8402832415420929,
|
|
"grad_norm": 0.18986955880243364,
|
|
"learning_rate": 3.780742969302273e-05,
|
|
"loss": 0.5131,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.8434303697875687,
|
|
"grad_norm": 0.19594785121809236,
|
|
"learning_rate": 3.7798622529944735e-05,
|
|
"loss": 0.5161,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.846577498033045,
|
|
"grad_norm": 0.2357648214241525,
|
|
"learning_rate": 3.7789798865831024e-05,
|
|
"loss": 0.5156,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.8497246262785207,
|
|
"grad_norm": 0.19304354227740758,
|
|
"learning_rate": 3.778095870989758e-05,
|
|
"loss": 0.5203,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.852871754523997,
|
|
"grad_norm": 0.22728068274130966,
|
|
"learning_rate": 3.777210207137759e-05,
|
|
"loss": 0.5321,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.8560188827694728,
|
|
"grad_norm": 0.2017775324752321,
|
|
"learning_rate": 3.7763228959521465e-05,
|
|
"loss": 0.5242,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.8591660110149488,
|
|
"grad_norm": 0.22300635096362367,
|
|
"learning_rate": 3.775433938359681e-05,
|
|
"loss": 0.5231,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.8623131392604249,
|
|
"grad_norm": 0.1996306525906858,
|
|
"learning_rate": 3.774543335288845e-05,
|
|
"loss": 0.5221,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.8654602675059009,
|
|
"grad_norm": 0.16852847329609896,
|
|
"learning_rate": 3.773651087669837e-05,
|
|
"loss": 0.5107,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.868607395751377,
|
|
"grad_norm": 0.24727169403577154,
|
|
"learning_rate": 3.7727571964345745e-05,
|
|
"loss": 0.522,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.8717545239968527,
|
|
"grad_norm": 0.22616575051796836,
|
|
"learning_rate": 3.771861662516692e-05,
|
|
"loss": 0.5109,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.874901652242329,
|
|
"grad_norm": 0.2500551252876427,
|
|
"learning_rate": 3.7709644868515386e-05,
|
|
"loss": 0.514,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.8780487804878048,
|
|
"grad_norm": 0.19733979061423199,
|
|
"learning_rate": 3.770065670376179e-05,
|
|
"loss": 0.5138,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.881195908733281,
|
|
"grad_norm": 0.23793266287535486,
|
|
"learning_rate": 3.769165214029392e-05,
|
|
"loss": 0.5151,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.8843430369787568,
|
|
"grad_norm": 0.20047355737675948,
|
|
"learning_rate": 3.768263118751667e-05,
|
|
"loss": 0.5195,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.8874901652242329,
|
|
"grad_norm": 0.1734693730368024,
|
|
"learning_rate": 3.767359385485208e-05,
|
|
"loss": 0.5067,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.8906372934697089,
|
|
"grad_norm": 0.20206213476307608,
|
|
"learning_rate": 3.766454015173929e-05,
|
|
"loss": 0.5161,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.893784421715185,
|
|
"grad_norm": 0.20882657364965593,
|
|
"learning_rate": 3.765547008763453e-05,
|
|
"loss": 0.5103,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.896931549960661,
|
|
"grad_norm": 0.2021864503575807,
|
|
"learning_rate": 3.764638367201112e-05,
|
|
"loss": 0.5004,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.9000786782061367,
|
|
"grad_norm": 0.20424529742189995,
|
|
"learning_rate": 3.763728091435946e-05,
|
|
"loss": 0.5162,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.903225806451613,
|
|
"grad_norm": 0.1961209704262886,
|
|
"learning_rate": 3.7628161824187025e-05,
|
|
"loss": 0.518,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.9063729346970888,
|
|
"grad_norm": 0.23767968238521123,
|
|
"learning_rate": 3.7619026411018345e-05,
|
|
"loss": 0.5069,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.909520062942565,
|
|
"grad_norm": 0.21212885274385532,
|
|
"learning_rate": 3.7609874684394994e-05,
|
|
"loss": 0.519,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.9126671911880408,
|
|
"grad_norm": 0.21268544914645238,
|
|
"learning_rate": 3.760070665387558e-05,
|
|
"loss": 0.5136,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.9158143194335169,
|
|
"grad_norm": 0.20821784947192484,
|
|
"learning_rate": 3.7591522329035763e-05,
|
|
"loss": 0.5159,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.918961447678993,
|
|
"grad_norm": 0.22337933592922737,
|
|
"learning_rate": 3.75823217194682e-05,
|
|
"loss": 0.519,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.922108575924469,
|
|
"grad_norm": 0.25135156804926945,
|
|
"learning_rate": 3.7573104834782566e-05,
|
|
"loss": 0.5153,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.925255704169945,
|
|
"grad_norm": 0.197509832149269,
|
|
"learning_rate": 3.756387168460552e-05,
|
|
"loss": 0.5247,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.9284028324154208,
|
|
"grad_norm": 0.2389600063017852,
|
|
"learning_rate": 3.7554622278580735e-05,
|
|
"loss": 0.5166,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.931549960660897,
|
|
"grad_norm": 0.26288745403046027,
|
|
"learning_rate": 3.754535662636884e-05,
|
|
"loss": 0.5236,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.9346970889063728,
|
|
"grad_norm": 0.20996861395700098,
|
|
"learning_rate": 3.7536074737647455e-05,
|
|
"loss": 0.5168,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.937844217151849,
|
|
"grad_norm": 0.23502763223514755,
|
|
"learning_rate": 3.752677662211114e-05,
|
|
"loss": 0.5185,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.9409913453973249,
|
|
"grad_norm": 0.22436762734979818,
|
|
"learning_rate": 3.75174622894714e-05,
|
|
"loss": 0.5207,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.944138473642801,
|
|
"grad_norm": 0.1810432051540689,
|
|
"learning_rate": 3.7508131749456696e-05,
|
|
"loss": 0.5161,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.947285601888277,
|
|
"grad_norm": 0.2522520252638888,
|
|
"learning_rate": 3.74987850118124e-05,
|
|
"loss": 0.5112,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.950432730133753,
|
|
"grad_norm": 0.25396670669216415,
|
|
"learning_rate": 3.748942208630082e-05,
|
|
"loss": 0.5221,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.953579858379229,
|
|
"grad_norm": 0.22197793438383823,
|
|
"learning_rate": 3.748004298270115e-05,
|
|
"loss": 0.5162,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.956726986624705,
|
|
"grad_norm": 0.23177715709488106,
|
|
"learning_rate": 3.74706477108095e-05,
|
|
"loss": 0.5106,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.959874114870181,
|
|
"grad_norm": 0.22079140874889208,
|
|
"learning_rate": 3.746123628043886e-05,
|
|
"loss": 0.5193,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.9630212431156568,
|
|
"grad_norm": 0.22902705592617217,
|
|
"learning_rate": 3.745180870141908e-05,
|
|
"loss": 0.5147,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.966168371361133,
|
|
"grad_norm": 0.21218179122207526,
|
|
"learning_rate": 3.744236498359692e-05,
|
|
"loss": 0.5139,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.969315499606609,
|
|
"grad_norm": 0.2289304429026494,
|
|
"learning_rate": 3.743290513683595e-05,
|
|
"loss": 0.509,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.972462627852085,
|
|
"grad_norm": 0.19761688027384242,
|
|
"learning_rate": 3.742342917101661e-05,
|
|
"loss": 0.5108,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.975609756097561,
|
|
"grad_norm": 0.20658238399745513,
|
|
"learning_rate": 3.741393709603617e-05,
|
|
"loss": 0.5162,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.978756884343037,
|
|
"grad_norm": 0.22387644395974296,
|
|
"learning_rate": 3.740442892180873e-05,
|
|
"loss": 0.5176,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.981904012588513,
|
|
"grad_norm": 0.2099407145968934,
|
|
"learning_rate": 3.7394904658265205e-05,
|
|
"loss": 0.5193,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.985051140833989,
|
|
"grad_norm": 0.20355440734215408,
|
|
"learning_rate": 3.7385364315353305e-05,
|
|
"loss": 0.502,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.988198269079465,
|
|
"grad_norm": 0.2737712591861954,
|
|
"learning_rate": 3.7375807903037534e-05,
|
|
"loss": 0.5146,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.9913453973249409,
|
|
"grad_norm": 0.3107234957383597,
|
|
"learning_rate": 3.73662354312992e-05,
|
|
"loss": 0.5181,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.9944925255704171,
|
|
"grad_norm": 0.24548807968691783,
|
|
"learning_rate": 3.735664691013636e-05,
|
|
"loss": 0.5078,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.997639653815893,
|
|
"grad_norm": 0.21987770768265905,
|
|
"learning_rate": 3.734704234956385e-05,
|
|
"loss": 0.5089,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 2.003147128245476,
|
|
"grad_norm": 0.5383301018870941,
|
|
"learning_rate": 3.7337421759613255e-05,
|
|
"loss": 1.0306,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 2.006294256490952,
|
|
"grad_norm": 0.7404093702367391,
|
|
"learning_rate": 3.7327785150332896e-05,
|
|
"loss": 0.4838,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 2.009441384736428,
|
|
"grad_norm": 0.7862043686171012,
|
|
"learning_rate": 3.7318132531787835e-05,
|
|
"loss": 0.481,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 2.012588512981904,
|
|
"grad_norm": 0.5852793874262968,
|
|
"learning_rate": 3.7308463914059846e-05,
|
|
"loss": 0.4815,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 2.01573564122738,
|
|
"grad_norm": 0.3672440684726176,
|
|
"learning_rate": 3.729877930724741e-05,
|
|
"loss": 0.4752,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 2.018882769472856,
|
|
"grad_norm": 0.5159055161972755,
|
|
"learning_rate": 3.7289078721465735e-05,
|
|
"loss": 0.4769,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 2.022029897718332,
|
|
"grad_norm": 0.397198576601526,
|
|
"learning_rate": 3.7279362166846677e-05,
|
|
"loss": 0.4794,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 2.025177025963808,
|
|
"grad_norm": 0.42002417967064054,
|
|
"learning_rate": 3.726962965353881e-05,
|
|
"loss": 0.4833,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 2.028324154209284,
|
|
"grad_norm": 0.44343130968400324,
|
|
"learning_rate": 3.725988119170735e-05,
|
|
"loss": 0.4759,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 2.03147128245476,
|
|
"grad_norm": 0.3602187948121981,
|
|
"learning_rate": 3.725011679153418e-05,
|
|
"loss": 0.47,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 2.034618410700236,
|
|
"grad_norm": 0.3689059788914393,
|
|
"learning_rate": 3.7240336463217824e-05,
|
|
"loss": 0.4845,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 2.037765538945712,
|
|
"grad_norm": 0.34291838849435596,
|
|
"learning_rate": 3.723054021697346e-05,
|
|
"loss": 0.4788,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 2.040912667191188,
|
|
"grad_norm": 0.34322337603866276,
|
|
"learning_rate": 3.722072806303287e-05,
|
|
"loss": 0.4714,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 2.044059795436664,
|
|
"grad_norm": 0.3243197383293679,
|
|
"learning_rate": 3.721090001164447e-05,
|
|
"loss": 0.4784,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 2.04720692368214,
|
|
"grad_norm": 0.28998221744555436,
|
|
"learning_rate": 3.720105607307326e-05,
|
|
"loss": 0.4787,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 2.050354051927616,
|
|
"grad_norm": 0.3306107669906591,
|
|
"learning_rate": 3.7191196257600845e-05,
|
|
"loss": 0.475,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 2.0535011801730922,
|
|
"grad_norm": 0.25616746795125434,
|
|
"learning_rate": 3.718132057552542e-05,
|
|
"loss": 0.4727,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 2.056648308418568,
|
|
"grad_norm": 0.3110708535061029,
|
|
"learning_rate": 3.7171429037161735e-05,
|
|
"loss": 0.4815,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 2.059795436664044,
|
|
"grad_norm": 0.27174269235855014,
|
|
"learning_rate": 3.7161521652841114e-05,
|
|
"loss": 0.4792,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 2.06294256490952,
|
|
"grad_norm": 0.2751966115922098,
|
|
"learning_rate": 3.715159843291143e-05,
|
|
"loss": 0.4737,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 2.066089693154996,
|
|
"grad_norm": 0.2793498805071301,
|
|
"learning_rate": 3.714165938773709e-05,
|
|
"loss": 0.4797,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 2.069236821400472,
|
|
"grad_norm": 0.22565153747735522,
|
|
"learning_rate": 3.713170452769903e-05,
|
|
"loss": 0.4734,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 2.072383949645948,
|
|
"grad_norm": 0.2703231691240761,
|
|
"learning_rate": 3.712173386319472e-05,
|
|
"loss": 0.4798,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 2.075531077891424,
|
|
"grad_norm": 0.20941889089054108,
|
|
"learning_rate": 3.711174740463811e-05,
|
|
"loss": 0.4767,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 2.0786782061369,
|
|
"grad_norm": 0.24127646716392062,
|
|
"learning_rate": 3.710174516245967e-05,
|
|
"loss": 0.4752,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 2.0818253343823763,
|
|
"grad_norm": 0.22857637349920826,
|
|
"learning_rate": 3.7091727147106336e-05,
|
|
"loss": 0.4816,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 2.084972462627852,
|
|
"grad_norm": 0.2403738146911501,
|
|
"learning_rate": 3.7081693369041544e-05,
|
|
"loss": 0.4802,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 2.088119590873328,
|
|
"grad_norm": 0.2239762593172141,
|
|
"learning_rate": 3.707164383874516e-05,
|
|
"loss": 0.4729,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 2.091266719118804,
|
|
"grad_norm": 0.2532472006395691,
|
|
"learning_rate": 3.706157856671353e-05,
|
|
"loss": 0.4775,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 2.09441384736428,
|
|
"grad_norm": 0.2368186581534253,
|
|
"learning_rate": 3.7051497563459436e-05,
|
|
"loss": 0.4747,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 2.097560975609756,
|
|
"grad_norm": 0.21848874983233801,
|
|
"learning_rate": 3.704140083951208e-05,
|
|
"loss": 0.4744,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 2.100708103855232,
|
|
"grad_norm": 0.2589340916721653,
|
|
"learning_rate": 3.703128840541709e-05,
|
|
"loss": 0.4686,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 2.1038552321007082,
|
|
"grad_norm": 0.1951064514885459,
|
|
"learning_rate": 3.7021160271736505e-05,
|
|
"loss": 0.4716,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 2.107002360346184,
|
|
"grad_norm": 0.27598581262671057,
|
|
"learning_rate": 3.701101644904876e-05,
|
|
"loss": 0.474,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 2.1101494885916603,
|
|
"grad_norm": 0.2065313958477124,
|
|
"learning_rate": 3.7000856947948676e-05,
|
|
"loss": 0.4715,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 2.113296616837136,
|
|
"grad_norm": 0.23178764564943305,
|
|
"learning_rate": 3.699068177904745e-05,
|
|
"loss": 0.4806,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 2.116443745082612,
|
|
"grad_norm": 0.20461181525036945,
|
|
"learning_rate": 3.698049095297265e-05,
|
|
"loss": 0.4748,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 2.119590873328088,
|
|
"grad_norm": 0.2350705336847509,
|
|
"learning_rate": 3.697028448036817e-05,
|
|
"loss": 0.4729,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 2.122738001573564,
|
|
"grad_norm": 0.20120939367328008,
|
|
"learning_rate": 3.696006237189429e-05,
|
|
"loss": 0.4786,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 2.12588512981904,
|
|
"grad_norm": 0.22060728235722682,
|
|
"learning_rate": 3.6949824638227585e-05,
|
|
"loss": 0.4774,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 2.129032258064516,
|
|
"grad_norm": 0.19177609563558462,
|
|
"learning_rate": 3.693957129006096e-05,
|
|
"loss": 0.484,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 2.1321793863099923,
|
|
"grad_norm": 0.25855343543212544,
|
|
"learning_rate": 3.692930233810364e-05,
|
|
"loss": 0.4837,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 2.135326514555468,
|
|
"grad_norm": 0.22818364530705276,
|
|
"learning_rate": 3.691901779308113e-05,
|
|
"loss": 0.4774,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 2.1384736428009443,
|
|
"grad_norm": 0.20914360052852593,
|
|
"learning_rate": 3.690871766573523e-05,
|
|
"loss": 0.4728,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 2.14162077104642,
|
|
"grad_norm": 0.25587656475568926,
|
|
"learning_rate": 3.6898401966824035e-05,
|
|
"loss": 0.4698,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 2.144767899291896,
|
|
"grad_norm": 0.1826067226207546,
|
|
"learning_rate": 3.688807070712186e-05,
|
|
"loss": 0.4761,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 2.147915027537372,
|
|
"grad_norm": 0.22738148878540357,
|
|
"learning_rate": 3.68777238974193e-05,
|
|
"loss": 0.4714,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 2.151062155782848,
|
|
"grad_norm": 0.2111664202105763,
|
|
"learning_rate": 3.68673615485232e-05,
|
|
"loss": 0.4774,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 2.1542092840283242,
|
|
"grad_norm": 0.24863885294296703,
|
|
"learning_rate": 3.685698367125662e-05,
|
|
"loss": 0.4743,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 2.1573564122738,
|
|
"grad_norm": 0.21812972928565583,
|
|
"learning_rate": 3.684659027645884e-05,
|
|
"loss": 0.469,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 2.1605035405192763,
|
|
"grad_norm": 0.24327955100377346,
|
|
"learning_rate": 3.683618137498535e-05,
|
|
"loss": 0.4781,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 2.163650668764752,
|
|
"grad_norm": 0.2120560203500377,
|
|
"learning_rate": 3.6825756977707826e-05,
|
|
"loss": 0.4718,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 2.1667977970102283,
|
|
"grad_norm": 0.24161382970696385,
|
|
"learning_rate": 3.6815317095514145e-05,
|
|
"loss": 0.4767,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 2.169944925255704,
|
|
"grad_norm": 0.20329140472495538,
|
|
"learning_rate": 3.680486173930835e-05,
|
|
"loss": 0.4827,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 2.17309205350118,
|
|
"grad_norm": 0.24761268049420182,
|
|
"learning_rate": 3.679439092001065e-05,
|
|
"loss": 0.4608,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 2.176239181746656,
|
|
"grad_norm": 0.1871251156119415,
|
|
"learning_rate": 3.6783904648557396e-05,
|
|
"loss": 0.4695,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 2.179386309992132,
|
|
"grad_norm": 0.2205720555772292,
|
|
"learning_rate": 3.67734029359011e-05,
|
|
"loss": 0.4717,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 2.1825334382376083,
|
|
"grad_norm": 0.21386365254718184,
|
|
"learning_rate": 3.676288579301036e-05,
|
|
"loss": 0.4764,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 2.185680566483084,
|
|
"grad_norm": 0.18868765682443855,
|
|
"learning_rate": 3.6752353230869925e-05,
|
|
"loss": 0.4698,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 2.1888276947285603,
|
|
"grad_norm": 0.22008810612187404,
|
|
"learning_rate": 3.6741805260480644e-05,
|
|
"loss": 0.4713,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 2.191974822974036,
|
|
"grad_norm": 0.20786113581001195,
|
|
"learning_rate": 3.673124189285945e-05,
|
|
"loss": 0.4806,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 2.1951219512195124,
|
|
"grad_norm": 0.21496969539942087,
|
|
"learning_rate": 3.672066313903937e-05,
|
|
"loss": 0.4713,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 2.198269079464988,
|
|
"grad_norm": 0.19763508819414857,
|
|
"learning_rate": 3.671006901006948e-05,
|
|
"loss": 0.4736,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 2.201416207710464,
|
|
"grad_norm": 0.2293997083330246,
|
|
"learning_rate": 3.669945951701494e-05,
|
|
"loss": 0.4764,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 2.2045633359559402,
|
|
"grad_norm": 0.22585825760183822,
|
|
"learning_rate": 3.668883467095694e-05,
|
|
"loss": 0.4734,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 2.207710464201416,
|
|
"grad_norm": 0.20186256772917444,
|
|
"learning_rate": 3.6678194482992716e-05,
|
|
"loss": 0.4777,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 2.2108575924468923,
|
|
"grad_norm": 0.19540930931295644,
|
|
"learning_rate": 3.666753896423551e-05,
|
|
"loss": 0.4846,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 2.214004720692368,
|
|
"grad_norm": 0.21556486232945823,
|
|
"learning_rate": 3.6656868125814605e-05,
|
|
"loss": 0.4797,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 2.2171518489378443,
|
|
"grad_norm": 0.19136441764044193,
|
|
"learning_rate": 3.664618197887526e-05,
|
|
"loss": 0.4722,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 2.22029897718332,
|
|
"grad_norm": 0.24889924698530747,
|
|
"learning_rate": 3.663548053457873e-05,
|
|
"loss": 0.4824,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 2.2234461054287964,
|
|
"grad_norm": 0.20658957990757543,
|
|
"learning_rate": 3.662476380410227e-05,
|
|
"loss": 0.4728,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 2.226593233674272,
|
|
"grad_norm": 0.19277909826894754,
|
|
"learning_rate": 3.661403179863905e-05,
|
|
"loss": 0.4724,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 2.229740361919748,
|
|
"grad_norm": 0.21254918935528136,
|
|
"learning_rate": 3.660328452939825e-05,
|
|
"loss": 0.4762,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 2.2328874901652243,
|
|
"grad_norm": 0.2037212638248634,
|
|
"learning_rate": 3.659252200760495e-05,
|
|
"loss": 0.4609,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 2.2360346184107,
|
|
"grad_norm": 0.17871023945609968,
|
|
"learning_rate": 3.658174424450019e-05,
|
|
"loss": 0.4748,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 2.2391817466561763,
|
|
"grad_norm": 0.21443695153487202,
|
|
"learning_rate": 3.657095125134091e-05,
|
|
"loss": 0.4753,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 2.242328874901652,
|
|
"grad_norm": 0.19069156080979188,
|
|
"learning_rate": 3.656014303939996e-05,
|
|
"loss": 0.4717,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 2.2454760031471284,
|
|
"grad_norm": 0.2069028744399135,
|
|
"learning_rate": 3.654931961996611e-05,
|
|
"loss": 0.4783,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 2.248623131392604,
|
|
"grad_norm": 0.18637707862361488,
|
|
"learning_rate": 3.653848100434397e-05,
|
|
"loss": 0.4832,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 2.2517702596380804,
|
|
"grad_norm": 0.21210206014309427,
|
|
"learning_rate": 3.652762720385406e-05,
|
|
"loss": 0.4826,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 2.2549173878835562,
|
|
"grad_norm": 0.18374915556791416,
|
|
"learning_rate": 3.651675822983273e-05,
|
|
"loss": 0.4728,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 2.258064516129032,
|
|
"grad_norm": 0.21024735238705872,
|
|
"learning_rate": 3.65058740936322e-05,
|
|
"loss": 0.4706,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 2.2612116443745083,
|
|
"grad_norm": 0.18392863904521575,
|
|
"learning_rate": 3.649497480662053e-05,
|
|
"loss": 0.4795,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 2.264358772619984,
|
|
"grad_norm": 0.19781125359127497,
|
|
"learning_rate": 3.648406038018158e-05,
|
|
"loss": 0.4774,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 2.2675059008654603,
|
|
"grad_norm": 0.21244857359184766,
|
|
"learning_rate": 3.6473130825715036e-05,
|
|
"loss": 0.4778,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 2.270653029110936,
|
|
"grad_norm": 0.2240315261327545,
|
|
"learning_rate": 3.64621861546364e-05,
|
|
"loss": 0.4768,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 2.2738001573564124,
|
|
"grad_norm": 0.19195015387576453,
|
|
"learning_rate": 3.645122637837693e-05,
|
|
"loss": 0.4761,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 2.276947285601888,
|
|
"grad_norm": 0.23948727145219656,
|
|
"learning_rate": 3.644025150838368e-05,
|
|
"loss": 0.4843,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 2.2800944138473644,
|
|
"grad_norm": 0.2044549094308154,
|
|
"learning_rate": 3.642926155611949e-05,
|
|
"loss": 0.4799,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 2.2832415420928402,
|
|
"grad_norm": 0.1912737434372698,
|
|
"learning_rate": 3.64182565330629e-05,
|
|
"loss": 0.477,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 2.286388670338316,
|
|
"grad_norm": 0.2225878785242701,
|
|
"learning_rate": 3.6407236450708235e-05,
|
|
"loss": 0.4659,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 2.2895357985837923,
|
|
"grad_norm": 0.18432866393581965,
|
|
"learning_rate": 3.639620132056553e-05,
|
|
"loss": 0.4817,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 2.292682926829268,
|
|
"grad_norm": 0.19897028209983136,
|
|
"learning_rate": 3.638515115416055e-05,
|
|
"loss": 0.4833,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 2.2958300550747444,
|
|
"grad_norm": 0.20081135317562743,
|
|
"learning_rate": 3.637408596303476e-05,
|
|
"loss": 0.4704,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 2.29897718332022,
|
|
"grad_norm": 0.1910907865185398,
|
|
"learning_rate": 3.63630057587453e-05,
|
|
"loss": 0.4825,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 2.3021243115656964,
|
|
"grad_norm": 0.2079462815404506,
|
|
"learning_rate": 3.6351910552865e-05,
|
|
"loss": 0.4757,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 2.305271439811172,
|
|
"grad_norm": 0.18921524052483815,
|
|
"learning_rate": 3.634080035698238e-05,
|
|
"loss": 0.4828,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 2.3084185680566485,
|
|
"grad_norm": 0.19748083973385058,
|
|
"learning_rate": 3.632967518270159e-05,
|
|
"loss": 0.4747,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 2.3115656963021243,
|
|
"grad_norm": 0.18371527979865251,
|
|
"learning_rate": 3.6318535041642434e-05,
|
|
"loss": 0.4787,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 2.3147128245476,
|
|
"grad_norm": 0.1802590221169126,
|
|
"learning_rate": 3.630737994544036e-05,
|
|
"loss": 0.4771,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 2.3178599527930763,
|
|
"grad_norm": 0.178911843071879,
|
|
"learning_rate": 3.6296209905746416e-05,
|
|
"loss": 0.4691,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 2.321007081038552,
|
|
"grad_norm": 0.20053075969824302,
|
|
"learning_rate": 3.628502493422726e-05,
|
|
"loss": 0.4779,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 2.3241542092840284,
|
|
"grad_norm": 0.17770666358757115,
|
|
"learning_rate": 3.627382504256516e-05,
|
|
"loss": 0.4771,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 2.327301337529504,
|
|
"grad_norm": 0.19730337436045323,
|
|
"learning_rate": 3.626261024245795e-05,
|
|
"loss": 0.4707,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 2.3304484657749804,
|
|
"grad_norm": 0.19412352087085247,
|
|
"learning_rate": 3.625138054561906e-05,
|
|
"loss": 0.4781,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 2.3335955940204562,
|
|
"grad_norm": 0.17759746668876183,
|
|
"learning_rate": 3.6240135963777446e-05,
|
|
"loss": 0.4705,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 2.3367427222659325,
|
|
"grad_norm": 0.2043623039312422,
|
|
"learning_rate": 3.622887650867765e-05,
|
|
"loss": 0.4684,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 2.3398898505114083,
|
|
"grad_norm": 0.20845845270662547,
|
|
"learning_rate": 3.6217602192079706e-05,
|
|
"loss": 0.477,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 2.343036978756884,
|
|
"grad_norm": 0.18687350571910952,
|
|
"learning_rate": 3.620631302575921e-05,
|
|
"loss": 0.4768,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 2.3461841070023604,
|
|
"grad_norm": 0.21502507739077148,
|
|
"learning_rate": 3.619500902150723e-05,
|
|
"loss": 0.4772,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 2.349331235247836,
|
|
"grad_norm": 0.18886284710156062,
|
|
"learning_rate": 3.6183690191130365e-05,
|
|
"loss": 0.4812,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 2.3524783634933124,
|
|
"grad_norm": 0.17965446422580023,
|
|
"learning_rate": 3.617235654645068e-05,
|
|
"loss": 0.4774,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 2.355625491738788,
|
|
"grad_norm": 0.20326170603684612,
|
|
"learning_rate": 3.616100809930572e-05,
|
|
"loss": 0.4768,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 2.3587726199842645,
|
|
"grad_norm": 0.1882843118623647,
|
|
"learning_rate": 3.614964486154848e-05,
|
|
"loss": 0.4722,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 2.3619197482297403,
|
|
"grad_norm": 0.17204335083123673,
|
|
"learning_rate": 3.613826684504743e-05,
|
|
"loss": 0.4674,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 2.3650668764752165,
|
|
"grad_norm": 0.19139874893907688,
|
|
"learning_rate": 3.612687406168644e-05,
|
|
"loss": 0.4681,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 2.3682140047206923,
|
|
"grad_norm": 0.1742655605287443,
|
|
"learning_rate": 3.611546652336482e-05,
|
|
"loss": 0.4735,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 2.371361132966168,
|
|
"grad_norm": 0.1785986233463769,
|
|
"learning_rate": 3.610404424199732e-05,
|
|
"loss": 0.4725,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 2.3745082612116444,
|
|
"grad_norm": 0.1770999597168329,
|
|
"learning_rate": 3.6092607229514026e-05,
|
|
"loss": 0.4751,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 2.3776553894571206,
|
|
"grad_norm": 0.1861951910359395,
|
|
"learning_rate": 3.608115549786047e-05,
|
|
"loss": 0.4772,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 2.3808025177025964,
|
|
"grad_norm": 0.1789049105676948,
|
|
"learning_rate": 3.6069689058997506e-05,
|
|
"loss": 0.4717,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 2.3839496459480722,
|
|
"grad_norm": 0.18717193575284816,
|
|
"learning_rate": 3.60582079249014e-05,
|
|
"loss": 0.4742,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 2.3870967741935485,
|
|
"grad_norm": 0.2319346851175833,
|
|
"learning_rate": 3.604671210756373e-05,
|
|
"loss": 0.48,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 2.3902439024390243,
|
|
"grad_norm": 0.20723294874930143,
|
|
"learning_rate": 3.603520161899144e-05,
|
|
"loss": 0.4728,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 2.3933910306845005,
|
|
"grad_norm": 0.21571506744754684,
|
|
"learning_rate": 3.6023676471206746e-05,
|
|
"loss": 0.4695,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 2.3965381589299763,
|
|
"grad_norm": 0.17314280493311868,
|
|
"learning_rate": 3.601213667624724e-05,
|
|
"loss": 0.4735,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 2.399685287175452,
|
|
"grad_norm": 0.21517245659461837,
|
|
"learning_rate": 3.600058224616576e-05,
|
|
"loss": 0.4805,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 2.4028324154209284,
|
|
"grad_norm": 0.20430687151416146,
|
|
"learning_rate": 3.598901319303047e-05,
|
|
"loss": 0.4843,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 2.4059795436664047,
|
|
"grad_norm": 0.2004177768069127,
|
|
"learning_rate": 3.597742952892477e-05,
|
|
"loss": 0.4833,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 2.4091266719118805,
|
|
"grad_norm": 0.24567032007723139,
|
|
"learning_rate": 3.5965831265947344e-05,
|
|
"loss": 0.4686,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 2.4122738001573563,
|
|
"grad_norm": 0.21956644771343653,
|
|
"learning_rate": 3.595421841621212e-05,
|
|
"loss": 0.478,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 2.4154209284028325,
|
|
"grad_norm": 0.2038846900874555,
|
|
"learning_rate": 3.594259099184826e-05,
|
|
"loss": 0.4739,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 2.4185680566483083,
|
|
"grad_norm": 0.21879240881473924,
|
|
"learning_rate": 3.593094900500015e-05,
|
|
"loss": 0.4713,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 2.4217151848937846,
|
|
"grad_norm": 0.22973634489226144,
|
|
"learning_rate": 3.591929246782738e-05,
|
|
"loss": 0.4848,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 2.4248623131392604,
|
|
"grad_norm": 0.19432937590163568,
|
|
"learning_rate": 3.5907621392504747e-05,
|
|
"loss": 0.4791,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 2.4280094413847366,
|
|
"grad_norm": 0.19157056326864344,
|
|
"learning_rate": 3.589593579122222e-05,
|
|
"loss": 0.4801,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 2.4311565696302124,
|
|
"grad_norm": 0.19492660523958835,
|
|
"learning_rate": 3.588423567618496e-05,
|
|
"loss": 0.4739,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 2.4343036978756887,
|
|
"grad_norm": 0.19603103831816215,
|
|
"learning_rate": 3.5872521059613254e-05,
|
|
"loss": 0.4783,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 2.4374508261211645,
|
|
"grad_norm": 0.16793141618091936,
|
|
"learning_rate": 3.5860791953742574e-05,
|
|
"loss": 0.4828,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 2.4405979543666403,
|
|
"grad_norm": 0.18926327402558274,
|
|
"learning_rate": 3.5849048370823496e-05,
|
|
"loss": 0.462,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 2.4437450826121165,
|
|
"grad_norm": 0.20901177449925584,
|
|
"learning_rate": 3.583729032312173e-05,
|
|
"loss": 0.4704,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 2.4468922108575923,
|
|
"grad_norm": 0.21820920525411092,
|
|
"learning_rate": 3.582551782291809e-05,
|
|
"loss": 0.4661,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 2.4500393391030686,
|
|
"grad_norm": 0.18520013651848868,
|
|
"learning_rate": 3.581373088250849e-05,
|
|
"loss": 0.4755,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 2.4531864673485444,
|
|
"grad_norm": 0.2426404576575414,
|
|
"learning_rate": 3.580192951420391e-05,
|
|
"loss": 0.4723,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 2.4563335955940206,
|
|
"grad_norm": 0.23249501540895848,
|
|
"learning_rate": 3.579011373033044e-05,
|
|
"loss": 0.4755,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 2.4594807238394965,
|
|
"grad_norm": 0.2161001745094506,
|
|
"learning_rate": 3.577828354322917e-05,
|
|
"loss": 0.4773,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 2.4626278520849727,
|
|
"grad_norm": 0.21146417784405747,
|
|
"learning_rate": 3.576643896525628e-05,
|
|
"loss": 0.4871,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.4657749803304485,
|
|
"grad_norm": 0.20723676007259584,
|
|
"learning_rate": 3.575458000878294e-05,
|
|
"loss": 0.4783,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.4689221085759243,
|
|
"grad_norm": 0.2584014129384574,
|
|
"learning_rate": 3.5742706686195386e-05,
|
|
"loss": 0.4767,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.4720692368214006,
|
|
"grad_norm": 0.19393702218745548,
|
|
"learning_rate": 3.573081900989482e-05,
|
|
"loss": 0.4804,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.4752163650668764,
|
|
"grad_norm": 0.2246269196846869,
|
|
"learning_rate": 3.5718916992297456e-05,
|
|
"loss": 0.4748,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.4783634933123526,
|
|
"grad_norm": 0.2016712184696746,
|
|
"learning_rate": 3.5707000645834476e-05,
|
|
"loss": 0.4839,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.4815106215578284,
|
|
"grad_norm": 0.20378549394086407,
|
|
"learning_rate": 3.569506998295203e-05,
|
|
"loss": 0.4726,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.4846577498033047,
|
|
"grad_norm": 0.2337914377259457,
|
|
"learning_rate": 3.568312501611123e-05,
|
|
"loss": 0.4814,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.4878048780487805,
|
|
"grad_norm": 0.19396608409990398,
|
|
"learning_rate": 3.5671165757788115e-05,
|
|
"loss": 0.4761,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.4909520062942567,
|
|
"grad_norm": 0.2692755192747761,
|
|
"learning_rate": 3.5659192220473654e-05,
|
|
"loss": 0.4785,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.4940991345397325,
|
|
"grad_norm": 0.18587733519714691,
|
|
"learning_rate": 3.5647204416673746e-05,
|
|
"loss": 0.4864,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.4972462627852083,
|
|
"grad_norm": 0.2449196618260009,
|
|
"learning_rate": 3.5635202358909164e-05,
|
|
"loss": 0.4763,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.5003933910306846,
|
|
"grad_norm": 0.22615305309932182,
|
|
"learning_rate": 3.562318605971559e-05,
|
|
"loss": 0.4851,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.5035405192761604,
|
|
"grad_norm": 0.2043080610888049,
|
|
"learning_rate": 3.561115553164356e-05,
|
|
"loss": 0.4726,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.5066876475216366,
|
|
"grad_norm": 0.22066196853846168,
|
|
"learning_rate": 3.55991107872585e-05,
|
|
"loss": 0.475,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.5098347757671124,
|
|
"grad_norm": 0.17253963662301974,
|
|
"learning_rate": 3.558705183914066e-05,
|
|
"loss": 0.4734,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.5129819040125883,
|
|
"grad_norm": 0.19881124746164847,
|
|
"learning_rate": 3.5574978699885134e-05,
|
|
"loss": 0.4832,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.5161290322580645,
|
|
"grad_norm": 0.19723415337076033,
|
|
"learning_rate": 3.556289138210185e-05,
|
|
"loss": 0.4689,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.5192761605035408,
|
|
"grad_norm": 0.18954283260561922,
|
|
"learning_rate": 3.555078989841551e-05,
|
|
"loss": 0.4757,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.5224232887490166,
|
|
"grad_norm": 0.19352808470983424,
|
|
"learning_rate": 3.5538674261465655e-05,
|
|
"loss": 0.4713,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.5255704169944924,
|
|
"grad_norm": 0.20041238629382177,
|
|
"learning_rate": 3.5526544483906575e-05,
|
|
"loss": 0.4845,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.5287175452399686,
|
|
"grad_norm": 0.19181251462489346,
|
|
"learning_rate": 3.551440057840736e-05,
|
|
"loss": 0.4882,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.5318646734854444,
|
|
"grad_norm": 0.18016461406070697,
|
|
"learning_rate": 3.5502242557651813e-05,
|
|
"loss": 0.4805,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.5350118017309207,
|
|
"grad_norm": 0.20267911141580347,
|
|
"learning_rate": 3.5490070434338525e-05,
|
|
"loss": 0.4776,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.5381589299763965,
|
|
"grad_norm": 0.19059012370650052,
|
|
"learning_rate": 3.5477884221180785e-05,
|
|
"loss": 0.4886,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.5413060582218723,
|
|
"grad_norm": 0.21515842908040148,
|
|
"learning_rate": 3.546568393090662e-05,
|
|
"loss": 0.483,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.5444531864673485,
|
|
"grad_norm": 0.19422697970653924,
|
|
"learning_rate": 3.5453469576258744e-05,
|
|
"loss": 0.4692,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.5476003147128248,
|
|
"grad_norm": 0.209559004728807,
|
|
"learning_rate": 3.544124116999457e-05,
|
|
"loss": 0.4865,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.5507474429583006,
|
|
"grad_norm": 0.1920441375389536,
|
|
"learning_rate": 3.542899872488618e-05,
|
|
"loss": 0.4793,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.5538945712037764,
|
|
"grad_norm": 0.2319852120314673,
|
|
"learning_rate": 3.541674225372033e-05,
|
|
"loss": 0.4773,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.5570416994492526,
|
|
"grad_norm": 0.19321156216806282,
|
|
"learning_rate": 3.540447176929841e-05,
|
|
"loss": 0.4757,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.5601888276947284,
|
|
"grad_norm": 0.21514651398865584,
|
|
"learning_rate": 3.539218728443646e-05,
|
|
"loss": 0.4785,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.5633359559402047,
|
|
"grad_norm": 0.21148869854656144,
|
|
"learning_rate": 3.537988881196514e-05,
|
|
"loss": 0.4746,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.5664830841856805,
|
|
"grad_norm": 0.17595269634550698,
|
|
"learning_rate": 3.536757636472972e-05,
|
|
"loss": 0.4685,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.5696302124311563,
|
|
"grad_norm": 0.20185229955902398,
|
|
"learning_rate": 3.5355249955590056e-05,
|
|
"loss": 0.4783,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.5727773406766326,
|
|
"grad_norm": 0.17164696512537717,
|
|
"learning_rate": 3.53429095974206e-05,
|
|
"loss": 0.4775,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.575924468922109,
|
|
"grad_norm": 0.23266847949769962,
|
|
"learning_rate": 3.533055530311036e-05,
|
|
"loss": 0.4692,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.5790715971675846,
|
|
"grad_norm": 0.16306518710715495,
|
|
"learning_rate": 3.531818708556292e-05,
|
|
"loss": 0.4783,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.5822187254130604,
|
|
"grad_norm": 0.20716704551095141,
|
|
"learning_rate": 3.530580495769638e-05,
|
|
"loss": 0.4785,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.5853658536585367,
|
|
"grad_norm": 0.20742333714577207,
|
|
"learning_rate": 3.5293408932443384e-05,
|
|
"loss": 0.4795,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.5885129819040125,
|
|
"grad_norm": 0.19428872955255258,
|
|
"learning_rate": 3.5280999022751095e-05,
|
|
"loss": 0.4853,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.5916601101494887,
|
|
"grad_norm": 0.20147158146515537,
|
|
"learning_rate": 3.526857524158117e-05,
|
|
"loss": 0.468,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.5948072383949645,
|
|
"grad_norm": 0.1893299986265306,
|
|
"learning_rate": 3.525613760190977e-05,
|
|
"loss": 0.4774,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.5979543666404403,
|
|
"grad_norm": 0.17391513342893553,
|
|
"learning_rate": 3.524368611672749e-05,
|
|
"loss": 0.4698,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.6011014948859166,
|
|
"grad_norm": 0.18505439269447876,
|
|
"learning_rate": 3.5231220799039434e-05,
|
|
"loss": 0.4759,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.604248623131393,
|
|
"grad_norm": 0.18433327533924215,
|
|
"learning_rate": 3.521874166186512e-05,
|
|
"loss": 0.4745,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.6073957513768686,
|
|
"grad_norm": 0.17820635418388936,
|
|
"learning_rate": 3.5206248718238525e-05,
|
|
"loss": 0.4862,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.6105428796223444,
|
|
"grad_norm": 0.18696671678977475,
|
|
"learning_rate": 3.519374198120803e-05,
|
|
"loss": 0.4758,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.6136900078678207,
|
|
"grad_norm": 0.18769968466549117,
|
|
"learning_rate": 3.5181221463836426e-05,
|
|
"loss": 0.4778,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.6168371361132965,
|
|
"grad_norm": 0.191487891133253,
|
|
"learning_rate": 3.51686871792009e-05,
|
|
"loss": 0.4707,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.6199842643587727,
|
|
"grad_norm": 0.1825542674839225,
|
|
"learning_rate": 3.5156139140393e-05,
|
|
"loss": 0.4706,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.6231313926042485,
|
|
"grad_norm": 0.18855481959761283,
|
|
"learning_rate": 3.514357736051868e-05,
|
|
"loss": 0.4838,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.6262785208497244,
|
|
"grad_norm": 0.18817841391423204,
|
|
"learning_rate": 3.513100185269821e-05,
|
|
"loss": 0.4685,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 2.6294256490952006,
|
|
"grad_norm": 0.18928717937714,
|
|
"learning_rate": 3.51184126300662e-05,
|
|
"loss": 0.4781,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 2.632572777340677,
|
|
"grad_norm": 0.2256933473661262,
|
|
"learning_rate": 3.510580970577161e-05,
|
|
"loss": 0.4739,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 2.6357199055861527,
|
|
"grad_norm": 0.17564318088263306,
|
|
"learning_rate": 3.5093193092977694e-05,
|
|
"loss": 0.4718,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 2.6388670338316285,
|
|
"grad_norm": 0.20104156818385482,
|
|
"learning_rate": 3.5080562804861996e-05,
|
|
"loss": 0.4802,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 2.6420141620771047,
|
|
"grad_norm": 0.18928735051422238,
|
|
"learning_rate": 3.506791885461636e-05,
|
|
"loss": 0.4799,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 2.6451612903225805,
|
|
"grad_norm": 0.17754299417673466,
|
|
"learning_rate": 3.505526125544688e-05,
|
|
"loss": 0.4739,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 2.6483084185680568,
|
|
"grad_norm": 0.17924816010567865,
|
|
"learning_rate": 3.504259002057394e-05,
|
|
"loss": 0.4833,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.6514555468135326,
|
|
"grad_norm": 0.18330420763324123,
|
|
"learning_rate": 3.5029905163232114e-05,
|
|
"loss": 0.4809,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 2.654602675059009,
|
|
"grad_norm": 0.17763845173295095,
|
|
"learning_rate": 3.501720669667025e-05,
|
|
"loss": 0.478,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 2.6577498033044846,
|
|
"grad_norm": 0.20606096391374554,
|
|
"learning_rate": 3.500449463415139e-05,
|
|
"loss": 0.4803,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 2.660896931549961,
|
|
"grad_norm": 0.16091387572699684,
|
|
"learning_rate": 3.4991768988952794e-05,
|
|
"loss": 0.4777,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 2.6640440597954367,
|
|
"grad_norm": 0.17951419173964245,
|
|
"learning_rate": 3.497902977436587e-05,
|
|
"loss": 0.4786,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 2.6671911880409125,
|
|
"grad_norm": 0.17938289165750493,
|
|
"learning_rate": 3.4966277003696236e-05,
|
|
"loss": 0.4818,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 2.6703383162863887,
|
|
"grad_norm": 0.18410261157957933,
|
|
"learning_rate": 3.495351069026365e-05,
|
|
"loss": 0.4738,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 2.6734854445318645,
|
|
"grad_norm": 0.20590921125222097,
|
|
"learning_rate": 3.494073084740204e-05,
|
|
"loss": 0.486,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 2.676632572777341,
|
|
"grad_norm": 0.20239376639918705,
|
|
"learning_rate": 3.492793748845942e-05,
|
|
"loss": 0.4782,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 2.6797797010228166,
|
|
"grad_norm": 0.1916725798858272,
|
|
"learning_rate": 3.491513062679796e-05,
|
|
"loss": 0.47,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.682926829268293,
|
|
"grad_norm": 0.2033931578311126,
|
|
"learning_rate": 3.490231027579393e-05,
|
|
"loss": 0.4791,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 2.6860739575137687,
|
|
"grad_norm": 0.1941921749860495,
|
|
"learning_rate": 3.4889476448837656e-05,
|
|
"loss": 0.4882,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 2.689221085759245,
|
|
"grad_norm": 0.19874836532319543,
|
|
"learning_rate": 3.4876629159333575e-05,
|
|
"loss": 0.4756,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 2.6923682140047207,
|
|
"grad_norm": 0.20304633766767777,
|
|
"learning_rate": 3.486376842070017e-05,
|
|
"loss": 0.4793,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 2.6955153422501965,
|
|
"grad_norm": 0.18295353981827106,
|
|
"learning_rate": 3.485089424636997e-05,
|
|
"loss": 0.4822,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 2.6986624704956728,
|
|
"grad_norm": 0.2050344464266888,
|
|
"learning_rate": 3.4838006649789546e-05,
|
|
"loss": 0.4711,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 2.7018095987411486,
|
|
"grad_norm": 0.17837345576674332,
|
|
"learning_rate": 3.482510564441949e-05,
|
|
"loss": 0.4835,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 2.704956726986625,
|
|
"grad_norm": 0.22822296718181742,
|
|
"learning_rate": 3.4812191243734375e-05,
|
|
"loss": 0.4762,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 2.7081038552321006,
|
|
"grad_norm": 0.1759543201588798,
|
|
"learning_rate": 3.479926346122279e-05,
|
|
"loss": 0.4738,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 2.711250983477577,
|
|
"grad_norm": 0.2319777833974616,
|
|
"learning_rate": 3.478632231038729e-05,
|
|
"loss": 0.4794,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.7143981117230527,
|
|
"grad_norm": 0.17035781259506136,
|
|
"learning_rate": 3.477336780474439e-05,
|
|
"loss": 0.4769,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 2.717545239968529,
|
|
"grad_norm": 0.21472875867046778,
|
|
"learning_rate": 3.4760399957824576e-05,
|
|
"loss": 0.4818,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 2.7206923682140047,
|
|
"grad_norm": 0.19547886779373266,
|
|
"learning_rate": 3.474741878317223e-05,
|
|
"loss": 0.4756,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 2.7238394964594805,
|
|
"grad_norm": 0.1896076340759897,
|
|
"learning_rate": 3.4734424294345673e-05,
|
|
"loss": 0.4826,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 2.726986624704957,
|
|
"grad_norm": 0.2005371691635383,
|
|
"learning_rate": 3.472141650491716e-05,
|
|
"loss": 0.4898,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 2.7301337529504326,
|
|
"grad_norm": 0.2152647700389423,
|
|
"learning_rate": 3.470839542847279e-05,
|
|
"loss": 0.4816,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 2.733280881195909,
|
|
"grad_norm": 0.2073082020273619,
|
|
"learning_rate": 3.4695361078612565e-05,
|
|
"loss": 0.4766,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 2.7364280094413846,
|
|
"grad_norm": 0.22650494077281716,
|
|
"learning_rate": 3.468231346895035e-05,
|
|
"loss": 0.4773,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 2.739575137686861,
|
|
"grad_norm": 0.18847447202117246,
|
|
"learning_rate": 3.466925261311386e-05,
|
|
"loss": 0.4757,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 2.7427222659323367,
|
|
"grad_norm": 0.19717018165808387,
|
|
"learning_rate": 3.4656178524744644e-05,
|
|
"loss": 0.4723,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.745869394177813,
|
|
"grad_norm": 0.20419628183572036,
|
|
"learning_rate": 3.464309121749805e-05,
|
|
"loss": 0.4685,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 2.7490165224232888,
|
|
"grad_norm": 0.20942542946004844,
|
|
"learning_rate": 3.4629990705043274e-05,
|
|
"loss": 0.4807,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 2.7521636506687646,
|
|
"grad_norm": 0.19751423169250182,
|
|
"learning_rate": 3.461687700106327e-05,
|
|
"loss": 0.478,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 2.755310778914241,
|
|
"grad_norm": 0.20839382920660143,
|
|
"learning_rate": 3.46037501192548e-05,
|
|
"loss": 0.4796,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 2.7584579071597166,
|
|
"grad_norm": 0.21264262241811938,
|
|
"learning_rate": 3.459061007332835e-05,
|
|
"loss": 0.483,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 2.761605035405193,
|
|
"grad_norm": 0.21206865795633334,
|
|
"learning_rate": 3.457745687700818e-05,
|
|
"loss": 0.482,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 2.7647521636506687,
|
|
"grad_norm": 0.2267998478365036,
|
|
"learning_rate": 3.4564290544032304e-05,
|
|
"loss": 0.4852,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 2.767899291896145,
|
|
"grad_norm": 0.19962244872664645,
|
|
"learning_rate": 3.455111108815242e-05,
|
|
"loss": 0.4781,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 2.7710464201416207,
|
|
"grad_norm": 0.20223940173326052,
|
|
"learning_rate": 3.453791852313395e-05,
|
|
"loss": 0.4815,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 2.774193548387097,
|
|
"grad_norm": 0.2497760291214373,
|
|
"learning_rate": 3.4524712862756004e-05,
|
|
"loss": 0.4737,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.777340676632573,
|
|
"grad_norm": 0.21392418161927715,
|
|
"learning_rate": 3.451149412081137e-05,
|
|
"loss": 0.4849,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 2.7804878048780486,
|
|
"grad_norm": 0.20756513406871988,
|
|
"learning_rate": 3.4498262311106505e-05,
|
|
"loss": 0.4794,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 2.783634933123525,
|
|
"grad_norm": 0.19434508478386714,
|
|
"learning_rate": 3.448501744746151e-05,
|
|
"loss": 0.4717,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 2.7867820613690006,
|
|
"grad_norm": 0.17815978736962265,
|
|
"learning_rate": 3.4471759543710115e-05,
|
|
"loss": 0.479,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 2.789929189614477,
|
|
"grad_norm": 0.21692701459569838,
|
|
"learning_rate": 3.4458488613699686e-05,
|
|
"loss": 0.4711,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 2.7930763178599527,
|
|
"grad_norm": 0.1873399635626309,
|
|
"learning_rate": 3.444520467129118e-05,
|
|
"loss": 0.484,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 2.796223446105429,
|
|
"grad_norm": 0.1976319148693629,
|
|
"learning_rate": 3.4431907730359137e-05,
|
|
"loss": 0.4777,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 2.7993705743509048,
|
|
"grad_norm": 0.19261763199922333,
|
|
"learning_rate": 3.44185978047917e-05,
|
|
"loss": 0.4658,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 2.802517702596381,
|
|
"grad_norm": 0.20902878866014565,
|
|
"learning_rate": 3.440527490849055e-05,
|
|
"loss": 0.4751,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 2.805664830841857,
|
|
"grad_norm": 0.196677203826585,
|
|
"learning_rate": 3.439193905537094e-05,
|
|
"loss": 0.4739,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.8088119590873326,
|
|
"grad_norm": 0.18777554851103495,
|
|
"learning_rate": 3.4378590259361626e-05,
|
|
"loss": 0.471,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 2.811959087332809,
|
|
"grad_norm": 0.2206571953754081,
|
|
"learning_rate": 3.4365228534404895e-05,
|
|
"loss": 0.479,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 2.8151062155782847,
|
|
"grad_norm": 0.16662192562786868,
|
|
"learning_rate": 3.435185389445655e-05,
|
|
"loss": 0.4745,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 2.818253343823761,
|
|
"grad_norm": 0.19851273382341908,
|
|
"learning_rate": 3.433846635348587e-05,
|
|
"loss": 0.4773,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 2.8214004720692367,
|
|
"grad_norm": 0.19549393874279214,
|
|
"learning_rate": 3.43250659254756e-05,
|
|
"loss": 0.4683,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 2.824547600314713,
|
|
"grad_norm": 0.16826088149239804,
|
|
"learning_rate": 3.4311652624421976e-05,
|
|
"loss": 0.48,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 2.8276947285601888,
|
|
"grad_norm": 0.1762752510049042,
|
|
"learning_rate": 3.429822646433464e-05,
|
|
"loss": 0.479,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 2.830841856805665,
|
|
"grad_norm": 0.18678077326219097,
|
|
"learning_rate": 3.4284787459236705e-05,
|
|
"loss": 0.4723,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 2.833988985051141,
|
|
"grad_norm": 0.1630882015779384,
|
|
"learning_rate": 3.427133562316466e-05,
|
|
"loss": 0.4782,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 2.8371361132966166,
|
|
"grad_norm": 0.17882705777771293,
|
|
"learning_rate": 3.425787097016843e-05,
|
|
"loss": 0.4714,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.840283241542093,
|
|
"grad_norm": 0.17187562653472938,
|
|
"learning_rate": 3.424439351431131e-05,
|
|
"loss": 0.4742,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 2.8434303697875687,
|
|
"grad_norm": 0.174319821594161,
|
|
"learning_rate": 3.423090326966996e-05,
|
|
"loss": 0.4823,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 2.846577498033045,
|
|
"grad_norm": 0.1690838895842069,
|
|
"learning_rate": 3.4217400250334416e-05,
|
|
"loss": 0.4773,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 2.8497246262785207,
|
|
"grad_norm": 0.1731044457799335,
|
|
"learning_rate": 3.420388447040804e-05,
|
|
"loss": 0.4684,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 2.852871754523997,
|
|
"grad_norm": 0.177357699597428,
|
|
"learning_rate": 3.419035594400753e-05,
|
|
"loss": 0.477,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 2.856018882769473,
|
|
"grad_norm": 0.1827484820724141,
|
|
"learning_rate": 3.41768146852629e-05,
|
|
"loss": 0.4791,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 2.859166011014949,
|
|
"grad_norm": 0.20812185117779652,
|
|
"learning_rate": 3.416326070831746e-05,
|
|
"loss": 0.4818,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 2.862313139260425,
|
|
"grad_norm": 0.19582434700048504,
|
|
"learning_rate": 3.414969402732779e-05,
|
|
"loss": 0.4736,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 2.8654602675059007,
|
|
"grad_norm": 0.17405726221555853,
|
|
"learning_rate": 3.4136114656463766e-05,
|
|
"loss": 0.4822,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 2.868607395751377,
|
|
"grad_norm": 0.1886351350028885,
|
|
"learning_rate": 3.4122522609908504e-05,
|
|
"loss": 0.4799,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.8717545239968527,
|
|
"grad_norm": 0.17300845271944784,
|
|
"learning_rate": 3.410891790185834e-05,
|
|
"loss": 0.4737,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 2.874901652242329,
|
|
"grad_norm": 0.17440320919074534,
|
|
"learning_rate": 3.409530054652287e-05,
|
|
"loss": 0.4731,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 2.8780487804878048,
|
|
"grad_norm": 0.1803068800569239,
|
|
"learning_rate": 3.408167055812488e-05,
|
|
"loss": 0.4769,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 2.881195908733281,
|
|
"grad_norm": 0.181432403586137,
|
|
"learning_rate": 3.406802795090034e-05,
|
|
"loss": 0.4915,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 2.884343036978757,
|
|
"grad_norm": 0.16179024096943073,
|
|
"learning_rate": 3.405437273909843e-05,
|
|
"loss": 0.4795,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 2.887490165224233,
|
|
"grad_norm": 0.17972825825989644,
|
|
"learning_rate": 3.4040704936981475e-05,
|
|
"loss": 0.4761,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 2.890637293469709,
|
|
"grad_norm": 0.17303120146453108,
|
|
"learning_rate": 3.4027024558824956e-05,
|
|
"loss": 0.4737,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 2.8937844217151847,
|
|
"grad_norm": 0.19260008079093655,
|
|
"learning_rate": 3.401333161891747e-05,
|
|
"loss": 0.4827,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 2.896931549960661,
|
|
"grad_norm": 0.19408527896636127,
|
|
"learning_rate": 3.3999626131560754e-05,
|
|
"loss": 0.4791,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 2.9000786782061367,
|
|
"grad_norm": 0.17709492241260272,
|
|
"learning_rate": 3.398590811106966e-05,
|
|
"loss": 0.4758,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.903225806451613,
|
|
"grad_norm": 0.24719908496023188,
|
|
"learning_rate": 3.397217757177211e-05,
|
|
"loss": 0.478,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 2.906372934697089,
|
|
"grad_norm": 0.21088224277568318,
|
|
"learning_rate": 3.395843452800912e-05,
|
|
"loss": 0.4677,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 2.909520062942565,
|
|
"grad_norm": 0.18357158178528057,
|
|
"learning_rate": 3.394467899413473e-05,
|
|
"loss": 0.4822,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 2.912667191188041,
|
|
"grad_norm": 0.20061925910804337,
|
|
"learning_rate": 3.393091098451607e-05,
|
|
"loss": 0.4796,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 2.915814319433517,
|
|
"grad_norm": 0.257783364662756,
|
|
"learning_rate": 3.391713051353328e-05,
|
|
"loss": 0.4823,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 2.918961447678993,
|
|
"grad_norm": 0.20566149454081753,
|
|
"learning_rate": 3.39033375955795e-05,
|
|
"loss": 0.4806,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 2.9221085759244687,
|
|
"grad_norm": 0.21739345983865224,
|
|
"learning_rate": 3.388953224506091e-05,
|
|
"loss": 0.479,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 2.925255704169945,
|
|
"grad_norm": 0.2019066969178866,
|
|
"learning_rate": 3.3875714476396635e-05,
|
|
"loss": 0.4791,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 2.9284028324154208,
|
|
"grad_norm": 0.18964152674565116,
|
|
"learning_rate": 3.38618843040188e-05,
|
|
"loss": 0.4872,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 2.931549960660897,
|
|
"grad_norm": 0.21572408114004044,
|
|
"learning_rate": 3.384804174237246e-05,
|
|
"loss": 0.4856,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.934697088906373,
|
|
"grad_norm": 0.20435168904218748,
|
|
"learning_rate": 3.3834186805915634e-05,
|
|
"loss": 0.4823,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 2.937844217151849,
|
|
"grad_norm": 0.23036698086458443,
|
|
"learning_rate": 3.382031950911925e-05,
|
|
"loss": 0.4842,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 2.940991345397325,
|
|
"grad_norm": 0.2559922205766231,
|
|
"learning_rate": 3.380643986646714e-05,
|
|
"loss": 0.4683,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 2.944138473642801,
|
|
"grad_norm": 0.20816226293770818,
|
|
"learning_rate": 3.3792547892456045e-05,
|
|
"loss": 0.478,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 2.947285601888277,
|
|
"grad_norm": 0.27147963925238433,
|
|
"learning_rate": 3.37786436015956e-05,
|
|
"loss": 0.4716,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 2.9504327301337527,
|
|
"grad_norm": 0.1917348894021088,
|
|
"learning_rate": 3.376472700840827e-05,
|
|
"loss": 0.4855,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 2.953579858379229,
|
|
"grad_norm": 0.26586313382080357,
|
|
"learning_rate": 3.375079812742939e-05,
|
|
"loss": 0.4751,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 2.9567269866247052,
|
|
"grad_norm": 0.22006766096753588,
|
|
"learning_rate": 3.373685697320713e-05,
|
|
"loss": 0.4777,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 2.959874114870181,
|
|
"grad_norm": 0.22686294528313417,
|
|
"learning_rate": 3.372290356030246e-05,
|
|
"loss": 0.4788,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 2.963021243115657,
|
|
"grad_norm": 0.19665794947101586,
|
|
"learning_rate": 3.370893790328917e-05,
|
|
"loss": 0.4904,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 2.966168371361133,
|
|
"grad_norm": 0.21016583933505117,
|
|
"learning_rate": 3.369496001675385e-05,
|
|
"loss": 0.4846,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 2.969315499606609,
|
|
"grad_norm": 0.1823767869897883,
|
|
"learning_rate": 3.368096991529583e-05,
|
|
"loss": 0.474,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 2.972462627852085,
|
|
"grad_norm": 0.19889515201753574,
|
|
"learning_rate": 3.366696761352723e-05,
|
|
"loss": 0.4744,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 2.975609756097561,
|
|
"grad_norm": 0.19883183201167187,
|
|
"learning_rate": 3.36529531260729e-05,
|
|
"loss": 0.4872,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 2.9787568843430368,
|
|
"grad_norm": 0.17416758317261993,
|
|
"learning_rate": 3.363892646757041e-05,
|
|
"loss": 0.4791,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 2.981904012588513,
|
|
"grad_norm": 0.1922592641536021,
|
|
"learning_rate": 3.362488765267006e-05,
|
|
"loss": 0.4815,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 2.9850511408339893,
|
|
"grad_norm": 0.17209027351228018,
|
|
"learning_rate": 3.361083669603482e-05,
|
|
"loss": 0.4796,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 2.988198269079465,
|
|
"grad_norm": 0.17084312866058127,
|
|
"learning_rate": 3.3596773612340375e-05,
|
|
"loss": 0.4805,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 2.991345397324941,
|
|
"grad_norm": 0.16455840604865776,
|
|
"learning_rate": 3.358269841627504e-05,
|
|
"loss": 0.4734,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 2.994492525570417,
|
|
"grad_norm": 0.17609503259030376,
|
|
"learning_rate": 3.356861112253982e-05,
|
|
"loss": 0.4813,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 2.997639653815893,
|
|
"grad_norm": 0.16815201729549342,
|
|
"learning_rate": 3.355451174584834e-05,
|
|
"loss": 0.477,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 3.003147128245476,
|
|
"grad_norm": 0.4088775165911742,
|
|
"learning_rate": 3.35404003009268e-05,
|
|
"loss": 0.9,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 3.006294256490952,
|
|
"grad_norm": 0.32652509762470977,
|
|
"learning_rate": 3.352627680251409e-05,
|
|
"loss": 0.4413,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 3.009441384736428,
|
|
"grad_norm": 0.31181526619361466,
|
|
"learning_rate": 3.3512141265361625e-05,
|
|
"loss": 0.4442,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 3.012588512981904,
|
|
"grad_norm": 0.28249452749334586,
|
|
"learning_rate": 3.3497993704233415e-05,
|
|
"loss": 0.4341,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 3.01573564122738,
|
|
"grad_norm": 0.2469014492840724,
|
|
"learning_rate": 3.348383413390603e-05,
|
|
"loss": 0.4357,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 3.018882769472856,
|
|
"grad_norm": 0.29518171306893987,
|
|
"learning_rate": 3.346966256916858e-05,
|
|
"loss": 0.4331,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 3.022029897718332,
|
|
"grad_norm": 0.24550117952865097,
|
|
"learning_rate": 3.345547902482271e-05,
|
|
"loss": 0.4328,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 3.025177025963808,
|
|
"grad_norm": 0.26322265876569234,
|
|
"learning_rate": 3.344128351568255e-05,
|
|
"loss": 0.4296,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 3.028324154209284,
|
|
"grad_norm": 0.3355141854171973,
|
|
"learning_rate": 3.3427076056574765e-05,
|
|
"loss": 0.4399,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 3.03147128245476,
|
|
"grad_norm": 0.24848144520611637,
|
|
"learning_rate": 3.341285666233849e-05,
|
|
"loss": 0.4379,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 3.034618410700236,
|
|
"grad_norm": 0.3051743839260837,
|
|
"learning_rate": 3.3398625347825295e-05,
|
|
"loss": 0.4321,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 3.037765538945712,
|
|
"grad_norm": 0.2520446263992142,
|
|
"learning_rate": 3.3384382127899254e-05,
|
|
"loss": 0.4326,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 3.040912667191188,
|
|
"grad_norm": 0.23491369763876913,
|
|
"learning_rate": 3.337012701743682e-05,
|
|
"loss": 0.4304,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 3.044059795436664,
|
|
"grad_norm": 0.22739503178054998,
|
|
"learning_rate": 3.33558600313269e-05,
|
|
"loss": 0.4316,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 3.04720692368214,
|
|
"grad_norm": 0.23290557210702711,
|
|
"learning_rate": 3.334158118447081e-05,
|
|
"loss": 0.4205,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 3.050354051927616,
|
|
"grad_norm": 0.23718213519938236,
|
|
"learning_rate": 3.3327290491782214e-05,
|
|
"loss": 0.4276,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 3.0535011801730922,
|
|
"grad_norm": 0.22184226524551506,
|
|
"learning_rate": 3.331298796818719e-05,
|
|
"loss": 0.4336,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 3.056648308418568,
|
|
"grad_norm": 0.24883677057218126,
|
|
"learning_rate": 3.329867362862416e-05,
|
|
"loss": 0.4202,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 3.059795436664044,
|
|
"grad_norm": 0.20245231032426314,
|
|
"learning_rate": 3.328434748804389e-05,
|
|
"loss": 0.4283,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 3.06294256490952,
|
|
"grad_norm": 0.22154485320162687,
|
|
"learning_rate": 3.327000956140944e-05,
|
|
"loss": 0.4276,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 3.066089693154996,
|
|
"grad_norm": 0.23045678598682195,
|
|
"learning_rate": 3.325565986369624e-05,
|
|
"loss": 0.438,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 3.069236821400472,
|
|
"grad_norm": 0.21978195581485033,
|
|
"learning_rate": 3.3241298409891967e-05,
|
|
"loss": 0.4347,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 3.072383949645948,
|
|
"grad_norm": 0.2459169417158989,
|
|
"learning_rate": 3.3226925214996586e-05,
|
|
"loss": 0.426,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 3.075531077891424,
|
|
"grad_norm": 0.2087661039669764,
|
|
"learning_rate": 3.3212540294022324e-05,
|
|
"loss": 0.424,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 3.0786782061369,
|
|
"grad_norm": 0.22053026125524922,
|
|
"learning_rate": 3.319814366199368e-05,
|
|
"loss": 0.4358,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 3.0818253343823763,
|
|
"grad_norm": 0.22791109755583255,
|
|
"learning_rate": 3.318373533394735e-05,
|
|
"loss": 0.4339,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 3.084972462627852,
|
|
"grad_norm": 0.2097976976701724,
|
|
"learning_rate": 3.3169315324932276e-05,
|
|
"loss": 0.4315,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 3.088119590873328,
|
|
"grad_norm": 0.18718087218415436,
|
|
"learning_rate": 3.3154883650009584e-05,
|
|
"loss": 0.4311,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 3.091266719118804,
|
|
"grad_norm": 0.20933947694391003,
|
|
"learning_rate": 3.314044032425258e-05,
|
|
"loss": 0.4391,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 3.09441384736428,
|
|
"grad_norm": 0.22529123099122955,
|
|
"learning_rate": 3.3125985362746745e-05,
|
|
"loss": 0.4262,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 3.097560975609756,
|
|
"grad_norm": 0.1786820765800306,
|
|
"learning_rate": 3.3111518780589723e-05,
|
|
"loss": 0.4397,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 3.100708103855232,
|
|
"grad_norm": 0.22962197514376026,
|
|
"learning_rate": 3.3097040592891284e-05,
|
|
"loss": 0.4308,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 3.1038552321007082,
|
|
"grad_norm": 0.24051879415776045,
|
|
"learning_rate": 3.30825508147733e-05,
|
|
"loss": 0.4318,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 3.107002360346184,
|
|
"grad_norm": 0.20025474171903923,
|
|
"learning_rate": 3.30680494613698e-05,
|
|
"loss": 0.4384,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 3.1101494885916603,
|
|
"grad_norm": 0.19206558390260614,
|
|
"learning_rate": 3.305353654782687e-05,
|
|
"loss": 0.4297,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 3.113296616837136,
|
|
"grad_norm": 0.20782219172064703,
|
|
"learning_rate": 3.303901208930266e-05,
|
|
"loss": 0.4231,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 3.116443745082612,
|
|
"grad_norm": 0.20010226717176327,
|
|
"learning_rate": 3.30244761009674e-05,
|
|
"loss": 0.4254,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 3.119590873328088,
|
|
"grad_norm": 0.18984132712344276,
|
|
"learning_rate": 3.300992859800336e-05,
|
|
"loss": 0.4244,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 3.122738001573564,
|
|
"grad_norm": 0.23519299020760812,
|
|
"learning_rate": 3.299536959560481e-05,
|
|
"loss": 0.4365,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 3.12588512981904,
|
|
"grad_norm": 0.1950168518008789,
|
|
"learning_rate": 3.2980799108978065e-05,
|
|
"loss": 0.434,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 3.129032258064516,
|
|
"grad_norm": 0.17931137612435868,
|
|
"learning_rate": 3.296621715334143e-05,
|
|
"loss": 0.4321,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 3.1321793863099923,
|
|
"grad_norm": 0.20090892857586998,
|
|
"learning_rate": 3.295162374392518e-05,
|
|
"loss": 0.4205,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 3.135326514555468,
|
|
"grad_norm": 0.18545062281039967,
|
|
"learning_rate": 3.293701889597153e-05,
|
|
"loss": 0.4289,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 3.1384736428009443,
|
|
"grad_norm": 0.23236278068083843,
|
|
"learning_rate": 3.292240262473469e-05,
|
|
"loss": 0.4268,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 3.14162077104642,
|
|
"grad_norm": 0.2119237430411335,
|
|
"learning_rate": 3.290777494548075e-05,
|
|
"loss": 0.4361,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 3.144767899291896,
|
|
"grad_norm": 0.18687999372723774,
|
|
"learning_rate": 3.289313587348778e-05,
|
|
"loss": 0.4285,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 3.147915027537372,
|
|
"grad_norm": 0.242430728271403,
|
|
"learning_rate": 3.287848542404568e-05,
|
|
"loss": 0.4322,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 3.151062155782848,
|
|
"grad_norm": 0.20753524478639734,
|
|
"learning_rate": 3.2863823612456264e-05,
|
|
"loss": 0.4286,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 3.1542092840283242,
|
|
"grad_norm": 0.2204970218249786,
|
|
"learning_rate": 3.284915045403325e-05,
|
|
"loss": 0.4213,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 3.1573564122738,
|
|
"grad_norm": 0.2077798788084683,
|
|
"learning_rate": 3.283446596410212e-05,
|
|
"loss": 0.4243,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 3.1605035405192763,
|
|
"grad_norm": 0.20913787484273705,
|
|
"learning_rate": 3.281977015800028e-05,
|
|
"loss": 0.4349,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 3.163650668764752,
|
|
"grad_norm": 0.1845966280590288,
|
|
"learning_rate": 3.28050630510769e-05,
|
|
"loss": 0.4367,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 3.1667977970102283,
|
|
"grad_norm": 0.19783164507955084,
|
|
"learning_rate": 3.279034465869298e-05,
|
|
"loss": 0.4256,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 3.169944925255704,
|
|
"grad_norm": 0.2645215067092485,
|
|
"learning_rate": 3.277561499622129e-05,
|
|
"loss": 0.4358,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 3.17309205350118,
|
|
"grad_norm": 0.17753403080442126,
|
|
"learning_rate": 3.276087407904639e-05,
|
|
"loss": 0.4298,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 3.176239181746656,
|
|
"grad_norm": 0.23434775817714046,
|
|
"learning_rate": 3.274612192256457e-05,
|
|
"loss": 0.4328,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 3.179386309992132,
|
|
"grad_norm": 0.18670646442316036,
|
|
"learning_rate": 3.273135854218389e-05,
|
|
"loss": 0.4289,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 3.1825334382376083,
|
|
"grad_norm": 0.18890780180865613,
|
|
"learning_rate": 3.2716583953324094e-05,
|
|
"loss": 0.4377,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 3.185680566483084,
|
|
"grad_norm": 0.21371681553609642,
|
|
"learning_rate": 3.2701798171416674e-05,
|
|
"loss": 0.4315,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 3.1888276947285603,
|
|
"grad_norm": 0.1965403832948832,
|
|
"learning_rate": 3.268700121190479e-05,
|
|
"loss": 0.4349,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 3.191974822974036,
|
|
"grad_norm": 0.19377735620113185,
|
|
"learning_rate": 3.267219309024328e-05,
|
|
"loss": 0.427,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 3.1951219512195124,
|
|
"grad_norm": 0.18770678346838374,
|
|
"learning_rate": 3.265737382189863e-05,
|
|
"loss": 0.4267,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 3.198269079464988,
|
|
"grad_norm": 0.2042394668976984,
|
|
"learning_rate": 3.2642543422349e-05,
|
|
"loss": 0.4385,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 3.201416207710464,
|
|
"grad_norm": 0.19338325740387202,
|
|
"learning_rate": 3.2627701907084136e-05,
|
|
"loss": 0.4232,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 3.2045633359559402,
|
|
"grad_norm": 0.19512605168524141,
|
|
"learning_rate": 3.2612849291605425e-05,
|
|
"loss": 0.4291,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 3.207710464201416,
|
|
"grad_norm": 0.18182108276136827,
|
|
"learning_rate": 3.259798559142583e-05,
|
|
"loss": 0.4339,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 3.2108575924468923,
|
|
"grad_norm": 0.17815282317494613,
|
|
"learning_rate": 3.2583110822069894e-05,
|
|
"loss": 0.4348,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 3.214004720692368,
|
|
"grad_norm": 0.19146604497628397,
|
|
"learning_rate": 3.2568224999073725e-05,
|
|
"loss": 0.4253,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 3.2171518489378443,
|
|
"grad_norm": 0.17868445991882773,
|
|
"learning_rate": 3.255332813798499e-05,
|
|
"loss": 0.4366,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 3.22029897718332,
|
|
"grad_norm": 0.22039699239981317,
|
|
"learning_rate": 3.253842025436286e-05,
|
|
"loss": 0.4288,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 3.2234461054287964,
|
|
"grad_norm": 0.1801947638201736,
|
|
"learning_rate": 3.252350136377802e-05,
|
|
"loss": 0.4271,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 3.226593233674272,
|
|
"grad_norm": 0.1849719695926772,
|
|
"learning_rate": 3.2508571481812686e-05,
|
|
"loss": 0.4305,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 3.229740361919748,
|
|
"grad_norm": 0.17788240132478986,
|
|
"learning_rate": 3.2493630624060494e-05,
|
|
"loss": 0.4402,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 3.2328874901652243,
|
|
"grad_norm": 0.1845071818835128,
|
|
"learning_rate": 3.2478678806126614e-05,
|
|
"loss": 0.4389,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 3.2360346184107,
|
|
"grad_norm": 0.1858623426241377,
|
|
"learning_rate": 3.24637160436276e-05,
|
|
"loss": 0.4339,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 3.2391817466561763,
|
|
"grad_norm": 0.17220083563874175,
|
|
"learning_rate": 3.2448742352191476e-05,
|
|
"loss": 0.4331,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 3.242328874901652,
|
|
"grad_norm": 0.2247895258402866,
|
|
"learning_rate": 3.243375774745768e-05,
|
|
"loss": 0.4289,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 3.2454760031471284,
|
|
"grad_norm": 0.16684480973484458,
|
|
"learning_rate": 3.241876224507702e-05,
|
|
"loss": 0.4334,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 3.248623131392604,
|
|
"grad_norm": 0.1779584572036571,
|
|
"learning_rate": 3.240375586071171e-05,
|
|
"loss": 0.4399,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 3.2517702596380804,
|
|
"grad_norm": 0.191113784730864,
|
|
"learning_rate": 3.238873861003533e-05,
|
|
"loss": 0.4367,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 3.2549173878835562,
|
|
"grad_norm": 0.18550711603052794,
|
|
"learning_rate": 3.237371050873281e-05,
|
|
"loss": 0.4406,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 3.258064516129032,
|
|
"grad_norm": 0.1783887143847433,
|
|
"learning_rate": 3.235867157250039e-05,
|
|
"loss": 0.4418,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 3.2612116443745083,
|
|
"grad_norm": 0.17835625882815775,
|
|
"learning_rate": 3.234362181704565e-05,
|
|
"loss": 0.4295,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 3.264358772619984,
|
|
"grad_norm": 0.18249704241735015,
|
|
"learning_rate": 3.232856125808746e-05,
|
|
"loss": 0.4294,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 3.2675059008654603,
|
|
"grad_norm": 0.17475471863362183,
|
|
"learning_rate": 3.231348991135599e-05,
|
|
"loss": 0.4364,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 3.270653029110936,
|
|
"grad_norm": 0.19935489428291137,
|
|
"learning_rate": 3.229840779259266e-05,
|
|
"loss": 0.4255,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 3.2738001573564124,
|
|
"grad_norm": 0.1693521531229919,
|
|
"learning_rate": 3.2283314917550136e-05,
|
|
"loss": 0.4359,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 3.276947285601888,
|
|
"grad_norm": 0.19130787480398617,
|
|
"learning_rate": 3.226821130199233e-05,
|
|
"loss": 0.431,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 3.2800944138473644,
|
|
"grad_norm": 0.16737976497369075,
|
|
"learning_rate": 3.225309696169438e-05,
|
|
"loss": 0.4311,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 3.2832415420928402,
|
|
"grad_norm": 0.18392433219239301,
|
|
"learning_rate": 3.223797191244261e-05,
|
|
"loss": 0.4356,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 3.286388670338316,
|
|
"grad_norm": 0.19864523441807563,
|
|
"learning_rate": 3.2222836170034543e-05,
|
|
"loss": 0.4247,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 3.2895357985837923,
|
|
"grad_norm": 0.1984089785922852,
|
|
"learning_rate": 3.220768975027886e-05,
|
|
"loss": 0.4374,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 3.292682926829268,
|
|
"grad_norm": 0.19402343905084715,
|
|
"learning_rate": 3.2192532668995385e-05,
|
|
"loss": 0.4254,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 3.2958300550747444,
|
|
"grad_norm": 0.2041163961571167,
|
|
"learning_rate": 3.21773649420151e-05,
|
|
"loss": 0.4358,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 3.29897718332022,
|
|
"grad_norm": 0.1787832950909068,
|
|
"learning_rate": 3.2162186585180095e-05,
|
|
"loss": 0.4231,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 3.3021243115656964,
|
|
"grad_norm": 0.19740027959624745,
|
|
"learning_rate": 3.214699761434355e-05,
|
|
"loss": 0.4302,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 3.305271439811172,
|
|
"grad_norm": 0.18856045951279926,
|
|
"learning_rate": 3.2131798045369765e-05,
|
|
"loss": 0.4308,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 3.3084185680566485,
|
|
"grad_norm": 0.20807270153545412,
|
|
"learning_rate": 3.211658789413408e-05,
|
|
"loss": 0.4351,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 3.3115656963021243,
|
|
"grad_norm": 0.19162342835863225,
|
|
"learning_rate": 3.2101367176522886e-05,
|
|
"loss": 0.4354,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 3.3147128245476,
|
|
"grad_norm": 0.19387129843147574,
|
|
"learning_rate": 3.2086135908433634e-05,
|
|
"loss": 0.43,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 3.3178599527930763,
|
|
"grad_norm": 0.18150899186866062,
|
|
"learning_rate": 3.2070894105774766e-05,
|
|
"loss": 0.4344,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 3.321007081038552,
|
|
"grad_norm": 0.21537331116177905,
|
|
"learning_rate": 3.2055641784465745e-05,
|
|
"loss": 0.4415,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 3.3241542092840284,
|
|
"grad_norm": 0.2400691954661002,
|
|
"learning_rate": 3.2040378960437024e-05,
|
|
"loss": 0.4406,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 3.327301337529504,
|
|
"grad_norm": 0.19817600331351765,
|
|
"learning_rate": 3.2025105649630014e-05,
|
|
"loss": 0.4315,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 3.3304484657749804,
|
|
"grad_norm": 0.2084827766948241,
|
|
"learning_rate": 3.200982186799709e-05,
|
|
"loss": 0.4187,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 3.3335955940204562,
|
|
"grad_norm": 0.22112170756734634,
|
|
"learning_rate": 3.199452763150155e-05,
|
|
"loss": 0.4315,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 3.3367427222659325,
|
|
"grad_norm": 0.19512676684498764,
|
|
"learning_rate": 3.197922295611762e-05,
|
|
"loss": 0.4345,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 3.3398898505114083,
|
|
"grad_norm": 0.2663842173534082,
|
|
"learning_rate": 3.196390785783043e-05,
|
|
"loss": 0.4346,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 3.343036978756884,
|
|
"grad_norm": 0.21826809183094342,
|
|
"learning_rate": 3.194858235263598e-05,
|
|
"loss": 0.4355,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 3.3461841070023604,
|
|
"grad_norm": 0.20953163723288945,
|
|
"learning_rate": 3.193324645654118e-05,
|
|
"loss": 0.4301,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 3.349331235247836,
|
|
"grad_norm": 0.24320015638205855,
|
|
"learning_rate": 3.191790018556373e-05,
|
|
"loss": 0.4425,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 3.3524783634933124,
|
|
"grad_norm": 0.1904143985171064,
|
|
"learning_rate": 3.190254355573223e-05,
|
|
"loss": 0.4378,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 3.355625491738788,
|
|
"grad_norm": 0.22691576279072934,
|
|
"learning_rate": 3.1887176583086066e-05,
|
|
"loss": 0.4263,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 3.3587726199842645,
|
|
"grad_norm": 0.21161650205231633,
|
|
"learning_rate": 3.187179928367544e-05,
|
|
"loss": 0.4251,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 3.3619197482297403,
|
|
"grad_norm": 0.20609048888260278,
|
|
"learning_rate": 3.185641167356131e-05,
|
|
"loss": 0.4283,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 3.3650668764752165,
|
|
"grad_norm": 0.2065246689397034,
|
|
"learning_rate": 3.184101376881545e-05,
|
|
"loss": 0.4292,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 3.3682140047206923,
|
|
"grad_norm": 0.21953821452618635,
|
|
"learning_rate": 3.1825605585520343e-05,
|
|
"loss": 0.4334,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 3.371361132966168,
|
|
"grad_norm": 0.19911994569988575,
|
|
"learning_rate": 3.181018713976924e-05,
|
|
"loss": 0.4286,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 3.3745082612116444,
|
|
"grad_norm": 0.24900209916902774,
|
|
"learning_rate": 3.179475844766608e-05,
|
|
"loss": 0.4332,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 3.3776553894571206,
|
|
"grad_norm": 0.23150612649346244,
|
|
"learning_rate": 3.1779319525325546e-05,
|
|
"loss": 0.4268,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 3.3808025177025964,
|
|
"grad_norm": 0.2158927674109808,
|
|
"learning_rate": 3.176387038887296e-05,
|
|
"loss": 0.4462,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 3.3839496459480722,
|
|
"grad_norm": 0.24924200720982193,
|
|
"learning_rate": 3.174841105444434e-05,
|
|
"loss": 0.4408,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 3.3870967741935485,
|
|
"grad_norm": 0.2239815418692921,
|
|
"learning_rate": 3.173294153818635e-05,
|
|
"loss": 0.4326,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 3.3902439024390243,
|
|
"grad_norm": 0.2199325533660836,
|
|
"learning_rate": 3.17174618562563e-05,
|
|
"loss": 0.4312,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 3.3933910306845005,
|
|
"grad_norm": 0.22972172889576944,
|
|
"learning_rate": 3.170197202482208e-05,
|
|
"loss": 0.4343,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 3.3965381589299763,
|
|
"grad_norm": 0.2094072355653265,
|
|
"learning_rate": 3.168647206006221e-05,
|
|
"loss": 0.4362,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 3.399685287175452,
|
|
"grad_norm": 0.19900277532000674,
|
|
"learning_rate": 3.167096197816581e-05,
|
|
"loss": 0.4346,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 3.4028324154209284,
|
|
"grad_norm": 0.19471319908950105,
|
|
"learning_rate": 3.1655441795332523e-05,
|
|
"loss": 0.434,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 3.4059795436664047,
|
|
"grad_norm": 0.24652320632202052,
|
|
"learning_rate": 3.163991152777259e-05,
|
|
"loss": 0.4446,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 3.4091266719118805,
|
|
"grad_norm": 0.18487872132027222,
|
|
"learning_rate": 3.162437119170673e-05,
|
|
"loss": 0.4428,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 3.4122738001573563,
|
|
"grad_norm": 0.18070644674221434,
|
|
"learning_rate": 3.160882080336624e-05,
|
|
"loss": 0.4345,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 3.4154209284028325,
|
|
"grad_norm": 0.20214727431927682,
|
|
"learning_rate": 3.1593260378992856e-05,
|
|
"loss": 0.4393,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 3.4185680566483083,
|
|
"grad_norm": 0.19354862366840503,
|
|
"learning_rate": 3.1577689934838847e-05,
|
|
"loss": 0.4286,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 3.4217151848937846,
|
|
"grad_norm": 0.17360041262575412,
|
|
"learning_rate": 3.156210948716691e-05,
|
|
"loss": 0.4395,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 3.4248623131392604,
|
|
"grad_norm": 0.21602699476201023,
|
|
"learning_rate": 3.1546519052250216e-05,
|
|
"loss": 0.4363,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 3.4280094413847366,
|
|
"grad_norm": 0.19400691100542458,
|
|
"learning_rate": 3.153091864637236e-05,
|
|
"loss": 0.4465,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 3.4311565696302124,
|
|
"grad_norm": 0.18853705117037867,
|
|
"learning_rate": 3.151530828582734e-05,
|
|
"loss": 0.4367,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 3.4343036978756887,
|
|
"grad_norm": 0.20156628021291076,
|
|
"learning_rate": 3.149968798691956e-05,
|
|
"loss": 0.4312,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 3.4374508261211645,
|
|
"grad_norm": 0.17508099200640428,
|
|
"learning_rate": 3.148405776596381e-05,
|
|
"loss": 0.4387,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 3.4405979543666403,
|
|
"grad_norm": 0.19428180773708023,
|
|
"learning_rate": 3.1468417639285234e-05,
|
|
"loss": 0.4372,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 3.4437450826121165,
|
|
"grad_norm": 0.18564695290847033,
|
|
"learning_rate": 3.145276762321932e-05,
|
|
"loss": 0.4372,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 3.4468922108575923,
|
|
"grad_norm": 0.18386558110382897,
|
|
"learning_rate": 3.1437107734111885e-05,
|
|
"loss": 0.4303,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 3.4500393391030686,
|
|
"grad_norm": 0.19976923603089122,
|
|
"learning_rate": 3.142143798831908e-05,
|
|
"loss": 0.4387,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 3.4531864673485444,
|
|
"grad_norm": 0.1967807399314723,
|
|
"learning_rate": 3.140575840220733e-05,
|
|
"loss": 0.4422,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 3.4563335955940206,
|
|
"grad_norm": 0.18143971647103654,
|
|
"learning_rate": 3.1390068992153336e-05,
|
|
"loss": 0.4427,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 3.4594807238394965,
|
|
"grad_norm": 0.19909894071095616,
|
|
"learning_rate": 3.137436977454406e-05,
|
|
"loss": 0.4413,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 3.4626278520849727,
|
|
"grad_norm": 0.17902127709322027,
|
|
"learning_rate": 3.135866076577673e-05,
|
|
"loss": 0.4408,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 3.4657749803304485,
|
|
"grad_norm": 0.1874350511676143,
|
|
"learning_rate": 3.134294198225877e-05,
|
|
"loss": 0.4458,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 3.4689221085759243,
|
|
"grad_norm": 0.19777946007747293,
|
|
"learning_rate": 3.132721344040783e-05,
|
|
"loss": 0.4363,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 3.4720692368214006,
|
|
"grad_norm": 0.21042566361266743,
|
|
"learning_rate": 3.1311475156651755e-05,
|
|
"loss": 0.4287,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 3.4752163650668764,
|
|
"grad_norm": 0.18234625120759887,
|
|
"learning_rate": 3.129572714742855e-05,
|
|
"loss": 0.4389,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 3.4783634933123526,
|
|
"grad_norm": 0.2094029102938534,
|
|
"learning_rate": 3.12799694291864e-05,
|
|
"loss": 0.4306,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 3.4815106215578284,
|
|
"grad_norm": 0.16484345005981205,
|
|
"learning_rate": 3.12642020183836e-05,
|
|
"loss": 0.4322,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 3.4846577498033047,
|
|
"grad_norm": 0.2218688219231824,
|
|
"learning_rate": 3.12484249314886e-05,
|
|
"loss": 0.4313,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 3.4878048780487805,
|
|
"grad_norm": 0.18992515126275933,
|
|
"learning_rate": 3.1232638184979934e-05,
|
|
"loss": 0.4378,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 3.4909520062942567,
|
|
"grad_norm": 0.1871055104215194,
|
|
"learning_rate": 3.1216841795346246e-05,
|
|
"loss": 0.4303,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 3.4940991345397325,
|
|
"grad_norm": 0.19199964417557105,
|
|
"learning_rate": 3.120103577908623e-05,
|
|
"loss": 0.441,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 3.4972462627852083,
|
|
"grad_norm": 0.1856197404311817,
|
|
"learning_rate": 3.1185220152708645e-05,
|
|
"loss": 0.4327,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 3.5003933910306846,
|
|
"grad_norm": 0.1770808469670125,
|
|
"learning_rate": 3.116939493273228e-05,
|
|
"loss": 0.4379,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 3.5035405192761604,
|
|
"grad_norm": 0.17080394749356279,
|
|
"learning_rate": 3.115356013568597e-05,
|
|
"loss": 0.434,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 3.5066876475216366,
|
|
"grad_norm": 0.19343264397189958,
|
|
"learning_rate": 3.113771577810852e-05,
|
|
"loss": 0.4349,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 3.5098347757671124,
|
|
"grad_norm": 0.17286156111192222,
|
|
"learning_rate": 3.1121861876548736e-05,
|
|
"loss": 0.443,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 3.5129819040125883,
|
|
"grad_norm": 0.19138302591060105,
|
|
"learning_rate": 3.1105998447565383e-05,
|
|
"loss": 0.4447,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 3.5161290322580645,
|
|
"grad_norm": 0.16413512248871734,
|
|
"learning_rate": 3.10901255077272e-05,
|
|
"loss": 0.4468,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 3.5192761605035408,
|
|
"grad_norm": 0.1914477065729763,
|
|
"learning_rate": 3.1074243073612834e-05,
|
|
"loss": 0.4309,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 3.5224232887490166,
|
|
"grad_norm": 0.188288353753066,
|
|
"learning_rate": 3.105835116181086e-05,
|
|
"loss": 0.4355,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 3.5255704169944924,
|
|
"grad_norm": 0.17465184377745524,
|
|
"learning_rate": 3.104244978891975e-05,
|
|
"loss": 0.4355,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 3.5287175452399686,
|
|
"grad_norm": 0.1825953673131463,
|
|
"learning_rate": 3.102653897154786e-05,
|
|
"loss": 0.4316,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 3.5318646734854444,
|
|
"grad_norm": 0.1743986993607113,
|
|
"learning_rate": 3.1010618726313405e-05,
|
|
"loss": 0.4331,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 3.5350118017309207,
|
|
"grad_norm": 0.17821957810877814,
|
|
"learning_rate": 3.099468906984446e-05,
|
|
"loss": 0.4345,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 3.5381589299763965,
|
|
"grad_norm": 0.2093960838490045,
|
|
"learning_rate": 3.097875001877891e-05,
|
|
"loss": 0.4387,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 3.5413060582218723,
|
|
"grad_norm": 0.17177569091014847,
|
|
"learning_rate": 3.0962801589764474e-05,
|
|
"loss": 0.4282,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 3.5444531864673485,
|
|
"grad_norm": 0.1748649502758884,
|
|
"learning_rate": 3.094684379945865e-05,
|
|
"loss": 0.434,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 3.5476003147128248,
|
|
"grad_norm": 0.19080807496580413,
|
|
"learning_rate": 3.093087666452871e-05,
|
|
"loss": 0.4386,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 3.5507474429583006,
|
|
"grad_norm": 0.17652468445293854,
|
|
"learning_rate": 3.09149002016517e-05,
|
|
"loss": 0.4391,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 3.5538945712037764,
|
|
"grad_norm": 0.19062712789092418,
|
|
"learning_rate": 3.08989144275144e-05,
|
|
"loss": 0.43,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 3.5570416994492526,
|
|
"grad_norm": 0.17546579858596842,
|
|
"learning_rate": 3.088291935881333e-05,
|
|
"loss": 0.435,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 3.5601888276947284,
|
|
"grad_norm": 0.21065753936700307,
|
|
"learning_rate": 3.08669150122547e-05,
|
|
"loss": 0.4233,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 3.5633359559402047,
|
|
"grad_norm": 0.16676658656556034,
|
|
"learning_rate": 3.0850901404554404e-05,
|
|
"loss": 0.4419,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 3.5664830841856805,
|
|
"grad_norm": 0.2075035100795957,
|
|
"learning_rate": 3.083487855243804e-05,
|
|
"loss": 0.4374,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 3.5696302124311563,
|
|
"grad_norm": 0.16571698042635005,
|
|
"learning_rate": 3.081884647264083e-05,
|
|
"loss": 0.4385,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 3.5727773406766326,
|
|
"grad_norm": 0.19707520555123104,
|
|
"learning_rate": 3.080280518190765e-05,
|
|
"loss": 0.4445,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 3.575924468922109,
|
|
"grad_norm": 0.17132281905111424,
|
|
"learning_rate": 3.078675469699299e-05,
|
|
"loss": 0.4379,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 3.5790715971675846,
|
|
"grad_norm": 0.1865379519918738,
|
|
"learning_rate": 3.077069503466095e-05,
|
|
"loss": 0.4324,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 3.5822187254130604,
|
|
"grad_norm": 0.20303319475170387,
|
|
"learning_rate": 3.075462621168521e-05,
|
|
"loss": 0.4335,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 3.5853658536585367,
|
|
"grad_norm": 0.1769716572277089,
|
|
"learning_rate": 3.0738548244849024e-05,
|
|
"loss": 0.4414,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 3.5885129819040125,
|
|
"grad_norm": 0.2119201910437755,
|
|
"learning_rate": 3.072246115094519e-05,
|
|
"loss": 0.4347,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 3.5916601101494887,
|
|
"grad_norm": 0.18291536465188502,
|
|
"learning_rate": 3.070636494677603e-05,
|
|
"loss": 0.4297,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 3.5948072383949645,
|
|
"grad_norm": 0.2018907273302855,
|
|
"learning_rate": 3.0690259649153414e-05,
|
|
"loss": 0.4369,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 3.5979543666404403,
|
|
"grad_norm": 0.18931019337202662,
|
|
"learning_rate": 3.067414527489866e-05,
|
|
"loss": 0.4385,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 3.6011014948859166,
|
|
"grad_norm": 0.17894402262594664,
|
|
"learning_rate": 3.0658021840842615e-05,
|
|
"loss": 0.4317,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 3.604248623131393,
|
|
"grad_norm": 0.19224386856066833,
|
|
"learning_rate": 3.0641889363825566e-05,
|
|
"loss": 0.4295,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 3.6073957513768686,
|
|
"grad_norm": 0.1823104308503007,
|
|
"learning_rate": 3.062574786069723e-05,
|
|
"loss": 0.4381,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 3.6105428796223444,
|
|
"grad_norm": 0.1845807265379901,
|
|
"learning_rate": 3.0609597348316784e-05,
|
|
"loss": 0.4443,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 3.6136900078678207,
|
|
"grad_norm": 0.1752786351405681,
|
|
"learning_rate": 3.05934378435528e-05,
|
|
"loss": 0.4269,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 3.6168371361132965,
|
|
"grad_norm": 0.17650135711502488,
|
|
"learning_rate": 3.057726936328323e-05,
|
|
"loss": 0.4344,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 3.6199842643587727,
|
|
"grad_norm": 0.19322880994465225,
|
|
"learning_rate": 3.056109192439541e-05,
|
|
"loss": 0.4286,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 3.6231313926042485,
|
|
"grad_norm": 0.1664813830978989,
|
|
"learning_rate": 3.0544905543786045e-05,
|
|
"loss": 0.434,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 3.6262785208497244,
|
|
"grad_norm": 0.19170305910822624,
|
|
"learning_rate": 3.052871023836116e-05,
|
|
"loss": 0.4432,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 3.6294256490952006,
|
|
"grad_norm": 0.1854811898273995,
|
|
"learning_rate": 3.051250602503612e-05,
|
|
"loss": 0.4335,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 3.632572777340677,
|
|
"grad_norm": 0.1822902853195308,
|
|
"learning_rate": 3.0496292920735574e-05,
|
|
"loss": 0.4397,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 3.6357199055861527,
|
|
"grad_norm": 0.16630950273159906,
|
|
"learning_rate": 3.0480070942393483e-05,
|
|
"loss": 0.4441,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 3.6388670338316285,
|
|
"grad_norm": 0.1661679586279354,
|
|
"learning_rate": 3.046384010695304e-05,
|
|
"loss": 0.4394,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 3.6420141620771047,
|
|
"grad_norm": 0.1564352636813857,
|
|
"learning_rate": 3.0447600431366724e-05,
|
|
"loss": 0.4438,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 3.6451612903225805,
|
|
"grad_norm": 0.17161921802692476,
|
|
"learning_rate": 3.043135193259623e-05,
|
|
"loss": 0.4343,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 3.6483084185680568,
|
|
"grad_norm": 0.18351798204850334,
|
|
"learning_rate": 3.0415094627612464e-05,
|
|
"loss": 0.4402,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 3.6514555468135326,
|
|
"grad_norm": 0.17135389498561554,
|
|
"learning_rate": 3.0398828533395547e-05,
|
|
"loss": 0.4324,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 3.654602675059009,
|
|
"grad_norm": 0.19814568295329985,
|
|
"learning_rate": 3.0382553666934777e-05,
|
|
"loss": 0.4418,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 3.6577498033044846,
|
|
"grad_norm": 0.17713962766442853,
|
|
"learning_rate": 3.036627004522859e-05,
|
|
"loss": 0.4258,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 3.660896931549961,
|
|
"grad_norm": 0.17627230414185083,
|
|
"learning_rate": 3.0349977685284596e-05,
|
|
"loss": 0.437,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 3.6640440597954367,
|
|
"grad_norm": 0.21033833731933352,
|
|
"learning_rate": 3.0333676604119512e-05,
|
|
"loss": 0.4359,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 3.6671911880409125,
|
|
"grad_norm": 0.16631632754069195,
|
|
"learning_rate": 3.0317366818759183e-05,
|
|
"loss": 0.4416,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 3.6703383162863887,
|
|
"grad_norm": 0.16042229001940653,
|
|
"learning_rate": 3.0301048346238522e-05,
|
|
"loss": 0.4332,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 3.6734854445318645,
|
|
"grad_norm": 0.1681130029892185,
|
|
"learning_rate": 3.028472120360153e-05,
|
|
"loss": 0.4435,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 3.676632572777341,
|
|
"grad_norm": 0.15489295343278095,
|
|
"learning_rate": 3.0268385407901267e-05,
|
|
"loss": 0.4301,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 3.6797797010228166,
|
|
"grad_norm": 0.1856105350391872,
|
|
"learning_rate": 3.025204097619982e-05,
|
|
"loss": 0.4384,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 3.682926829268293,
|
|
"grad_norm": 0.17420244127013473,
|
|
"learning_rate": 3.0235687925568308e-05,
|
|
"loss": 0.4474,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 3.6860739575137687,
|
|
"grad_norm": 0.16283454261310373,
|
|
"learning_rate": 3.021932627308684e-05,
|
|
"loss": 0.446,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 3.689221085759245,
|
|
"grad_norm": 0.16652733620538568,
|
|
"learning_rate": 3.020295603584451e-05,
|
|
"loss": 0.4385,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 3.6923682140047207,
|
|
"grad_norm": 0.16838451835416196,
|
|
"learning_rate": 3.0186577230939383e-05,
|
|
"loss": 0.4383,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 3.6955153422501965,
|
|
"grad_norm": 0.20256218511665658,
|
|
"learning_rate": 3.017018987547848e-05,
|
|
"loss": 0.4468,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 3.6986624704956728,
|
|
"grad_norm": 0.1789463978366356,
|
|
"learning_rate": 3.015379398657774e-05,
|
|
"loss": 0.4436,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 3.7018095987411486,
|
|
"grad_norm": 0.2254816031556007,
|
|
"learning_rate": 3.0137389581362012e-05,
|
|
"loss": 0.4402,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 3.704956726986625,
|
|
"grad_norm": 0.19908506142514173,
|
|
"learning_rate": 3.0120976676965065e-05,
|
|
"loss": 0.437,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 3.7081038552321006,
|
|
"grad_norm": 0.17538124081020218,
|
|
"learning_rate": 3.010455529052952e-05,
|
|
"loss": 0.4495,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 3.711250983477577,
|
|
"grad_norm": 0.20509162893468286,
|
|
"learning_rate": 3.0088125439206854e-05,
|
|
"loss": 0.4432,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 3.7143981117230527,
|
|
"grad_norm": 0.18192579811095336,
|
|
"learning_rate": 3.0071687140157413e-05,
|
|
"loss": 0.4388,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 3.717545239968529,
|
|
"grad_norm": 0.21503535709810237,
|
|
"learning_rate": 3.005524041055034e-05,
|
|
"loss": 0.4351,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 3.7206923682140047,
|
|
"grad_norm": 0.17400711170435434,
|
|
"learning_rate": 3.00387852675636e-05,
|
|
"loss": 0.4492,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 3.7238394964594805,
|
|
"grad_norm": 0.24313295347800967,
|
|
"learning_rate": 3.0022321728383933e-05,
|
|
"loss": 0.4315,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 3.726986624704957,
|
|
"grad_norm": 0.1478818146322245,
|
|
"learning_rate": 3.0005849810206845e-05,
|
|
"loss": 0.4363,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 3.7301337529504326,
|
|
"grad_norm": 0.20144526501549903,
|
|
"learning_rate": 2.9989369530236618e-05,
|
|
"loss": 0.4426,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 3.733280881195909,
|
|
"grad_norm": 0.18206029920285993,
|
|
"learning_rate": 2.9972880905686246e-05,
|
|
"loss": 0.4344,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 3.7364280094413846,
|
|
"grad_norm": 0.18180540417395807,
|
|
"learning_rate": 2.9956383953777442e-05,
|
|
"loss": 0.4424,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 3.739575137686861,
|
|
"grad_norm": 0.191677223977247,
|
|
"learning_rate": 2.9939878691740625e-05,
|
|
"loss": 0.4304,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 3.7427222659323367,
|
|
"grad_norm": 0.17576860046619955,
|
|
"learning_rate": 2.9923365136814876e-05,
|
|
"loss": 0.4432,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 3.745869394177813,
|
|
"grad_norm": 0.18177898566663142,
|
|
"learning_rate": 2.9906843306247965e-05,
|
|
"loss": 0.4315,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 3.7490165224232888,
|
|
"grad_norm": 0.19227267623614985,
|
|
"learning_rate": 2.9890313217296277e-05,
|
|
"loss": 0.4368,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 3.7521636506687646,
|
|
"grad_norm": 0.19404291468297713,
|
|
"learning_rate": 2.9873774887224844e-05,
|
|
"loss": 0.4418,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 3.755310778914241,
|
|
"grad_norm": 0.17297469013091823,
|
|
"learning_rate": 2.985722833330729e-05,
|
|
"loss": 0.4276,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 3.7584579071597166,
|
|
"grad_norm": 0.21580498854793478,
|
|
"learning_rate": 2.984067357282584e-05,
|
|
"loss": 0.438,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 3.761605035405193,
|
|
"grad_norm": 0.19970366933915254,
|
|
"learning_rate": 2.9824110623071285e-05,
|
|
"loss": 0.4429,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 3.7647521636506687,
|
|
"grad_norm": 0.1893900043304486,
|
|
"learning_rate": 2.980753950134297e-05,
|
|
"loss": 0.4425,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 3.767899291896145,
|
|
"grad_norm": 0.1844133531075253,
|
|
"learning_rate": 2.979096022494878e-05,
|
|
"loss": 0.4345,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 3.7710464201416207,
|
|
"grad_norm": 0.1764704510378753,
|
|
"learning_rate": 2.9774372811205104e-05,
|
|
"loss": 0.4404,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 3.774193548387097,
|
|
"grad_norm": 0.1937035801525317,
|
|
"learning_rate": 2.975777727743684e-05,
|
|
"loss": 0.4386,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 3.777340676632573,
|
|
"grad_norm": 0.18365070141139342,
|
|
"learning_rate": 2.9741173640977372e-05,
|
|
"loss": 0.4331,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 3.7804878048780486,
|
|
"grad_norm": 0.17907574402326445,
|
|
"learning_rate": 2.9724561919168536e-05,
|
|
"loss": 0.4411,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 3.783634933123525,
|
|
"grad_norm": 0.1915338900258077,
|
|
"learning_rate": 2.9707942129360622e-05,
|
|
"loss": 0.4336,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 3.7867820613690006,
|
|
"grad_norm": 0.18426098050440218,
|
|
"learning_rate": 2.969131428891234e-05,
|
|
"loss": 0.4352,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 3.789929189614477,
|
|
"grad_norm": 0.19246246891896052,
|
|
"learning_rate": 2.967467841519081e-05,
|
|
"loss": 0.4281,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 3.7930763178599527,
|
|
"grad_norm": 0.199901468879607,
|
|
"learning_rate": 2.9658034525571543e-05,
|
|
"loss": 0.4401,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 3.796223446105429,
|
|
"grad_norm": 0.17624439473519934,
|
|
"learning_rate": 2.964138263743843e-05,
|
|
"loss": 0.4343,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 3.7993705743509048,
|
|
"grad_norm": 0.20949517161628303,
|
|
"learning_rate": 2.96247227681837e-05,
|
|
"loss": 0.4284,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 3.802517702596381,
|
|
"grad_norm": 0.1841705003012857,
|
|
"learning_rate": 2.9608054935207925e-05,
|
|
"loss": 0.4392,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 3.805664830841857,
|
|
"grad_norm": 0.20023545812113352,
|
|
"learning_rate": 2.959137915592e-05,
|
|
"loss": 0.4403,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 3.8088119590873326,
|
|
"grad_norm": 0.18181285159081859,
|
|
"learning_rate": 2.9574695447737126e-05,
|
|
"loss": 0.4301,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 3.811959087332809,
|
|
"grad_norm": 0.4591999499033323,
|
|
"learning_rate": 2.9558003828084768e-05,
|
|
"loss": 0.4444,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 3.8151062155782847,
|
|
"grad_norm": 0.2085648435684365,
|
|
"learning_rate": 2.9541304314396653e-05,
|
|
"loss": 0.4325,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 3.818253343823761,
|
|
"grad_norm": 0.20463216335646361,
|
|
"learning_rate": 2.9524596924114776e-05,
|
|
"loss": 0.4345,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 3.8214004720692367,
|
|
"grad_norm": 0.2006047130461185,
|
|
"learning_rate": 2.950788167468934e-05,
|
|
"loss": 0.4391,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 3.824547600314713,
|
|
"grad_norm": 0.18827426151401724,
|
|
"learning_rate": 2.9491158583578753e-05,
|
|
"loss": 0.4358,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 3.8276947285601888,
|
|
"grad_norm": 0.19581009849077824,
|
|
"learning_rate": 2.947442766824963e-05,
|
|
"loss": 0.4441,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 3.830841856805665,
|
|
"grad_norm": 0.17734874484349197,
|
|
"learning_rate": 2.9457688946176746e-05,
|
|
"loss": 0.4274,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 3.833988985051141,
|
|
"grad_norm": 0.17277936701165808,
|
|
"learning_rate": 2.9440942434843042e-05,
|
|
"loss": 0.4367,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 3.8371361132966166,
|
|
"grad_norm": 0.18624753492705168,
|
|
"learning_rate": 2.942418815173958e-05,
|
|
"loss": 0.4431,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 3.840283241542093,
|
|
"grad_norm": 0.17981530092582268,
|
|
"learning_rate": 2.9407426114365538e-05,
|
|
"loss": 0.4488,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 3.8434303697875687,
|
|
"grad_norm": 0.17417936860740793,
|
|
"learning_rate": 2.9390656340228215e-05,
|
|
"loss": 0.4386,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 3.846577498033045,
|
|
"grad_norm": 0.1767282033703042,
|
|
"learning_rate": 2.9373878846842964e-05,
|
|
"loss": 0.4232,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 3.8497246262785207,
|
|
"grad_norm": 0.17726819566715343,
|
|
"learning_rate": 2.935709365173321e-05,
|
|
"loss": 0.4372,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 3.852871754523997,
|
|
"grad_norm": 0.18641228873224577,
|
|
"learning_rate": 2.934030077243044e-05,
|
|
"loss": 0.4539,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 3.856018882769473,
|
|
"grad_norm": 0.18523506965437314,
|
|
"learning_rate": 2.932350022647414e-05,
|
|
"loss": 0.44,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 3.859166011014949,
|
|
"grad_norm": 0.17429118934670704,
|
|
"learning_rate": 2.9306692031411817e-05,
|
|
"loss": 0.4419,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 3.862313139260425,
|
|
"grad_norm": 0.16912829692351863,
|
|
"learning_rate": 2.9289876204798973e-05,
|
|
"loss": 0.445,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 3.8654602675059007,
|
|
"grad_norm": 0.16689978761928095,
|
|
"learning_rate": 2.927305276419906e-05,
|
|
"loss": 0.4399,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 3.868607395751377,
|
|
"grad_norm": 0.1662616285588503,
|
|
"learning_rate": 2.9256221727183508e-05,
|
|
"loss": 0.4439,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 3.8717545239968527,
|
|
"grad_norm": 0.17547371330423203,
|
|
"learning_rate": 2.923938311133165e-05,
|
|
"loss": 0.4374,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 3.874901652242329,
|
|
"grad_norm": 0.16899679456608838,
|
|
"learning_rate": 2.922253693423078e-05,
|
|
"loss": 0.4403,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 3.8780487804878048,
|
|
"grad_norm": 0.17142815065151418,
|
|
"learning_rate": 2.920568321347604e-05,
|
|
"loss": 0.4491,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 3.881195908733281,
|
|
"grad_norm": 0.1659629352966584,
|
|
"learning_rate": 2.918882196667049e-05,
|
|
"loss": 0.4442,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 3.884343036978757,
|
|
"grad_norm": 0.1827137404665224,
|
|
"learning_rate": 2.9171953211425027e-05,
|
|
"loss": 0.4462,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 3.887490165224233,
|
|
"grad_norm": 0.16843932419439855,
|
|
"learning_rate": 2.9155076965358397e-05,
|
|
"loss": 0.4425,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 3.890637293469709,
|
|
"grad_norm": 0.1895572765071493,
|
|
"learning_rate": 2.9138193246097172e-05,
|
|
"loss": 0.4386,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 3.8937844217151847,
|
|
"grad_norm": 0.17922902546291508,
|
|
"learning_rate": 2.912130207127573e-05,
|
|
"loss": 0.4341,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 3.896931549960661,
|
|
"grad_norm": 0.17772714370132225,
|
|
"learning_rate": 2.9104403458536238e-05,
|
|
"loss": 0.4444,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 3.9000786782061367,
|
|
"grad_norm": 0.18719240088700048,
|
|
"learning_rate": 2.9087497425528618e-05,
|
|
"loss": 0.4329,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 3.903225806451613,
|
|
"grad_norm": 0.19116327201990352,
|
|
"learning_rate": 2.9070583989910556e-05,
|
|
"loss": 0.4393,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 3.906372934697089,
|
|
"grad_norm": 0.1888243799061614,
|
|
"learning_rate": 2.905366316934747e-05,
|
|
"loss": 0.4404,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 3.909520062942565,
|
|
"grad_norm": 0.19923427176985103,
|
|
"learning_rate": 2.9036734981512484e-05,
|
|
"loss": 0.4433,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 3.912667191188041,
|
|
"grad_norm": 0.184137878281804,
|
|
"learning_rate": 2.9019799444086425e-05,
|
|
"loss": 0.4451,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 3.915814319433517,
|
|
"grad_norm": 0.16318104017822477,
|
|
"learning_rate": 2.9002856574757777e-05,
|
|
"loss": 0.4459,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 3.918961447678993,
|
|
"grad_norm": 0.1834621256694157,
|
|
"learning_rate": 2.898590639122272e-05,
|
|
"loss": 0.4432,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 3.9221085759244687,
|
|
"grad_norm": 0.19023161460692964,
|
|
"learning_rate": 2.8968948911185018e-05,
|
|
"loss": 0.4411,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 3.925255704169945,
|
|
"grad_norm": 0.18356230097493653,
|
|
"learning_rate": 2.8951984152356117e-05,
|
|
"loss": 0.4365,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 3.9284028324154208,
|
|
"grad_norm": 0.19309532709059088,
|
|
"learning_rate": 2.8935012132455024e-05,
|
|
"loss": 0.4329,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 3.931549960660897,
|
|
"grad_norm": 0.1790999571642992,
|
|
"learning_rate": 2.8918032869208335e-05,
|
|
"loss": 0.44,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 3.934697088906373,
|
|
"grad_norm": 0.17778758227368407,
|
|
"learning_rate": 2.8901046380350227e-05,
|
|
"loss": 0.4369,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 3.937844217151849,
|
|
"grad_norm": 0.16705242650281665,
|
|
"learning_rate": 2.8884052683622408e-05,
|
|
"loss": 0.4416,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 3.940991345397325,
|
|
"grad_norm": 0.17118532687053076,
|
|
"learning_rate": 2.886705179677414e-05,
|
|
"loss": 0.4355,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 3.944138473642801,
|
|
"grad_norm": 0.17794134938829217,
|
|
"learning_rate": 2.885004373756215e-05,
|
|
"loss": 0.4362,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 3.947285601888277,
|
|
"grad_norm": 0.16850951168986514,
|
|
"learning_rate": 2.88330285237507e-05,
|
|
"loss": 0.439,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 3.9504327301337527,
|
|
"grad_norm": 0.17017397926948555,
|
|
"learning_rate": 2.8816006173111504e-05,
|
|
"loss": 0.4379,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 3.953579858379229,
|
|
"grad_norm": 0.1595665503643,
|
|
"learning_rate": 2.8798976703423726e-05,
|
|
"loss": 0.4416,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 3.9567269866247052,
|
|
"grad_norm": 0.17016824414153592,
|
|
"learning_rate": 2.8781940132473977e-05,
|
|
"loss": 0.437,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 3.959874114870181,
|
|
"grad_norm": 0.1768366345187553,
|
|
"learning_rate": 2.8764896478056287e-05,
|
|
"loss": 0.4405,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 3.963021243115657,
|
|
"grad_norm": 0.20215798109859642,
|
|
"learning_rate": 2.874784575797207e-05,
|
|
"loss": 0.4407,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 3.966168371361133,
|
|
"grad_norm": 0.19222521069705137,
|
|
"learning_rate": 2.8730787990030138e-05,
|
|
"loss": 0.4333,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 3.969315499606609,
|
|
"grad_norm": 0.1768154833619316,
|
|
"learning_rate": 2.8713723192046637e-05,
|
|
"loss": 0.4423,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 3.972462627852085,
|
|
"grad_norm": 0.183349858541064,
|
|
"learning_rate": 2.8696651381845094e-05,
|
|
"loss": 0.4443,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 3.975609756097561,
|
|
"grad_norm": 0.19201674367546054,
|
|
"learning_rate": 2.8679572577256324e-05,
|
|
"loss": 0.4362,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 3.9787568843430368,
|
|
"grad_norm": 0.18098429025124854,
|
|
"learning_rate": 2.866248679611846e-05,
|
|
"loss": 0.4339,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 3.981904012588513,
|
|
"grad_norm": 0.21698099039863375,
|
|
"learning_rate": 2.8645394056276936e-05,
|
|
"loss": 0.4356,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 3.9850511408339893,
|
|
"grad_norm": 0.19347949036380474,
|
|
"learning_rate": 2.862829437558443e-05,
|
|
"loss": 0.4435,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 3.988198269079465,
|
|
"grad_norm": 0.19634210846658384,
|
|
"learning_rate": 2.8611187771900897e-05,
|
|
"loss": 0.4359,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 3.991345397324941,
|
|
"grad_norm": 0.20372267012008688,
|
|
"learning_rate": 2.8594074263093495e-05,
|
|
"loss": 0.4435,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 3.994492525570417,
|
|
"grad_norm": 0.19018939442925267,
|
|
"learning_rate": 2.8576953867036605e-05,
|
|
"loss": 0.435,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 3.997639653815893,
|
|
"grad_norm": 0.17380970464724402,
|
|
"learning_rate": 2.855982660161181e-05,
|
|
"loss": 0.4368,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 4.003147128245476,
|
|
"grad_norm": 0.4567009200647522,
|
|
"learning_rate": 2.854269248470786e-05,
|
|
"loss": 0.8291,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 4.006294256490952,
|
|
"grad_norm": 0.3540594453182581,
|
|
"learning_rate": 2.8525551534220657e-05,
|
|
"loss": 0.3842,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 4.009441384736428,
|
|
"grad_norm": 0.3137640124559675,
|
|
"learning_rate": 2.8508403768053242e-05,
|
|
"loss": 0.3803,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 4.012588512981904,
|
|
"grad_norm": 0.3411489806848952,
|
|
"learning_rate": 2.8491249204115784e-05,
|
|
"loss": 0.3877,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 4.01573564122738,
|
|
"grad_norm": 0.32660510853569047,
|
|
"learning_rate": 2.847408786032555e-05,
|
|
"loss": 0.389,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 4.018882769472856,
|
|
"grad_norm": 0.3039688244842211,
|
|
"learning_rate": 2.845691975460688e-05,
|
|
"loss": 0.381,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 4.022029897718332,
|
|
"grad_norm": 0.2867459711491588,
|
|
"learning_rate": 2.8439744904891178e-05,
|
|
"loss": 0.3768,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 4.025177025963808,
|
|
"grad_norm": 0.2971765367481167,
|
|
"learning_rate": 2.8422563329116898e-05,
|
|
"loss": 0.3887,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 4.028324154209284,
|
|
"grad_norm": 0.26012973313503523,
|
|
"learning_rate": 2.8405375045229512e-05,
|
|
"loss": 0.3872,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 4.03147128245476,
|
|
"grad_norm": 0.24236515760372213,
|
|
"learning_rate": 2.83881800711815e-05,
|
|
"loss": 0.3911,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 4.034618410700236,
|
|
"grad_norm": 0.27142579496532293,
|
|
"learning_rate": 2.837097842493234e-05,
|
|
"loss": 0.3927,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 4.037765538945712,
|
|
"grad_norm": 0.24067712701294466,
|
|
"learning_rate": 2.8353770124448467e-05,
|
|
"loss": 0.3851,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 4.040912667191188,
|
|
"grad_norm": 0.2388177326791406,
|
|
"learning_rate": 2.8336555187703266e-05,
|
|
"loss": 0.377,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 4.044059795436664,
|
|
"grad_norm": 0.24190387059110036,
|
|
"learning_rate": 2.8319333632677062e-05,
|
|
"loss": 0.3819,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 4.04720692368214,
|
|
"grad_norm": 0.233811700600284,
|
|
"learning_rate": 2.830210547735708e-05,
|
|
"loss": 0.374,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 4.050354051927616,
|
|
"grad_norm": 0.21990274300198742,
|
|
"learning_rate": 2.8284870739737456e-05,
|
|
"loss": 0.3801,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 4.053501180173092,
|
|
"grad_norm": 0.24925704152570827,
|
|
"learning_rate": 2.826762943781918e-05,
|
|
"loss": 0.3833,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 4.056648308418568,
|
|
"grad_norm": 0.2281128058726442,
|
|
"learning_rate": 2.825038158961012e-05,
|
|
"loss": 0.3849,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 4.059795436664044,
|
|
"grad_norm": 0.20398518468216212,
|
|
"learning_rate": 2.823312721312496e-05,
|
|
"loss": 0.3749,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 4.06294256490952,
|
|
"grad_norm": 0.21358990395381772,
|
|
"learning_rate": 2.8215866326385222e-05,
|
|
"loss": 0.389,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 4.066089693154996,
|
|
"grad_norm": 0.1998290493946192,
|
|
"learning_rate": 2.8198598947419222e-05,
|
|
"loss": 0.3746,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 4.069236821400472,
|
|
"grad_norm": 0.2136797295751118,
|
|
"learning_rate": 2.818132509426204e-05,
|
|
"loss": 0.3873,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 4.072383949645948,
|
|
"grad_norm": 0.19695872962181996,
|
|
"learning_rate": 2.8164044784955536e-05,
|
|
"loss": 0.387,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 4.075531077891424,
|
|
"grad_norm": 0.23535924524823093,
|
|
"learning_rate": 2.814675803754831e-05,
|
|
"loss": 0.3875,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 4.0786782061369005,
|
|
"grad_norm": 0.20439185050875183,
|
|
"learning_rate": 2.8129464870095697e-05,
|
|
"loss": 0.3765,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 4.081825334382376,
|
|
"grad_norm": 0.22225008665143714,
|
|
"learning_rate": 2.8112165300659714e-05,
|
|
"loss": 0.3779,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 4.084972462627852,
|
|
"grad_norm": 0.21036944845424835,
|
|
"learning_rate": 2.809485934730907e-05,
|
|
"loss": 0.4008,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 4.088119590873328,
|
|
"grad_norm": 0.252462473354712,
|
|
"learning_rate": 2.807754702811916e-05,
|
|
"loss": 0.3867,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 4.091266719118804,
|
|
"grad_norm": 0.22970311838363114,
|
|
"learning_rate": 2.8060228361172012e-05,
|
|
"loss": 0.387,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 4.09441384736428,
|
|
"grad_norm": 0.19785518482856718,
|
|
"learning_rate": 2.804290336455629e-05,
|
|
"loss": 0.384,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 4.097560975609756,
|
|
"grad_norm": 0.2375083186949961,
|
|
"learning_rate": 2.8025572056367263e-05,
|
|
"loss": 0.3802,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 4.100708103855232,
|
|
"grad_norm": 0.20170571559199502,
|
|
"learning_rate": 2.8008234454706795e-05,
|
|
"loss": 0.378,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 4.103855232100708,
|
|
"grad_norm": 0.19637734454839414,
|
|
"learning_rate": 2.799089057768333e-05,
|
|
"loss": 0.3841,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 4.1070023603461845,
|
|
"grad_norm": 0.20324902703498704,
|
|
"learning_rate": 2.797354044341186e-05,
|
|
"loss": 0.389,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 4.11014948859166,
|
|
"grad_norm": 0.2011077455123792,
|
|
"learning_rate": 2.7956184070013912e-05,
|
|
"loss": 0.3813,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 4.113296616837136,
|
|
"grad_norm": 0.18553531787475813,
|
|
"learning_rate": 2.7938821475617523e-05,
|
|
"loss": 0.3829,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 4.116443745082612,
|
|
"grad_norm": 0.19878439370656475,
|
|
"learning_rate": 2.792145267835725e-05,
|
|
"loss": 0.3738,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 4.119590873328088,
|
|
"grad_norm": 0.19762338018115141,
|
|
"learning_rate": 2.7904077696374107e-05,
|
|
"loss": 0.3796,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 4.122738001573564,
|
|
"grad_norm": 0.21096214326769303,
|
|
"learning_rate": 2.7886696547815568e-05,
|
|
"loss": 0.3764,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 4.12588512981904,
|
|
"grad_norm": 0.19386489654228445,
|
|
"learning_rate": 2.7869309250835565e-05,
|
|
"loss": 0.3808,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 4.129032258064516,
|
|
"grad_norm": 0.22093269977952554,
|
|
"learning_rate": 2.7851915823594442e-05,
|
|
"loss": 0.3788,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 4.132179386309992,
|
|
"grad_norm": 0.2004090636824933,
|
|
"learning_rate": 2.783451628425893e-05,
|
|
"loss": 0.3789,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 4.1353265145554685,
|
|
"grad_norm": 0.198727713176911,
|
|
"learning_rate": 2.7817110651002183e-05,
|
|
"loss": 0.3818,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 4.138473642800944,
|
|
"grad_norm": 0.24944897390496926,
|
|
"learning_rate": 2.779969894200367e-05,
|
|
"loss": 0.3815,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 4.14162077104642,
|
|
"grad_norm": 0.17429094692844807,
|
|
"learning_rate": 2.7782281175449246e-05,
|
|
"loss": 0.3805,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 4.144767899291896,
|
|
"grad_norm": 0.2344196809712545,
|
|
"learning_rate": 2.7764857369531078e-05,
|
|
"loss": 0.3851,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 4.147915027537372,
|
|
"grad_norm": 0.1697952320993672,
|
|
"learning_rate": 2.774742754244764e-05,
|
|
"loss": 0.3833,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 4.151062155782848,
|
|
"grad_norm": 0.20972633292156506,
|
|
"learning_rate": 2.7729991712403697e-05,
|
|
"loss": 0.3841,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 4.154209284028324,
|
|
"grad_norm": 0.20826674826782807,
|
|
"learning_rate": 2.7712549897610284e-05,
|
|
"loss": 0.3873,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 4.1573564122738,
|
|
"grad_norm": 0.18808361463227552,
|
|
"learning_rate": 2.769510211628468e-05,
|
|
"loss": 0.3831,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 4.160503540519276,
|
|
"grad_norm": 0.20800615549370643,
|
|
"learning_rate": 2.767764838665041e-05,
|
|
"loss": 0.3785,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 4.1636506687647525,
|
|
"grad_norm": 0.22375396662721606,
|
|
"learning_rate": 2.766018872693719e-05,
|
|
"loss": 0.3835,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 4.166797797010228,
|
|
"grad_norm": 0.19240780126585705,
|
|
"learning_rate": 2.764272315538096e-05,
|
|
"loss": 0.3832,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 4.169944925255704,
|
|
"grad_norm": 0.2037920501367335,
|
|
"learning_rate": 2.762525169022381e-05,
|
|
"loss": 0.387,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 4.17309205350118,
|
|
"grad_norm": 0.1946112985945064,
|
|
"learning_rate": 2.7607774349713997e-05,
|
|
"loss": 0.3882,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 4.176239181746656,
|
|
"grad_norm": 0.19471855963610726,
|
|
"learning_rate": 2.7590291152105905e-05,
|
|
"loss": 0.3859,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 4.1793863099921325,
|
|
"grad_norm": 0.208895734249875,
|
|
"learning_rate": 2.7572802115660045e-05,
|
|
"loss": 0.3899,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 4.182533438237608,
|
|
"grad_norm": 0.19332682587345157,
|
|
"learning_rate": 2.7555307258643028e-05,
|
|
"loss": 0.3817,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 4.185680566483084,
|
|
"grad_norm": 0.20554112935607244,
|
|
"learning_rate": 2.753780659932753e-05,
|
|
"loss": 0.3892,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 4.18882769472856,
|
|
"grad_norm": 0.1863703929014815,
|
|
"learning_rate": 2.7520300155992296e-05,
|
|
"loss": 0.3989,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 4.191974822974037,
|
|
"grad_norm": 0.2181184280633318,
|
|
"learning_rate": 2.7502787946922125e-05,
|
|
"loss": 0.3857,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 4.195121951219512,
|
|
"grad_norm": 0.1877040187866126,
|
|
"learning_rate": 2.748526999040782e-05,
|
|
"loss": 0.3846,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 4.198269079464988,
|
|
"grad_norm": 0.2101059077905614,
|
|
"learning_rate": 2.7467746304746192e-05,
|
|
"loss": 0.3791,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 4.201416207710464,
|
|
"grad_norm": 0.20021777258381446,
|
|
"learning_rate": 2.7450216908240037e-05,
|
|
"loss": 0.3829,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 4.20456333595594,
|
|
"grad_norm": 0.2135816346994294,
|
|
"learning_rate": 2.7432681819198114e-05,
|
|
"loss": 0.385,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 4.2077104642014165,
|
|
"grad_norm": 0.18046740545279863,
|
|
"learning_rate": 2.7415141055935132e-05,
|
|
"loss": 0.3744,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 4.210857592446892,
|
|
"grad_norm": 0.20184956661048567,
|
|
"learning_rate": 2.739759463677172e-05,
|
|
"loss": 0.3773,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 4.214004720692368,
|
|
"grad_norm": 0.1683940922170243,
|
|
"learning_rate": 2.738004258003442e-05,
|
|
"loss": 0.3816,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 4.217151848937844,
|
|
"grad_norm": 0.178967099609675,
|
|
"learning_rate": 2.736248490405567e-05,
|
|
"loss": 0.3868,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 4.220298977183321,
|
|
"grad_norm": 0.17657160485024884,
|
|
"learning_rate": 2.7344921627173745e-05,
|
|
"loss": 0.3838,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 4.223446105428796,
|
|
"grad_norm": 0.1871577205312587,
|
|
"learning_rate": 2.732735276773282e-05,
|
|
"loss": 0.3852,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 4.226593233674272,
|
|
"grad_norm": 0.18868214707434067,
|
|
"learning_rate": 2.7309778344082853e-05,
|
|
"loss": 0.3897,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 4.229740361919748,
|
|
"grad_norm": 0.17848169409152329,
|
|
"learning_rate": 2.7292198374579637e-05,
|
|
"loss": 0.3841,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 4.232887490165224,
|
|
"grad_norm": 0.19858344201399433,
|
|
"learning_rate": 2.727461287758476e-05,
|
|
"loss": 0.3877,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 4.2360346184107005,
|
|
"grad_norm": 0.19726514845450016,
|
|
"learning_rate": 2.7257021871465566e-05,
|
|
"loss": 0.3838,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 4.239181746656176,
|
|
"grad_norm": 0.177223025272618,
|
|
"learning_rate": 2.723942537459518e-05,
|
|
"loss": 0.394,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 4.242328874901652,
|
|
"grad_norm": 0.2355427079374641,
|
|
"learning_rate": 2.7221823405352435e-05,
|
|
"loss": 0.3861,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 4.245476003147128,
|
|
"grad_norm": 0.20054623016987655,
|
|
"learning_rate": 2.72042159821219e-05,
|
|
"loss": 0.3888,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 4.248623131392605,
|
|
"grad_norm": 0.2041632465765231,
|
|
"learning_rate": 2.7186603123293824e-05,
|
|
"loss": 0.3795,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 4.25177025963808,
|
|
"grad_norm": 0.22641717412431214,
|
|
"learning_rate": 2.716898484726414e-05,
|
|
"loss": 0.3778,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 4.254917387883556,
|
|
"grad_norm": 0.1916050281481582,
|
|
"learning_rate": 2.7151361172434447e-05,
|
|
"loss": 0.3837,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 4.258064516129032,
|
|
"grad_norm": 0.24137246015980246,
|
|
"learning_rate": 2.713373211721196e-05,
|
|
"loss": 0.3862,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 4.261211644374509,
|
|
"grad_norm": 0.18174131822711345,
|
|
"learning_rate": 2.711609770000955e-05,
|
|
"loss": 0.3816,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 4.2643587726199845,
|
|
"grad_norm": 0.2242923413154568,
|
|
"learning_rate": 2.7098457939245654e-05,
|
|
"loss": 0.3872,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 4.26750590086546,
|
|
"grad_norm": 0.2235462568563175,
|
|
"learning_rate": 2.7080812853344304e-05,
|
|
"loss": 0.3996,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 4.270653029110936,
|
|
"grad_norm": 0.24969061397705838,
|
|
"learning_rate": 2.7063162460735103e-05,
|
|
"loss": 0.3816,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 4.273800157356412,
|
|
"grad_norm": 0.22715980857633994,
|
|
"learning_rate": 2.7045506779853186e-05,
|
|
"loss": 0.3852,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 4.276947285601889,
|
|
"grad_norm": 0.1822166185884246,
|
|
"learning_rate": 2.7027845829139202e-05,
|
|
"loss": 0.3803,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 4.280094413847364,
|
|
"grad_norm": 0.20932165509317502,
|
|
"learning_rate": 2.7010179627039318e-05,
|
|
"loss": 0.387,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 4.28324154209284,
|
|
"grad_norm": 0.20308647316174985,
|
|
"learning_rate": 2.699250819200519e-05,
|
|
"loss": 0.3864,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 4.286388670338316,
|
|
"grad_norm": 0.18217363195930258,
|
|
"learning_rate": 2.6974831542493923e-05,
|
|
"loss": 0.3802,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 4.289535798583792,
|
|
"grad_norm": 0.186444098060944,
|
|
"learning_rate": 2.6957149696968085e-05,
|
|
"loss": 0.3848,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 4.2926829268292686,
|
|
"grad_norm": 0.1844457489090321,
|
|
"learning_rate": 2.6939462673895663e-05,
|
|
"loss": 0.3812,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 4.295830055074744,
|
|
"grad_norm": 0.2010822546007924,
|
|
"learning_rate": 2.6921770491750044e-05,
|
|
"loss": 0.3897,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 4.29897718332022,
|
|
"grad_norm": 0.20222906345664107,
|
|
"learning_rate": 2.690407316901002e-05,
|
|
"loss": 0.3865,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 4.302124311565696,
|
|
"grad_norm": 0.18971869892805163,
|
|
"learning_rate": 2.6886370724159738e-05,
|
|
"loss": 0.3854,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 4.305271439811173,
|
|
"grad_norm": 0.1997246946094328,
|
|
"learning_rate": 2.686866317568871e-05,
|
|
"loss": 0.3868,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 4.3084185680566485,
|
|
"grad_norm": 0.18961656604722107,
|
|
"learning_rate": 2.685095054209176e-05,
|
|
"loss": 0.3904,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 4.311565696302124,
|
|
"grad_norm": 0.18958617985805862,
|
|
"learning_rate": 2.6833232841869038e-05,
|
|
"loss": 0.3832,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 4.3147128245476,
|
|
"grad_norm": 0.20597877250679947,
|
|
"learning_rate": 2.681551009352598e-05,
|
|
"loss": 0.3794,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 4.317859952793077,
|
|
"grad_norm": 0.2060368281881852,
|
|
"learning_rate": 2.679778231557329e-05,
|
|
"loss": 0.3845,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 4.321007081038553,
|
|
"grad_norm": 0.23435327637660347,
|
|
"learning_rate": 2.6780049526526934e-05,
|
|
"loss": 0.392,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 4.324154209284028,
|
|
"grad_norm": 0.19475763086784323,
|
|
"learning_rate": 2.6762311744908106e-05,
|
|
"loss": 0.387,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 4.327301337529504,
|
|
"grad_norm": 0.191113901147632,
|
|
"learning_rate": 2.674456898924322e-05,
|
|
"loss": 0.3873,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 4.33044846577498,
|
|
"grad_norm": 0.18932113213075516,
|
|
"learning_rate": 2.6726821278063878e-05,
|
|
"loss": 0.3815,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 4.333595594020457,
|
|
"grad_norm": 0.18886476158101362,
|
|
"learning_rate": 2.6709068629906867e-05,
|
|
"loss": 0.3826,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 4.3367427222659325,
|
|
"grad_norm": 0.19201665124650338,
|
|
"learning_rate": 2.669131106331412e-05,
|
|
"loss": 0.3926,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 4.339889850511408,
|
|
"grad_norm": 0.20585223962817667,
|
|
"learning_rate": 2.667354859683272e-05,
|
|
"loss": 0.3902,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 4.343036978756884,
|
|
"grad_norm": 0.18087305906555123,
|
|
"learning_rate": 2.6655781249014843e-05,
|
|
"loss": 0.3946,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 4.34618410700236,
|
|
"grad_norm": 0.22199132776028002,
|
|
"learning_rate": 2.6638009038417792e-05,
|
|
"loss": 0.3883,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 4.349331235247837,
|
|
"grad_norm": 0.20789032314071595,
|
|
"learning_rate": 2.662023198360394e-05,
|
|
"loss": 0.3863,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 4.352478363493312,
|
|
"grad_norm": 0.19784286066300738,
|
|
"learning_rate": 2.6602450103140713e-05,
|
|
"loss": 0.3964,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 4.355625491738788,
|
|
"grad_norm": 0.24791934452491476,
|
|
"learning_rate": 2.6584663415600583e-05,
|
|
"loss": 0.3862,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 4.358772619984264,
|
|
"grad_norm": 0.19522453242970436,
|
|
"learning_rate": 2.656687193956104e-05,
|
|
"loss": 0.3907,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 4.361919748229741,
|
|
"grad_norm": 0.21752375333467266,
|
|
"learning_rate": 2.6549075693604575e-05,
|
|
"loss": 0.3864,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 4.3650668764752165,
|
|
"grad_norm": 0.20160529341364714,
|
|
"learning_rate": 2.6531274696318664e-05,
|
|
"loss": 0.3965,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 4.368214004720692,
|
|
"grad_norm": 0.18568303741674552,
|
|
"learning_rate": 2.6513468966295737e-05,
|
|
"loss": 0.3885,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 4.371361132966168,
|
|
"grad_norm": 0.2243222479567149,
|
|
"learning_rate": 2.649565852213318e-05,
|
|
"loss": 0.3868,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 4.374508261211645,
|
|
"grad_norm": 0.19214945590700291,
|
|
"learning_rate": 2.6477843382433302e-05,
|
|
"loss": 0.3911,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 4.377655389457121,
|
|
"grad_norm": 0.20258061934369762,
|
|
"learning_rate": 2.6460023565803305e-05,
|
|
"loss": 0.3823,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 4.380802517702596,
|
|
"grad_norm": 0.2124917879318387,
|
|
"learning_rate": 2.644219909085528e-05,
|
|
"loss": 0.386,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 4.383949645948072,
|
|
"grad_norm": 0.1907323815413866,
|
|
"learning_rate": 2.642436997620619e-05,
|
|
"loss": 0.3912,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 4.387096774193548,
|
|
"grad_norm": 0.201912862490839,
|
|
"learning_rate": 2.6406536240477835e-05,
|
|
"loss": 0.3869,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 4.390243902439025,
|
|
"grad_norm": 0.1897846042537431,
|
|
"learning_rate": 2.6388697902296848e-05,
|
|
"loss": 0.3836,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 4.3933910306845005,
|
|
"grad_norm": 0.22784186892176736,
|
|
"learning_rate": 2.637085498029467e-05,
|
|
"loss": 0.3838,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 4.396538158929976,
|
|
"grad_norm": 0.20376206510573175,
|
|
"learning_rate": 2.6353007493107517e-05,
|
|
"loss": 0.3942,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 4.399685287175452,
|
|
"grad_norm": 0.19112546192154864,
|
|
"learning_rate": 2.6335155459376395e-05,
|
|
"loss": 0.3978,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 4.402832415420928,
|
|
"grad_norm": 0.20554479838475062,
|
|
"learning_rate": 2.6317298897747033e-05,
|
|
"loss": 0.3971,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 4.405979543666405,
|
|
"grad_norm": 0.19449262983527554,
|
|
"learning_rate": 2.6299437826869923e-05,
|
|
"loss": 0.3815,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 4.4091266719118805,
|
|
"grad_norm": 0.22156499828526832,
|
|
"learning_rate": 2.6281572265400223e-05,
|
|
"loss": 0.3866,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 4.412273800157356,
|
|
"grad_norm": 0.22531435072563047,
|
|
"learning_rate": 2.6263702231997824e-05,
|
|
"loss": 0.3807,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 4.415420928402832,
|
|
"grad_norm": 0.1879250329188261,
|
|
"learning_rate": 2.624582774532725e-05,
|
|
"loss": 0.3943,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 4.418568056648309,
|
|
"grad_norm": 0.2321072669481776,
|
|
"learning_rate": 2.6227948824057712e-05,
|
|
"loss": 0.3808,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 4.421715184893785,
|
|
"grad_norm": 0.19421971388680198,
|
|
"learning_rate": 2.6210065486863018e-05,
|
|
"loss": 0.3868,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 4.42486231313926,
|
|
"grad_norm": 0.23006793771084283,
|
|
"learning_rate": 2.6192177752421627e-05,
|
|
"loss": 0.3942,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 4.428009441384736,
|
|
"grad_norm": 0.253444666264355,
|
|
"learning_rate": 2.617428563941655e-05,
|
|
"loss": 0.3833,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 4.431156569630213,
|
|
"grad_norm": 0.21355125032513483,
|
|
"learning_rate": 2.61563891665354e-05,
|
|
"loss": 0.3897,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 4.434303697875689,
|
|
"grad_norm": 0.19507808857241118,
|
|
"learning_rate": 2.613848835247033e-05,
|
|
"loss": 0.3825,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 4.4374508261211645,
|
|
"grad_norm": 0.22181377156510734,
|
|
"learning_rate": 2.6120583215918038e-05,
|
|
"loss": 0.3944,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 4.44059795436664,
|
|
"grad_norm": 0.17048806847005354,
|
|
"learning_rate": 2.6102673775579724e-05,
|
|
"loss": 0.3915,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 4.443745082612116,
|
|
"grad_norm": 0.2068835428974255,
|
|
"learning_rate": 2.6084760050161097e-05,
|
|
"loss": 0.3854,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 4.446892210857593,
|
|
"grad_norm": 0.2207189101858155,
|
|
"learning_rate": 2.606684205837232e-05,
|
|
"loss": 0.3831,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 4.450039339103069,
|
|
"grad_norm": 0.18810634531927864,
|
|
"learning_rate": 2.6048919818928034e-05,
|
|
"loss": 0.3791,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 4.453186467348544,
|
|
"grad_norm": 0.20997580291783224,
|
|
"learning_rate": 2.6030993350547316e-05,
|
|
"loss": 0.3886,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 4.45633359559402,
|
|
"grad_norm": 0.18183918340442795,
|
|
"learning_rate": 2.6013062671953645e-05,
|
|
"loss": 0.3861,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 4.459480723839496,
|
|
"grad_norm": 0.1991423678980766,
|
|
"learning_rate": 2.59951278018749e-05,
|
|
"loss": 0.3867,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 4.462627852084973,
|
|
"grad_norm": 0.2161981116208714,
|
|
"learning_rate": 2.597718875904335e-05,
|
|
"loss": 0.393,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 4.4657749803304485,
|
|
"grad_norm": 0.1851824205661574,
|
|
"learning_rate": 2.5959245562195615e-05,
|
|
"loss": 0.3883,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 4.468922108575924,
|
|
"grad_norm": 0.20523032246874873,
|
|
"learning_rate": 2.594129823007265e-05,
|
|
"loss": 0.3949,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 4.4720692368214,
|
|
"grad_norm": 0.22841297022323917,
|
|
"learning_rate": 2.592334678141973e-05,
|
|
"loss": 0.3896,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 4.475216365066877,
|
|
"grad_norm": 0.20597518241097829,
|
|
"learning_rate": 2.5905391234986445e-05,
|
|
"loss": 0.3967,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 4.478363493312353,
|
|
"grad_norm": 0.200755938384984,
|
|
"learning_rate": 2.5887431609526637e-05,
|
|
"loss": 0.382,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 4.481510621557828,
|
|
"grad_norm": 0.23257612111660253,
|
|
"learning_rate": 2.586946792379844e-05,
|
|
"loss": 0.3903,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 4.484657749803304,
|
|
"grad_norm": 0.2071736737546111,
|
|
"learning_rate": 2.585150019656419e-05,
|
|
"loss": 0.3865,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 4.487804878048781,
|
|
"grad_norm": 0.18980283295679137,
|
|
"learning_rate": 2.5833528446590494e-05,
|
|
"loss": 0.3876,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 4.490952006294257,
|
|
"grad_norm": 0.20509626159829664,
|
|
"learning_rate": 2.581555269264811e-05,
|
|
"loss": 0.3858,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 4.4940991345397325,
|
|
"grad_norm": 0.1972045387860757,
|
|
"learning_rate": 2.5797572953512014e-05,
|
|
"loss": 0.3897,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 4.497246262785208,
|
|
"grad_norm": 0.21509560076923515,
|
|
"learning_rate": 2.5779589247961326e-05,
|
|
"loss": 0.3904,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 4.500393391030684,
|
|
"grad_norm": 0.19228646717933973,
|
|
"learning_rate": 2.576160159477932e-05,
|
|
"loss": 0.3918,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 4.503540519276161,
|
|
"grad_norm": 0.1795849927351903,
|
|
"learning_rate": 2.5743610012753375e-05,
|
|
"loss": 0.3953,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 4.506687647521637,
|
|
"grad_norm": 0.20489830860011532,
|
|
"learning_rate": 2.5725614520675003e-05,
|
|
"loss": 0.3919,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 4.5098347757671124,
|
|
"grad_norm": 0.21882006418429392,
|
|
"learning_rate": 2.5707615137339774e-05,
|
|
"loss": 0.3938,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 4.512981904012588,
|
|
"grad_norm": 0.19280187229135973,
|
|
"learning_rate": 2.5689611881547333e-05,
|
|
"loss": 0.3851,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 4.516129032258064,
|
|
"grad_norm": 0.20929709845343097,
|
|
"learning_rate": 2.5671604772101364e-05,
|
|
"loss": 0.3869,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 4.519276160503541,
|
|
"grad_norm": 0.19034278382916123,
|
|
"learning_rate": 2.565359382780959e-05,
|
|
"loss": 0.3892,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 4.522423288749017,
|
|
"grad_norm": 0.1955253897676763,
|
|
"learning_rate": 2.5635579067483716e-05,
|
|
"loss": 0.3948,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 4.525570416994492,
|
|
"grad_norm": 0.21006893895734358,
|
|
"learning_rate": 2.5617560509939453e-05,
|
|
"loss": 0.3902,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 4.528717545239968,
|
|
"grad_norm": 0.18703258921272223,
|
|
"learning_rate": 2.5599538173996466e-05,
|
|
"loss": 0.3945,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 4.531864673485445,
|
|
"grad_norm": 0.17700701860895593,
|
|
"learning_rate": 2.5581512078478384e-05,
|
|
"loss": 0.3872,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 4.535011801730921,
|
|
"grad_norm": 0.1904737922390524,
|
|
"learning_rate": 2.5563482242212735e-05,
|
|
"loss": 0.3918,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 4.5381589299763965,
|
|
"grad_norm": 0.19827598790350312,
|
|
"learning_rate": 2.554544868403098e-05,
|
|
"loss": 0.3936,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 4.541306058221872,
|
|
"grad_norm": 0.16151435086680768,
|
|
"learning_rate": 2.5527411422768454e-05,
|
|
"loss": 0.3915,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 4.544453186467349,
|
|
"grad_norm": 0.18657335720318344,
|
|
"learning_rate": 2.5509370477264358e-05,
|
|
"loss": 0.3919,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 4.547600314712825,
|
|
"grad_norm": 0.17941003140458014,
|
|
"learning_rate": 2.5491325866361737e-05,
|
|
"loss": 0.3876,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 4.550747442958301,
|
|
"grad_norm": 0.17734405961341654,
|
|
"learning_rate": 2.547327760890749e-05,
|
|
"loss": 0.3982,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 4.553894571203776,
|
|
"grad_norm": 0.17524164503141434,
|
|
"learning_rate": 2.5455225723752308e-05,
|
|
"loss": 0.3858,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 4.557041699449252,
|
|
"grad_norm": 0.17166835200788202,
|
|
"learning_rate": 2.5437170229750655e-05,
|
|
"loss": 0.3926,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 4.560188827694729,
|
|
"grad_norm": 0.17236320310568598,
|
|
"learning_rate": 2.541911114576079e-05,
|
|
"loss": 0.3917,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 4.563335955940205,
|
|
"grad_norm": 0.16586917527643763,
|
|
"learning_rate": 2.5401048490644713e-05,
|
|
"loss": 0.3905,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 4.5664830841856805,
|
|
"grad_norm": 0.1749389748686086,
|
|
"learning_rate": 2.538298228326814e-05,
|
|
"loss": 0.3943,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 4.569630212431156,
|
|
"grad_norm": 0.18445523358454083,
|
|
"learning_rate": 2.536491254250052e-05,
|
|
"loss": 0.3809,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 4.572777340676632,
|
|
"grad_norm": 0.18404046799235896,
|
|
"learning_rate": 2.534683928721498e-05,
|
|
"loss": 0.3937,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 4.575924468922109,
|
|
"grad_norm": 0.17788876905444853,
|
|
"learning_rate": 2.532876253628831e-05,
|
|
"loss": 0.3835,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 4.579071597167585,
|
|
"grad_norm": 0.18602157578478964,
|
|
"learning_rate": 2.5310682308600976e-05,
|
|
"loss": 0.3943,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 4.58221872541306,
|
|
"grad_norm": 0.17232298418778785,
|
|
"learning_rate": 2.5292598623037057e-05,
|
|
"loss": 0.3851,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 4.585365853658536,
|
|
"grad_norm": 0.1905608180973461,
|
|
"learning_rate": 2.5274511498484236e-05,
|
|
"loss": 0.3826,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 4.588512981904013,
|
|
"grad_norm": 0.17927303844283918,
|
|
"learning_rate": 2.5256420953833813e-05,
|
|
"loss": 0.3817,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 4.591660110149489,
|
|
"grad_norm": 0.20088651754247414,
|
|
"learning_rate": 2.5238327007980635e-05,
|
|
"loss": 0.3862,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 4.5948072383949645,
|
|
"grad_norm": 0.1800905518727353,
|
|
"learning_rate": 2.5220229679823113e-05,
|
|
"loss": 0.3935,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 4.59795436664044,
|
|
"grad_norm": 0.19383388730097495,
|
|
"learning_rate": 2.5202128988263183e-05,
|
|
"loss": 0.4014,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 4.601101494885917,
|
|
"grad_norm": 0.20016364463516104,
|
|
"learning_rate": 2.5184024952206315e-05,
|
|
"loss": 0.3904,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 4.604248623131393,
|
|
"grad_norm": 0.19534288874997346,
|
|
"learning_rate": 2.5165917590561453e-05,
|
|
"loss": 0.3884,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 4.607395751376869,
|
|
"grad_norm": 0.19845903664537287,
|
|
"learning_rate": 2.514780692224102e-05,
|
|
"loss": 0.3886,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 4.610542879622344,
|
|
"grad_norm": 0.18935447457983023,
|
|
"learning_rate": 2.5129692966160887e-05,
|
|
"loss": 0.3847,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 4.61369000786782,
|
|
"grad_norm": 0.1825215910021681,
|
|
"learning_rate": 2.511157574124037e-05,
|
|
"loss": 0.396,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 4.616837136113297,
|
|
"grad_norm": 0.1912881105818978,
|
|
"learning_rate": 2.5093455266402185e-05,
|
|
"loss": 0.3891,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 4.619984264358773,
|
|
"grad_norm": 0.18458695403611322,
|
|
"learning_rate": 2.507533156057246e-05,
|
|
"loss": 0.3951,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 4.6231313926042485,
|
|
"grad_norm": 0.17916510574048766,
|
|
"learning_rate": 2.5057204642680684e-05,
|
|
"loss": 0.3915,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 4.626278520849724,
|
|
"grad_norm": 0.17110403262888976,
|
|
"learning_rate": 2.50390745316597e-05,
|
|
"loss": 0.3845,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 4.6294256490952,
|
|
"grad_norm": 0.17903953256524813,
|
|
"learning_rate": 2.50209412464457e-05,
|
|
"loss": 0.383,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 4.632572777340677,
|
|
"grad_norm": 0.17999296179047053,
|
|
"learning_rate": 2.5002804805978177e-05,
|
|
"loss": 0.3944,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 4.635719905586153,
|
|
"grad_norm": 0.17581481498146168,
|
|
"learning_rate": 2.498466522919993e-05,
|
|
"loss": 0.3892,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 4.6388670338316285,
|
|
"grad_norm": 0.1783825034649337,
|
|
"learning_rate": 2.4966522535057024e-05,
|
|
"loss": 0.3891,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 4.642014162077104,
|
|
"grad_norm": 0.18377927440718408,
|
|
"learning_rate": 2.494837674249878e-05,
|
|
"loss": 0.3903,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 4.645161290322581,
|
|
"grad_norm": 0.18349514371989203,
|
|
"learning_rate": 2.4930227870477773e-05,
|
|
"loss": 0.3902,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 4.648308418568057,
|
|
"grad_norm": 0.1763906966839621,
|
|
"learning_rate": 2.491207593794977e-05,
|
|
"loss": 0.3857,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 4.651455546813533,
|
|
"grad_norm": 0.17906752178646956,
|
|
"learning_rate": 2.4893920963873746e-05,
|
|
"loss": 0.3908,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 4.654602675059008,
|
|
"grad_norm": 0.1679211823950241,
|
|
"learning_rate": 2.487576296721186e-05,
|
|
"loss": 0.3955,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 4.657749803304485,
|
|
"grad_norm": 0.1755347291395844,
|
|
"learning_rate": 2.485760196692942e-05,
|
|
"loss": 0.3916,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 4.660896931549961,
|
|
"grad_norm": 0.16465166030319364,
|
|
"learning_rate": 2.4839437981994867e-05,
|
|
"loss": 0.3903,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 4.664044059795437,
|
|
"grad_norm": 0.17776543423913058,
|
|
"learning_rate": 2.4821271031379765e-05,
|
|
"loss": 0.394,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 4.6671911880409125,
|
|
"grad_norm": 0.1781942266460176,
|
|
"learning_rate": 2.4803101134058775e-05,
|
|
"loss": 0.395,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 4.670338316286388,
|
|
"grad_norm": 0.17946693783829906,
|
|
"learning_rate": 2.478492830900964e-05,
|
|
"loss": 0.394,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 4.673485444531865,
|
|
"grad_norm": 0.1919149254694885,
|
|
"learning_rate": 2.4766752575213146e-05,
|
|
"loss": 0.3904,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 4.676632572777341,
|
|
"grad_norm": 0.16955030838772125,
|
|
"learning_rate": 2.4748573951653132e-05,
|
|
"loss": 0.388,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 4.679779701022817,
|
|
"grad_norm": 0.2002044781358719,
|
|
"learning_rate": 2.473039245731646e-05,
|
|
"loss": 0.3934,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 4.682926829268292,
|
|
"grad_norm": 0.17693479469518242,
|
|
"learning_rate": 2.4712208111192965e-05,
|
|
"loss": 0.3908,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 4.686073957513768,
|
|
"grad_norm": 0.19217906289741862,
|
|
"learning_rate": 2.4694020932275483e-05,
|
|
"loss": 0.3816,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 4.689221085759245,
|
|
"grad_norm": 0.1818600906270269,
|
|
"learning_rate": 2.467583093955981e-05,
|
|
"loss": 0.3894,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 4.692368214004721,
|
|
"grad_norm": 0.2030839509068234,
|
|
"learning_rate": 2.4657638152044667e-05,
|
|
"loss": 0.3868,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 4.6955153422501965,
|
|
"grad_norm": 0.18302211645178032,
|
|
"learning_rate": 2.4639442588731695e-05,
|
|
"loss": 0.3894,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 4.698662470495672,
|
|
"grad_norm": 0.17993702783679505,
|
|
"learning_rate": 2.4621244268625448e-05,
|
|
"loss": 0.393,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 4.701809598741149,
|
|
"grad_norm": 0.1836591030041654,
|
|
"learning_rate": 2.4603043210733343e-05,
|
|
"loss": 0.3936,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 4.704956726986625,
|
|
"grad_norm": 0.17969588856182217,
|
|
"learning_rate": 2.4584839434065675e-05,
|
|
"loss": 0.3896,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 4.708103855232101,
|
|
"grad_norm": 0.18627363426302215,
|
|
"learning_rate": 2.4566632957635555e-05,
|
|
"loss": 0.3963,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 4.711250983477576,
|
|
"grad_norm": 0.16348207104757354,
|
|
"learning_rate": 2.454842380045894e-05,
|
|
"loss": 0.38,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 4.714398111723053,
|
|
"grad_norm": 0.1932898447384366,
|
|
"learning_rate": 2.453021198155456e-05,
|
|
"loss": 0.3915,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 4.717545239968529,
|
|
"grad_norm": 0.17318365673903224,
|
|
"learning_rate": 2.451199751994395e-05,
|
|
"loss": 0.3942,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 4.720692368214005,
|
|
"grad_norm": 0.18679178875572805,
|
|
"learning_rate": 2.449378043465139e-05,
|
|
"loss": 0.3916,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 4.7238394964594805,
|
|
"grad_norm": 0.18301203217504775,
|
|
"learning_rate": 2.44755607447039e-05,
|
|
"loss": 0.3958,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 4.726986624704956,
|
|
"grad_norm": 0.18206504927748668,
|
|
"learning_rate": 2.4457338469131235e-05,
|
|
"loss": 0.3935,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 4.730133752950433,
|
|
"grad_norm": 0.19626381276499383,
|
|
"learning_rate": 2.4439113626965832e-05,
|
|
"loss": 0.3921,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 4.733280881195909,
|
|
"grad_norm": 0.209994269745939,
|
|
"learning_rate": 2.4420886237242812e-05,
|
|
"loss": 0.3896,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 4.736428009441385,
|
|
"grad_norm": 0.20966234827738384,
|
|
"learning_rate": 2.440265631899998e-05,
|
|
"loss": 0.3872,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 4.7395751376868605,
|
|
"grad_norm": 0.17956697728831314,
|
|
"learning_rate": 2.438442389127775e-05,
|
|
"loss": 0.3905,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 4.742722265932336,
|
|
"grad_norm": 0.230149257151023,
|
|
"learning_rate": 2.4366188973119173e-05,
|
|
"loss": 0.3942,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 4.745869394177813,
|
|
"grad_norm": 0.16446551541896534,
|
|
"learning_rate": 2.43479515835699e-05,
|
|
"loss": 0.3971,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 4.749016522423289,
|
|
"grad_norm": 0.21606059095007257,
|
|
"learning_rate": 2.4329711741678158e-05,
|
|
"loss": 0.3971,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 4.752163650668765,
|
|
"grad_norm": 0.18761423316527837,
|
|
"learning_rate": 2.4311469466494747e-05,
|
|
"loss": 0.3822,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 4.755310778914241,
|
|
"grad_norm": 0.20024056830304296,
|
|
"learning_rate": 2.429322477707299e-05,
|
|
"loss": 0.394,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 4.758457907159717,
|
|
"grad_norm": 0.20562507776072017,
|
|
"learning_rate": 2.4274977692468765e-05,
|
|
"loss": 0.3895,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 4.761605035405193,
|
|
"grad_norm": 0.1760613692982041,
|
|
"learning_rate": 2.4256728231740406e-05,
|
|
"loss": 0.3999,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 4.764752163650669,
|
|
"grad_norm": 0.22523629907709936,
|
|
"learning_rate": 2.423847641394877e-05,
|
|
"loss": 0.3881,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 4.7678992918961445,
|
|
"grad_norm": 0.18831716496370055,
|
|
"learning_rate": 2.422022225815714e-05,
|
|
"loss": 0.394,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 4.771046420141621,
|
|
"grad_norm": 0.22513412157385423,
|
|
"learning_rate": 2.4201965783431267e-05,
|
|
"loss": 0.3875,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 4.774193548387097,
|
|
"grad_norm": 0.21031494310694007,
|
|
"learning_rate": 2.4183707008839323e-05,
|
|
"loss": 0.3775,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 4.777340676632573,
|
|
"grad_norm": 0.21590978367024655,
|
|
"learning_rate": 2.4165445953451867e-05,
|
|
"loss": 0.3899,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 4.780487804878049,
|
|
"grad_norm": 0.22564896784384017,
|
|
"learning_rate": 2.414718263634185e-05,
|
|
"loss": 0.3913,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 4.783634933123524,
|
|
"grad_norm": 0.19020615700820612,
|
|
"learning_rate": 2.4128917076584587e-05,
|
|
"loss": 0.3944,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 4.786782061369001,
|
|
"grad_norm": 0.22953746850079004,
|
|
"learning_rate": 2.4110649293257728e-05,
|
|
"loss": 0.3986,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 4.789929189614477,
|
|
"grad_norm": 0.1805102124166518,
|
|
"learning_rate": 2.4092379305441252e-05,
|
|
"loss": 0.3898,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 4.793076317859953,
|
|
"grad_norm": 0.2268972739180654,
|
|
"learning_rate": 2.407410713221743e-05,
|
|
"loss": 0.3938,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 4.7962234461054285,
|
|
"grad_norm": 0.19050233625931567,
|
|
"learning_rate": 2.4055832792670842e-05,
|
|
"loss": 0.3924,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 4.799370574350904,
|
|
"grad_norm": 0.1922960780024949,
|
|
"learning_rate": 2.4037556305888288e-05,
|
|
"loss": 0.3813,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 4.802517702596381,
|
|
"grad_norm": 0.1898465812093314,
|
|
"learning_rate": 2.4019277690958856e-05,
|
|
"loss": 0.3939,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 4.805664830841857,
|
|
"grad_norm": 0.18381007690254716,
|
|
"learning_rate": 2.4000996966973817e-05,
|
|
"loss": 0.394,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 4.808811959087333,
|
|
"grad_norm": 0.2103100782295867,
|
|
"learning_rate": 2.398271415302668e-05,
|
|
"loss": 0.3897,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 4.811959087332809,
|
|
"grad_norm": 0.16769183783145522,
|
|
"learning_rate": 2.3964429268213115e-05,
|
|
"loss": 0.3972,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 4.815106215578285,
|
|
"grad_norm": 0.19945262652056686,
|
|
"learning_rate": 2.3946142331630955e-05,
|
|
"loss": 0.3941,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 4.818253343823761,
|
|
"grad_norm": 0.1700848589131017,
|
|
"learning_rate": 2.392785336238019e-05,
|
|
"loss": 0.3902,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 4.821400472069237,
|
|
"grad_norm": 0.1656556368956296,
|
|
"learning_rate": 2.390956237956291e-05,
|
|
"loss": 0.3933,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 4.8245476003147125,
|
|
"grad_norm": 0.18373049425460136,
|
|
"learning_rate": 2.389126940228333e-05,
|
|
"loss": 0.3956,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 4.827694728560189,
|
|
"grad_norm": 0.17964107945591998,
|
|
"learning_rate": 2.387297444964775e-05,
|
|
"loss": 0.3871,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 4.830841856805665,
|
|
"grad_norm": 0.17450741916661142,
|
|
"learning_rate": 2.385467754076451e-05,
|
|
"loss": 0.3788,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 4.833988985051141,
|
|
"grad_norm": 0.18365024872291147,
|
|
"learning_rate": 2.3836378694744014e-05,
|
|
"loss": 0.3986,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 4.837136113296617,
|
|
"grad_norm": 0.18009519290000942,
|
|
"learning_rate": 2.3818077930698683e-05,
|
|
"loss": 0.4009,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 4.840283241542092,
|
|
"grad_norm": 0.1962147329229101,
|
|
"learning_rate": 2.3799775267742934e-05,
|
|
"loss": 0.3919,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 4.843430369787569,
|
|
"grad_norm": 0.1814959883127099,
|
|
"learning_rate": 2.3781470724993186e-05,
|
|
"loss": 0.3894,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 4.846577498033045,
|
|
"grad_norm": 0.19522141468087864,
|
|
"learning_rate": 2.376316432156779e-05,
|
|
"loss": 0.3915,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 4.849724626278521,
|
|
"grad_norm": 0.1824952868828727,
|
|
"learning_rate": 2.3744856076587076e-05,
|
|
"loss": 0.396,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 4.8528717545239966,
|
|
"grad_norm": 0.19970076246737523,
|
|
"learning_rate": 2.3726546009173275e-05,
|
|
"loss": 0.3975,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 4.856018882769473,
|
|
"grad_norm": 0.1788577313593828,
|
|
"learning_rate": 2.3708234138450518e-05,
|
|
"loss": 0.3888,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 4.859166011014949,
|
|
"grad_norm": 0.2006715626887735,
|
|
"learning_rate": 2.368992048354485e-05,
|
|
"loss": 0.3904,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 4.862313139260425,
|
|
"grad_norm": 0.1655853950892595,
|
|
"learning_rate": 2.3671605063584147e-05,
|
|
"loss": 0.3917,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 4.865460267505901,
|
|
"grad_norm": 0.20564415582976606,
|
|
"learning_rate": 2.3653287897698135e-05,
|
|
"loss": 0.3935,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 4.868607395751377,
|
|
"grad_norm": 0.16818135554428862,
|
|
"learning_rate": 2.3634969005018377e-05,
|
|
"loss": 0.39,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 4.871754523996853,
|
|
"grad_norm": 0.17762234599652035,
|
|
"learning_rate": 2.361664840467823e-05,
|
|
"loss": 0.3926,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 4.874901652242329,
|
|
"grad_norm": 0.1932106423562817,
|
|
"learning_rate": 2.359832611581283e-05,
|
|
"loss": 0.385,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 4.878048780487805,
|
|
"grad_norm": 0.186869483874634,
|
|
"learning_rate": 2.358000215755909e-05,
|
|
"loss": 0.388,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 4.881195908733281,
|
|
"grad_norm": 0.18001846076878525,
|
|
"learning_rate": 2.3561676549055646e-05,
|
|
"loss": 0.3915,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 4.884343036978757,
|
|
"grad_norm": 0.1920048629271822,
|
|
"learning_rate": 2.3543349309442887e-05,
|
|
"loss": 0.392,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 4.887490165224233,
|
|
"grad_norm": 0.17088394010939248,
|
|
"learning_rate": 2.3525020457862878e-05,
|
|
"loss": 0.3964,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 4.890637293469709,
|
|
"grad_norm": 0.19786920135937375,
|
|
"learning_rate": 2.3506690013459376e-05,
|
|
"loss": 0.3843,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 4.893784421715185,
|
|
"grad_norm": 0.18033263963836252,
|
|
"learning_rate": 2.348835799537782e-05,
|
|
"loss": 0.3951,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 4.8969315499606605,
|
|
"grad_norm": 0.20462979636333165,
|
|
"learning_rate": 2.3470024422765267e-05,
|
|
"loss": 0.3913,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 4.900078678206137,
|
|
"grad_norm": 0.1679693396240305,
|
|
"learning_rate": 2.3451689314770404e-05,
|
|
"loss": 0.3933,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 4.903225806451613,
|
|
"grad_norm": 0.18705636136779824,
|
|
"learning_rate": 2.3433352690543533e-05,
|
|
"loss": 0.3875,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 4.906372934697089,
|
|
"grad_norm": 0.1725007429553329,
|
|
"learning_rate": 2.3415014569236522e-05,
|
|
"loss": 0.3922,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 4.909520062942565,
|
|
"grad_norm": 0.19115101454023312,
|
|
"learning_rate": 2.3396674970002824e-05,
|
|
"loss": 0.3865,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 4.912667191188041,
|
|
"grad_norm": 0.17053442035080676,
|
|
"learning_rate": 2.337833391199742e-05,
|
|
"loss": 0.3992,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 4.915814319433517,
|
|
"grad_norm": 0.1821548023265103,
|
|
"learning_rate": 2.3359991414376814e-05,
|
|
"loss": 0.388,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 4.918961447678993,
|
|
"grad_norm": 0.1793151595611094,
|
|
"learning_rate": 2.3341647496299025e-05,
|
|
"loss": 0.3893,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 4.922108575924469,
|
|
"grad_norm": 0.180962371465097,
|
|
"learning_rate": 2.3323302176923552e-05,
|
|
"loss": 0.3948,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 4.925255704169945,
|
|
"grad_norm": 0.20297348545826346,
|
|
"learning_rate": 2.3304955475411348e-05,
|
|
"loss": 0.3846,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 4.928402832415421,
|
|
"grad_norm": 0.17305830862076746,
|
|
"learning_rate": 2.3286607410924815e-05,
|
|
"loss": 0.3879,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 4.931549960660897,
|
|
"grad_norm": 0.1913355603249613,
|
|
"learning_rate": 2.3268258002627778e-05,
|
|
"loss": 0.394,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 4.934697088906373,
|
|
"grad_norm": 0.19290416318807532,
|
|
"learning_rate": 2.3249907269685473e-05,
|
|
"loss": 0.3894,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 4.937844217151849,
|
|
"grad_norm": 0.18295228802636587,
|
|
"learning_rate": 2.3231555231264525e-05,
|
|
"loss": 0.3948,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 4.940991345397325,
|
|
"grad_norm": 0.17938423256259314,
|
|
"learning_rate": 2.3213201906532895e-05,
|
|
"loss": 0.3899,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 4.944138473642801,
|
|
"grad_norm": 0.17823287523578704,
|
|
"learning_rate": 2.3194847314659908e-05,
|
|
"loss": 0.3903,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 4.947285601888277,
|
|
"grad_norm": 0.19013188360587602,
|
|
"learning_rate": 2.3176491474816207e-05,
|
|
"loss": 0.3892,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 4.950432730133753,
|
|
"grad_norm": 0.16191899543672794,
|
|
"learning_rate": 2.3158134406173742e-05,
|
|
"loss": 0.3901,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 4.9535798583792285,
|
|
"grad_norm": 0.18934530299370167,
|
|
"learning_rate": 2.3139776127905745e-05,
|
|
"loss": 0.392,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 4.956726986624705,
|
|
"grad_norm": 0.18625354508534378,
|
|
"learning_rate": 2.312141665918671e-05,
|
|
"loss": 0.393,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 4.959874114870181,
|
|
"grad_norm": 0.1716080119401114,
|
|
"learning_rate": 2.3103056019192373e-05,
|
|
"loss": 0.3934,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 4.963021243115657,
|
|
"grad_norm": 0.17658405885657685,
|
|
"learning_rate": 2.3084694227099704e-05,
|
|
"loss": 0.3929,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 4.966168371361133,
|
|
"grad_norm": 0.16588933283792434,
|
|
"learning_rate": 2.3066331302086858e-05,
|
|
"loss": 0.3994,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 4.969315499606609,
|
|
"grad_norm": 0.17724140434357114,
|
|
"learning_rate": 2.3047967263333192e-05,
|
|
"loss": 0.3866,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 4.972462627852085,
|
|
"grad_norm": 0.16829639574698707,
|
|
"learning_rate": 2.3029602130019208e-05,
|
|
"loss": 0.3939,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 4.975609756097561,
|
|
"grad_norm": 0.1696857849206108,
|
|
"learning_rate": 2.301123592132657e-05,
|
|
"loss": 0.3942,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 4.978756884343037,
|
|
"grad_norm": 0.1799379463657916,
|
|
"learning_rate": 2.2992868656438046e-05,
|
|
"loss": 0.3877,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 4.9819040125885135,
|
|
"grad_norm": 0.17173263518727672,
|
|
"learning_rate": 2.297450035453752e-05,
|
|
"loss": 0.3906,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 4.985051140833989,
|
|
"grad_norm": 0.16263371788270237,
|
|
"learning_rate": 2.2956131034809957e-05,
|
|
"loss": 0.3943,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 4.988198269079465,
|
|
"grad_norm": 0.18145271609433958,
|
|
"learning_rate": 2.293776071644139e-05,
|
|
"loss": 0.3993,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 4.991345397324941,
|
|
"grad_norm": 0.17931042976589995,
|
|
"learning_rate": 2.291938941861888e-05,
|
|
"loss": 0.3871,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 4.994492525570417,
|
|
"grad_norm": 0.16386736102567098,
|
|
"learning_rate": 2.290101716053053e-05,
|
|
"loss": 0.3738,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 4.997639653815893,
|
|
"grad_norm": 0.1709713086837328,
|
|
"learning_rate": 2.288264396136543e-05,
|
|
"loss": 0.3928,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 5.003147128245476,
|
|
"grad_norm": 0.46162362322835515,
|
|
"learning_rate": 2.2864269840313654e-05,
|
|
"loss": 0.723,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 5.006294256490952,
|
|
"grad_norm": 0.34659232101376264,
|
|
"learning_rate": 2.284589481656625e-05,
|
|
"loss": 0.3346,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 5.009441384736428,
|
|
"grad_norm": 0.34172986216672346,
|
|
"learning_rate": 2.2827518909315206e-05,
|
|
"loss": 0.3367,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 5.012588512981904,
|
|
"grad_norm": 0.4217601397562919,
|
|
"learning_rate": 2.2809142137753422e-05,
|
|
"loss": 0.3196,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 5.01573564122738,
|
|
"grad_norm": 0.27498607831845434,
|
|
"learning_rate": 2.2790764521074717e-05,
|
|
"loss": 0.3274,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 5.018882769472856,
|
|
"grad_norm": 0.337732177614199,
|
|
"learning_rate": 2.2772386078473775e-05,
|
|
"loss": 0.3283,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 5.022029897718332,
|
|
"grad_norm": 0.31183657652743435,
|
|
"learning_rate": 2.2754006829146155e-05,
|
|
"loss": 0.3296,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 5.025177025963808,
|
|
"grad_norm": 0.3478314549506538,
|
|
"learning_rate": 2.2735626792288263e-05,
|
|
"loss": 0.3268,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 5.028324154209284,
|
|
"grad_norm": 0.321081647039957,
|
|
"learning_rate": 2.27172459870973e-05,
|
|
"loss": 0.3216,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 5.03147128245476,
|
|
"grad_norm": 0.27287489022056094,
|
|
"learning_rate": 2.2698864432771313e-05,
|
|
"loss": 0.3324,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 5.034618410700236,
|
|
"grad_norm": 0.3377680793066894,
|
|
"learning_rate": 2.2680482148509092e-05,
|
|
"loss": 0.33,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 5.037765538945712,
|
|
"grad_norm": 0.3099962793181279,
|
|
"learning_rate": 2.266209915351021e-05,
|
|
"loss": 0.3208,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 5.040912667191188,
|
|
"grad_norm": 0.25169801292292504,
|
|
"learning_rate": 2.2643715466974975e-05,
|
|
"loss": 0.3261,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 5.044059795436664,
|
|
"grad_norm": 0.30251965731477554,
|
|
"learning_rate": 2.2625331108104426e-05,
|
|
"loss": 0.3217,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 5.04720692368214,
|
|
"grad_norm": 0.23662305302926548,
|
|
"learning_rate": 2.2606946096100294e-05,
|
|
"loss": 0.3315,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 5.050354051927616,
|
|
"grad_norm": 0.2651596494454096,
|
|
"learning_rate": 2.258856045016499e-05,
|
|
"loss": 0.3345,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 5.053501180173092,
|
|
"grad_norm": 0.2637402059683434,
|
|
"learning_rate": 2.2570174189501608e-05,
|
|
"loss": 0.3269,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 5.056648308418568,
|
|
"grad_norm": 0.23602081515972934,
|
|
"learning_rate": 2.255178733331385e-05,
|
|
"loss": 0.3229,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 5.059795436664044,
|
|
"grad_norm": 0.25433290519235396,
|
|
"learning_rate": 2.253339990080608e-05,
|
|
"loss": 0.3191,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 5.06294256490952,
|
|
"grad_norm": 0.23107096923107467,
|
|
"learning_rate": 2.251501191118323e-05,
|
|
"loss": 0.3356,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 5.066089693154996,
|
|
"grad_norm": 0.25637351601908676,
|
|
"learning_rate": 2.2496623383650828e-05,
|
|
"loss": 0.3265,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 5.069236821400472,
|
|
"grad_norm": 0.21472300935571184,
|
|
"learning_rate": 2.2478234337414962e-05,
|
|
"loss": 0.33,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 5.072383949645948,
|
|
"grad_norm": 0.2338555364338194,
|
|
"learning_rate": 2.245984479168227e-05,
|
|
"loss": 0.3298,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 5.075531077891424,
|
|
"grad_norm": 0.20022516446625999,
|
|
"learning_rate": 2.2441454765659897e-05,
|
|
"loss": 0.3342,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 5.0786782061369005,
|
|
"grad_norm": 0.22488563846995296,
|
|
"learning_rate": 2.2423064278555503e-05,
|
|
"loss": 0.326,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 5.081825334382376,
|
|
"grad_norm": 0.21302916365806326,
|
|
"learning_rate": 2.2404673349577218e-05,
|
|
"loss": 0.3282,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 5.084972462627852,
|
|
"grad_norm": 0.21796018211989795,
|
|
"learning_rate": 2.2386281997933646e-05,
|
|
"loss": 0.3258,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 5.088119590873328,
|
|
"grad_norm": 0.22425397553381501,
|
|
"learning_rate": 2.2367890242833815e-05,
|
|
"loss": 0.3297,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 5.091266719118804,
|
|
"grad_norm": 0.19706564657591386,
|
|
"learning_rate": 2.2349498103487197e-05,
|
|
"loss": 0.3273,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 5.09441384736428,
|
|
"grad_norm": 0.2071088006731519,
|
|
"learning_rate": 2.233110559910365e-05,
|
|
"loss": 0.3211,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 5.097560975609756,
|
|
"grad_norm": 0.21348102050857448,
|
|
"learning_rate": 2.2312712748893403e-05,
|
|
"loss": 0.3232,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 5.100708103855232,
|
|
"grad_norm": 0.18908349503115035,
|
|
"learning_rate": 2.2294319572067082e-05,
|
|
"loss": 0.3229,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 5.103855232100708,
|
|
"grad_norm": 0.21575807977186254,
|
|
"learning_rate": 2.2275926087835625e-05,
|
|
"loss": 0.3229,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 5.1070023603461845,
|
|
"grad_norm": 0.20410626875283436,
|
|
"learning_rate": 2.2257532315410288e-05,
|
|
"loss": 0.3261,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 5.11014948859166,
|
|
"grad_norm": 0.23313876176564874,
|
|
"learning_rate": 2.2239138274002642e-05,
|
|
"loss": 0.3298,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 5.113296616837136,
|
|
"grad_norm": 0.19848085470082366,
|
|
"learning_rate": 2.2220743982824536e-05,
|
|
"loss": 0.3244,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 5.116443745082612,
|
|
"grad_norm": 0.23764358322337617,
|
|
"learning_rate": 2.2202349461088084e-05,
|
|
"loss": 0.3316,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 5.119590873328088,
|
|
"grad_norm": 0.20745220698687916,
|
|
"learning_rate": 2.2183954728005625e-05,
|
|
"loss": 0.3225,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 5.122738001573564,
|
|
"grad_norm": 0.223041599846075,
|
|
"learning_rate": 2.216555980278974e-05,
|
|
"loss": 0.3261,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 5.12588512981904,
|
|
"grad_norm": 0.19422755456096907,
|
|
"learning_rate": 2.2147164704653202e-05,
|
|
"loss": 0.3271,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 5.129032258064516,
|
|
"grad_norm": 0.20440115995525865,
|
|
"learning_rate": 2.2128769452808956e-05,
|
|
"loss": 0.3272,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 5.132179386309992,
|
|
"grad_norm": 0.2187283808498755,
|
|
"learning_rate": 2.211037406647011e-05,
|
|
"loss": 0.3265,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 5.1353265145554685,
|
|
"grad_norm": 0.2050591037215658,
|
|
"learning_rate": 2.2091978564849926e-05,
|
|
"loss": 0.3229,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 5.138473642800944,
|
|
"grad_norm": 0.22811383006695085,
|
|
"learning_rate": 2.2073582967161768e-05,
|
|
"loss": 0.336,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 5.14162077104642,
|
|
"grad_norm": 0.21037766403293978,
|
|
"learning_rate": 2.2055187292619112e-05,
|
|
"loss": 0.3234,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 5.144767899291896,
|
|
"grad_norm": 0.22544584006363033,
|
|
"learning_rate": 2.2036791560435522e-05,
|
|
"loss": 0.3232,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 5.147915027537372,
|
|
"grad_norm": 0.23307986768402664,
|
|
"learning_rate": 2.20183957898246e-05,
|
|
"loss": 0.3299,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 5.151062155782848,
|
|
"grad_norm": 0.20220722711990272,
|
|
"learning_rate": 2.2000000000000003e-05,
|
|
"loss": 0.3215,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 5.154209284028324,
|
|
"grad_norm": 0.21396702627741238,
|
|
"learning_rate": 2.1981604210175407e-05,
|
|
"loss": 0.3261,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 5.1573564122738,
|
|
"grad_norm": 0.23101866923319364,
|
|
"learning_rate": 2.196320843956449e-05,
|
|
"loss": 0.3234,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 5.160503540519276,
|
|
"grad_norm": 0.22249676919651665,
|
|
"learning_rate": 2.1944812707380897e-05,
|
|
"loss": 0.3278,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 5.1636506687647525,
|
|
"grad_norm": 0.21159027665052352,
|
|
"learning_rate": 2.1926417032838238e-05,
|
|
"loss": 0.3261,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 5.166797797010228,
|
|
"grad_norm": 0.20441901079236766,
|
|
"learning_rate": 2.1908021435150083e-05,
|
|
"loss": 0.3249,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 5.169944925255704,
|
|
"grad_norm": 0.22690097885692212,
|
|
"learning_rate": 2.18896259335299e-05,
|
|
"loss": 0.3263,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 5.17309205350118,
|
|
"grad_norm": 0.19474865782338907,
|
|
"learning_rate": 2.1871230547191057e-05,
|
|
"loss": 0.3241,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 5.176239181746656,
|
|
"grad_norm": 0.24748820815778508,
|
|
"learning_rate": 2.18528352953468e-05,
|
|
"loss": 0.3293,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 5.1793863099921325,
|
|
"grad_norm": 0.21000623423513556,
|
|
"learning_rate": 2.1834440197210254e-05,
|
|
"loss": 0.3396,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 5.182533438237608,
|
|
"grad_norm": 0.2297339762152351,
|
|
"learning_rate": 2.1816045271994377e-05,
|
|
"loss": 0.3355,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 5.185680566483084,
|
|
"grad_norm": 0.23065919694389042,
|
|
"learning_rate": 2.1797650538911922e-05,
|
|
"loss": 0.3266,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 5.18882769472856,
|
|
"grad_norm": 0.21981603962817217,
|
|
"learning_rate": 2.1779256017175473e-05,
|
|
"loss": 0.3216,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 5.191974822974037,
|
|
"grad_norm": 0.2608827800438322,
|
|
"learning_rate": 2.1760861725997367e-05,
|
|
"loss": 0.3191,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 5.195121951219512,
|
|
"grad_norm": 0.19452350370213584,
|
|
"learning_rate": 2.1742467684589725e-05,
|
|
"loss": 0.3259,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 5.198269079464988,
|
|
"grad_norm": 0.22996447660538494,
|
|
"learning_rate": 2.1724073912164387e-05,
|
|
"loss": 0.3284,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 5.201416207710464,
|
|
"grad_norm": 0.22489712820890972,
|
|
"learning_rate": 2.170568042793292e-05,
|
|
"loss": 0.3229,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 5.20456333595594,
|
|
"grad_norm": 0.2002513690412124,
|
|
"learning_rate": 2.16872872511066e-05,
|
|
"loss": 0.3335,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 5.2077104642014165,
|
|
"grad_norm": 0.21768101783798655,
|
|
"learning_rate": 2.166889440089636e-05,
|
|
"loss": 0.3197,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 5.210857592446892,
|
|
"grad_norm": 0.2105177118679401,
|
|
"learning_rate": 2.165050189651281e-05,
|
|
"loss": 0.3312,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 5.214004720692368,
|
|
"grad_norm": 0.21009669854087792,
|
|
"learning_rate": 2.163210975716619e-05,
|
|
"loss": 0.3288,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 5.217151848937844,
|
|
"grad_norm": 0.21015093693379167,
|
|
"learning_rate": 2.1613718002066363e-05,
|
|
"loss": 0.3296,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 5.220298977183321,
|
|
"grad_norm": 0.22642270974424877,
|
|
"learning_rate": 2.1595326650422784e-05,
|
|
"loss": 0.325,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 5.223446105428796,
|
|
"grad_norm": 0.20862201953387366,
|
|
"learning_rate": 2.15769357214445e-05,
|
|
"loss": 0.3287,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 5.226593233674272,
|
|
"grad_norm": 0.23556943931498991,
|
|
"learning_rate": 2.1558545234340108e-05,
|
|
"loss": 0.3208,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 5.229740361919748,
|
|
"grad_norm": 0.21273958624925166,
|
|
"learning_rate": 2.1540155208317736e-05,
|
|
"loss": 0.3254,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 5.232887490165224,
|
|
"grad_norm": 0.21120587575901487,
|
|
"learning_rate": 2.1521765662585047e-05,
|
|
"loss": 0.3278,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 5.2360346184107005,
|
|
"grad_norm": 0.2191912573575056,
|
|
"learning_rate": 2.150337661634918e-05,
|
|
"loss": 0.3275,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 5.239181746656176,
|
|
"grad_norm": 0.18918312365625706,
|
|
"learning_rate": 2.1484988088816784e-05,
|
|
"loss": 0.3245,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 5.242328874901652,
|
|
"grad_norm": 0.24442600792973201,
|
|
"learning_rate": 2.146660009919393e-05,
|
|
"loss": 0.3366,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 5.245476003147128,
|
|
"grad_norm": 0.19190784043500905,
|
|
"learning_rate": 2.1448212666686153e-05,
|
|
"loss": 0.3235,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 5.248623131392605,
|
|
"grad_norm": 0.19845803273670526,
|
|
"learning_rate": 2.1429825810498405e-05,
|
|
"loss": 0.3247,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 5.25177025963808,
|
|
"grad_norm": 0.22683790832172754,
|
|
"learning_rate": 2.141143954983502e-05,
|
|
"loss": 0.3277,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 5.254917387883556,
|
|
"grad_norm": 0.20007675897146535,
|
|
"learning_rate": 2.1393053903899715e-05,
|
|
"loss": 0.3293,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 5.258064516129032,
|
|
"grad_norm": 0.22364967785365925,
|
|
"learning_rate": 2.1374668891895586e-05,
|
|
"loss": 0.3317,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 5.261211644374509,
|
|
"grad_norm": 0.19696570309865535,
|
|
"learning_rate": 2.1356284533025034e-05,
|
|
"loss": 0.3357,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 5.2643587726199845,
|
|
"grad_norm": 0.20720814373699586,
|
|
"learning_rate": 2.1337900846489794e-05,
|
|
"loss": 0.3304,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 5.26750590086546,
|
|
"grad_norm": 0.22251808274139923,
|
|
"learning_rate": 2.1319517851490917e-05,
|
|
"loss": 0.3342,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 5.270653029110936,
|
|
"grad_norm": 0.19960532937969883,
|
|
"learning_rate": 2.130113556722869e-05,
|
|
"loss": 0.3213,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 5.273800157356412,
|
|
"grad_norm": 0.22611359477988568,
|
|
"learning_rate": 2.12827540129027e-05,
|
|
"loss": 0.3304,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 5.276947285601889,
|
|
"grad_norm": 0.21377559505306823,
|
|
"learning_rate": 2.126437320771175e-05,
|
|
"loss": 0.333,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 5.280094413847364,
|
|
"grad_norm": 0.21364402742573374,
|
|
"learning_rate": 2.124599317085385e-05,
|
|
"loss": 0.3252,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 5.28324154209284,
|
|
"grad_norm": 0.20440996232135555,
|
|
"learning_rate": 2.1227613921526234e-05,
|
|
"loss": 0.3302,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 5.286388670338316,
|
|
"grad_norm": 0.20439524727339334,
|
|
"learning_rate": 2.1209235478925292e-05,
|
|
"loss": 0.327,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 5.289535798583792,
|
|
"grad_norm": 0.21129883255126156,
|
|
"learning_rate": 2.1190857862246587e-05,
|
|
"loss": 0.3317,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 5.2926829268292686,
|
|
"grad_norm": 0.1832955706368962,
|
|
"learning_rate": 2.1172481090684803e-05,
|
|
"loss": 0.3285,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 5.295830055074744,
|
|
"grad_norm": 0.21771566260776035,
|
|
"learning_rate": 2.1154105183433758e-05,
|
|
"loss": 0.3296,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 5.29897718332022,
|
|
"grad_norm": 0.1943908762637486,
|
|
"learning_rate": 2.1135730159686355e-05,
|
|
"loss": 0.3378,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 5.302124311565696,
|
|
"grad_norm": 0.19415021103057115,
|
|
"learning_rate": 2.1117356038634584e-05,
|
|
"loss": 0.3284,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 5.305271439811173,
|
|
"grad_norm": 0.19441982459516802,
|
|
"learning_rate": 2.109898283946948e-05,
|
|
"loss": 0.3238,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 5.3084185680566485,
|
|
"grad_norm": 0.19773621537262287,
|
|
"learning_rate": 2.1080610581381128e-05,
|
|
"loss": 0.3285,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 5.311565696302124,
|
|
"grad_norm": 0.2120736125028019,
|
|
"learning_rate": 2.106223928355861e-05,
|
|
"loss": 0.3324,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 5.3147128245476,
|
|
"grad_norm": 0.19760073719764953,
|
|
"learning_rate": 2.1043868965190045e-05,
|
|
"loss": 0.3324,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 5.317859952793077,
|
|
"grad_norm": 0.19405070182884,
|
|
"learning_rate": 2.1025499645462485e-05,
|
|
"loss": 0.3375,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 5.321007081038553,
|
|
"grad_norm": 0.1956189468411377,
|
|
"learning_rate": 2.100713134356196e-05,
|
|
"loss": 0.3255,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 5.324154209284028,
|
|
"grad_norm": 0.19321084864706617,
|
|
"learning_rate": 2.098876407867344e-05,
|
|
"loss": 0.3308,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 5.327301337529504,
|
|
"grad_norm": 0.19304288190055158,
|
|
"learning_rate": 2.0970397869980798e-05,
|
|
"loss": 0.3286,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 5.33044846577498,
|
|
"grad_norm": 0.1986064395829299,
|
|
"learning_rate": 2.0952032736666817e-05,
|
|
"loss": 0.3291,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 5.333595594020457,
|
|
"grad_norm": 0.19746810657897224,
|
|
"learning_rate": 2.0933668697913148e-05,
|
|
"loss": 0.3336,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 5.3367427222659325,
|
|
"grad_norm": 0.19729660360055334,
|
|
"learning_rate": 2.09153057729003e-05,
|
|
"loss": 0.3348,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 5.339889850511408,
|
|
"grad_norm": 0.18855256963341346,
|
|
"learning_rate": 2.0896943980807633e-05,
|
|
"loss": 0.3372,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 5.343036978756884,
|
|
"grad_norm": 0.2009978457776624,
|
|
"learning_rate": 2.0878583340813295e-05,
|
|
"loss": 0.3288,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 5.34618410700236,
|
|
"grad_norm": 0.20225991858456713,
|
|
"learning_rate": 2.0860223872094264e-05,
|
|
"loss": 0.3271,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 5.349331235247837,
|
|
"grad_norm": 0.18569033139215133,
|
|
"learning_rate": 2.084186559382627e-05,
|
|
"loss": 0.3287,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 5.352478363493312,
|
|
"grad_norm": 0.19538260959362502,
|
|
"learning_rate": 2.0823508525183805e-05,
|
|
"loss": 0.3249,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 5.355625491738788,
|
|
"grad_norm": 0.20717894047667273,
|
|
"learning_rate": 2.08051526853401e-05,
|
|
"loss": 0.3336,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 5.358772619984264,
|
|
"grad_norm": 0.19701862641818213,
|
|
"learning_rate": 2.0786798093467114e-05,
|
|
"loss": 0.3344,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 5.361919748229741,
|
|
"grad_norm": 0.1906266432884064,
|
|
"learning_rate": 2.0768444768735478e-05,
|
|
"loss": 0.3334,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 5.3650668764752165,
|
|
"grad_norm": 0.21331000047158513,
|
|
"learning_rate": 2.0750092730314522e-05,
|
|
"loss": 0.3349,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 5.368214004720692,
|
|
"grad_norm": 0.19487279888174047,
|
|
"learning_rate": 2.0731741997372228e-05,
|
|
"loss": 0.3211,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 5.371361132966168,
|
|
"grad_norm": 0.21324736219816784,
|
|
"learning_rate": 2.071339258907519e-05,
|
|
"loss": 0.3385,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 5.374508261211645,
|
|
"grad_norm": 0.18260011303295876,
|
|
"learning_rate": 2.0695044524588658e-05,
|
|
"loss": 0.332,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 5.377655389457121,
|
|
"grad_norm": 0.21643053077446903,
|
|
"learning_rate": 2.0676697823076453e-05,
|
|
"loss": 0.326,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 5.380802517702596,
|
|
"grad_norm": 0.18935558592473964,
|
|
"learning_rate": 2.065835250370098e-05,
|
|
"loss": 0.3286,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 5.383949645948072,
|
|
"grad_norm": 0.21738265810196228,
|
|
"learning_rate": 2.064000858562319e-05,
|
|
"loss": 0.327,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 5.387096774193548,
|
|
"grad_norm": 0.188452014974482,
|
|
"learning_rate": 2.0621666088002586e-05,
|
|
"loss": 0.3363,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 5.390243902439025,
|
|
"grad_norm": 0.21149246855169024,
|
|
"learning_rate": 2.060332502999719e-05,
|
|
"loss": 0.3342,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 5.3933910306845005,
|
|
"grad_norm": 0.1994954339555043,
|
|
"learning_rate": 2.0584985430763483e-05,
|
|
"loss": 0.333,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 5.396538158929976,
|
|
"grad_norm": 0.18853756790169016,
|
|
"learning_rate": 2.0566647309456476e-05,
|
|
"loss": 0.3344,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 5.399685287175452,
|
|
"grad_norm": 0.19943484129450234,
|
|
"learning_rate": 2.0548310685229605e-05,
|
|
"loss": 0.3345,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 5.402832415420928,
|
|
"grad_norm": 0.1941995141451995,
|
|
"learning_rate": 2.052997557723474e-05,
|
|
"loss": 0.3282,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 5.405979543666405,
|
|
"grad_norm": 0.19191713025156307,
|
|
"learning_rate": 2.051164200462218e-05,
|
|
"loss": 0.3345,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 5.4091266719118805,
|
|
"grad_norm": 0.18974347512799264,
|
|
"learning_rate": 2.0493309986540626e-05,
|
|
"loss": 0.3413,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 5.412273800157356,
|
|
"grad_norm": 0.19403906103651297,
|
|
"learning_rate": 2.047497954213713e-05,
|
|
"loss": 0.33,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 5.415420928402832,
|
|
"grad_norm": 0.19827267122676,
|
|
"learning_rate": 2.0456650690557126e-05,
|
|
"loss": 0.3347,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 5.418568056648309,
|
|
"grad_norm": 0.18940704828298557,
|
|
"learning_rate": 2.043832345094436e-05,
|
|
"loss": 0.331,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 5.421715184893785,
|
|
"grad_norm": 0.19382447016721407,
|
|
"learning_rate": 2.041999784244092e-05,
|
|
"loss": 0.3403,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 5.42486231313926,
|
|
"grad_norm": 0.1908670547017546,
|
|
"learning_rate": 2.0401673884187178e-05,
|
|
"loss": 0.3382,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 5.428009441384736,
|
|
"grad_norm": 0.18977270804467197,
|
|
"learning_rate": 2.0383351595321777e-05,
|
|
"loss": 0.3269,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 5.431156569630213,
|
|
"grad_norm": 0.19390756152600916,
|
|
"learning_rate": 2.036503099498163e-05,
|
|
"loss": 0.3351,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 5.434303697875689,
|
|
"grad_norm": 0.1822739884958527,
|
|
"learning_rate": 2.034671210230187e-05,
|
|
"loss": 0.3283,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 5.4374508261211645,
|
|
"grad_norm": 0.20644018287214425,
|
|
"learning_rate": 2.0328394936415862e-05,
|
|
"loss": 0.333,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 5.44059795436664,
|
|
"grad_norm": 0.19229713655862757,
|
|
"learning_rate": 2.0310079516455158e-05,
|
|
"loss": 0.3336,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 5.443745082612116,
|
|
"grad_norm": 0.19157755715085112,
|
|
"learning_rate": 2.0291765861549485e-05,
|
|
"loss": 0.3319,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 5.446892210857593,
|
|
"grad_norm": 0.20424545745124856,
|
|
"learning_rate": 2.0273453990826734e-05,
|
|
"loss": 0.3368,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 5.450039339103069,
|
|
"grad_norm": 0.19085673571348755,
|
|
"learning_rate": 2.0255143923412926e-05,
|
|
"loss": 0.3334,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 5.453186467348544,
|
|
"grad_norm": 0.18918109876476505,
|
|
"learning_rate": 2.0236835678432216e-05,
|
|
"loss": 0.3475,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 5.45633359559402,
|
|
"grad_norm": 0.20280285926641467,
|
|
"learning_rate": 2.0218529275006823e-05,
|
|
"loss": 0.3286,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 5.459480723839496,
|
|
"grad_norm": 0.1916773765490627,
|
|
"learning_rate": 2.020022473225707e-05,
|
|
"loss": 0.3391,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 5.462627852084973,
|
|
"grad_norm": 0.19384714862396943,
|
|
"learning_rate": 2.0181922069301323e-05,
|
|
"loss": 0.3264,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 5.4657749803304485,
|
|
"grad_norm": 0.18923980213552452,
|
|
"learning_rate": 2.016362130525599e-05,
|
|
"loss": 0.3331,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 5.468922108575924,
|
|
"grad_norm": 0.17898327293803962,
|
|
"learning_rate": 2.0145322459235496e-05,
|
|
"loss": 0.3362,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 5.4720692368214,
|
|
"grad_norm": 0.19587397865824116,
|
|
"learning_rate": 2.0127025550352255e-05,
|
|
"loss": 0.3199,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 5.475216365066877,
|
|
"grad_norm": 0.1941075025448282,
|
|
"learning_rate": 2.010873059771667e-05,
|
|
"loss": 0.3349,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 5.478363493312353,
|
|
"grad_norm": 0.1771702814313618,
|
|
"learning_rate": 2.0090437620437097e-05,
|
|
"loss": 0.3255,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 5.481510621557828,
|
|
"grad_norm": 0.19485668971197115,
|
|
"learning_rate": 2.0072146637619822e-05,
|
|
"loss": 0.3227,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 5.484657749803304,
|
|
"grad_norm": 0.19804684145553095,
|
|
"learning_rate": 2.0053857668369054e-05,
|
|
"loss": 0.3282,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 5.487804878048781,
|
|
"grad_norm": 0.1918642737386288,
|
|
"learning_rate": 2.0035570731786898e-05,
|
|
"loss": 0.3308,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 5.490952006294257,
|
|
"grad_norm": 0.196904058951029,
|
|
"learning_rate": 2.001728584697332e-05,
|
|
"loss": 0.3369,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 5.4940991345397325,
|
|
"grad_norm": 0.19456782195452768,
|
|
"learning_rate": 1.999900303302618e-05,
|
|
"loss": 0.3304,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 5.497246262785208,
|
|
"grad_norm": 0.18856607162185943,
|
|
"learning_rate": 1.9980722309041153e-05,
|
|
"loss": 0.3387,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 5.500393391030684,
|
|
"grad_norm": 0.19651502014126437,
|
|
"learning_rate": 1.996244369411171e-05,
|
|
"loss": 0.337,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 5.503540519276161,
|
|
"grad_norm": 0.18726822437116097,
|
|
"learning_rate": 1.9944167207329163e-05,
|
|
"loss": 0.3353,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 5.506687647521637,
|
|
"grad_norm": 0.19603676603474196,
|
|
"learning_rate": 1.992589286778257e-05,
|
|
"loss": 0.3258,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 5.5098347757671124,
|
|
"grad_norm": 0.182602364247368,
|
|
"learning_rate": 1.9907620694558757e-05,
|
|
"loss": 0.3324,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 5.512981904012588,
|
|
"grad_norm": 0.18782759246781305,
|
|
"learning_rate": 1.9889350706742278e-05,
|
|
"loss": 0.3314,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 5.516129032258064,
|
|
"grad_norm": 0.1966254586938506,
|
|
"learning_rate": 1.9871082923415418e-05,
|
|
"loss": 0.3361,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 5.519276160503541,
|
|
"grad_norm": 0.2000474732115638,
|
|
"learning_rate": 1.9852817363658157e-05,
|
|
"loss": 0.334,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 5.522423288749017,
|
|
"grad_norm": 0.18864691630189573,
|
|
"learning_rate": 1.983455404654814e-05,
|
|
"loss": 0.3384,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 5.525570416994492,
|
|
"grad_norm": 0.17699746763746177,
|
|
"learning_rate": 1.9816292991160682e-05,
|
|
"loss": 0.3369,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 5.528717545239968,
|
|
"grad_norm": 0.2046692057530345,
|
|
"learning_rate": 1.979803421656874e-05,
|
|
"loss": 0.3324,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 5.531864673485445,
|
|
"grad_norm": 0.17773907244324694,
|
|
"learning_rate": 1.977977774184287e-05,
|
|
"loss": 0.3306,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 5.535011801730921,
|
|
"grad_norm": 0.2060724377420153,
|
|
"learning_rate": 1.9761523586051247e-05,
|
|
"loss": 0.3347,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 5.5381589299763965,
|
|
"grad_norm": 0.19361078551446964,
|
|
"learning_rate": 1.9743271768259597e-05,
|
|
"loss": 0.3293,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 5.541306058221872,
|
|
"grad_norm": 0.20747437115821293,
|
|
"learning_rate": 1.9725022307531238e-05,
|
|
"loss": 0.3319,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 5.544453186467349,
|
|
"grad_norm": 0.20542783975059725,
|
|
"learning_rate": 1.970677522292701e-05,
|
|
"loss": 0.3293,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 5.547600314712825,
|
|
"grad_norm": 0.20422541341547726,
|
|
"learning_rate": 1.9688530533505262e-05,
|
|
"loss": 0.3298,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 5.550747442958301,
|
|
"grad_norm": 0.19261928228370265,
|
|
"learning_rate": 1.9670288258321844e-05,
|
|
"loss": 0.3291,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 5.553894571203776,
|
|
"grad_norm": 0.21510991198866017,
|
|
"learning_rate": 1.965204841643011e-05,
|
|
"loss": 0.3355,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 5.557041699449252,
|
|
"grad_norm": 0.20838170757363106,
|
|
"learning_rate": 1.9633811026880836e-05,
|
|
"loss": 0.3361,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 5.560188827694729,
|
|
"grad_norm": 0.19924189860550076,
|
|
"learning_rate": 1.961557610872226e-05,
|
|
"loss": 0.3408,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 5.563335955940205,
|
|
"grad_norm": 0.19631983491806956,
|
|
"learning_rate": 1.9597343681000026e-05,
|
|
"loss": 0.3314,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 5.5664830841856805,
|
|
"grad_norm": 0.20427189176713215,
|
|
"learning_rate": 1.9579113762757193e-05,
|
|
"loss": 0.3343,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 5.569630212431156,
|
|
"grad_norm": 0.20654636293900627,
|
|
"learning_rate": 1.956088637303418e-05,
|
|
"loss": 0.3391,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 5.572777340676632,
|
|
"grad_norm": 0.21104773479274883,
|
|
"learning_rate": 1.954266153086877e-05,
|
|
"loss": 0.342,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 5.575924468922109,
|
|
"grad_norm": 0.20296107799646898,
|
|
"learning_rate": 1.9524439255296105e-05,
|
|
"loss": 0.3327,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 5.579071597167585,
|
|
"grad_norm": 0.18312485399358078,
|
|
"learning_rate": 1.9506219565348622e-05,
|
|
"loss": 0.3423,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 5.58221872541306,
|
|
"grad_norm": 0.20577262423625045,
|
|
"learning_rate": 1.948800248005605e-05,
|
|
"loss": 0.3312,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 5.585365853658536,
|
|
"grad_norm": 0.18830784164844272,
|
|
"learning_rate": 1.946978801844544e-05,
|
|
"loss": 0.3314,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 5.588512981904013,
|
|
"grad_norm": 0.1983034432372846,
|
|
"learning_rate": 1.9451576199541063e-05,
|
|
"loss": 0.3369,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 5.591660110149489,
|
|
"grad_norm": 0.18977643322424897,
|
|
"learning_rate": 1.9433367042364447e-05,
|
|
"loss": 0.331,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 5.5948072383949645,
|
|
"grad_norm": 0.20788619352037782,
|
|
"learning_rate": 1.941516056593433e-05,
|
|
"loss": 0.3308,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 5.59795436664044,
|
|
"grad_norm": 0.18636419597292284,
|
|
"learning_rate": 1.9396956789266663e-05,
|
|
"loss": 0.3418,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 5.601101494885917,
|
|
"grad_norm": 0.2241714096919009,
|
|
"learning_rate": 1.9378755731374557e-05,
|
|
"loss": 0.3375,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 5.604248623131393,
|
|
"grad_norm": 0.17829719329566568,
|
|
"learning_rate": 1.9360557411268307e-05,
|
|
"loss": 0.3348,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 5.607395751376869,
|
|
"grad_norm": 0.20049814376921224,
|
|
"learning_rate": 1.9342361847955345e-05,
|
|
"loss": 0.3238,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 5.610542879622344,
|
|
"grad_norm": 0.20325169827837897,
|
|
"learning_rate": 1.9324169060440194e-05,
|
|
"loss": 0.3337,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 5.61369000786782,
|
|
"grad_norm": 0.18595173050891842,
|
|
"learning_rate": 1.930597906772452e-05,
|
|
"loss": 0.3361,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 5.616837136113297,
|
|
"grad_norm": 0.19965999907742346,
|
|
"learning_rate": 1.9287791888807048e-05,
|
|
"loss": 0.338,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 5.619984264358773,
|
|
"grad_norm": 0.19545992600515816,
|
|
"learning_rate": 1.9269607542683552e-05,
|
|
"loss": 0.3359,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 5.6231313926042485,
|
|
"grad_norm": 0.2188054711004221,
|
|
"learning_rate": 1.9251426048346877e-05,
|
|
"loss": 0.3378,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 5.626278520849724,
|
|
"grad_norm": 0.19412474756377676,
|
|
"learning_rate": 1.923324742478686e-05,
|
|
"loss": 0.336,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 5.6294256490952,
|
|
"grad_norm": 0.22045546487078188,
|
|
"learning_rate": 1.9215071690990365e-05,
|
|
"loss": 0.3237,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 5.632572777340677,
|
|
"grad_norm": 0.1936651347415692,
|
|
"learning_rate": 1.9196898865941227e-05,
|
|
"loss": 0.3348,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 5.635719905586153,
|
|
"grad_norm": 0.21256108861119585,
|
|
"learning_rate": 1.917872896862024e-05,
|
|
"loss": 0.3355,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 5.6388670338316285,
|
|
"grad_norm": 0.19411087471884958,
|
|
"learning_rate": 1.916056201800514e-05,
|
|
"loss": 0.328,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 5.642014162077104,
|
|
"grad_norm": 0.19203184010960683,
|
|
"learning_rate": 1.9142398033070585e-05,
|
|
"loss": 0.3332,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 5.645161290322581,
|
|
"grad_norm": 0.19116732074421183,
|
|
"learning_rate": 1.9124237032788144e-05,
|
|
"loss": 0.3289,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 5.648308418568057,
|
|
"grad_norm": 0.2029836866282038,
|
|
"learning_rate": 1.910607903612626e-05,
|
|
"loss": 0.3301,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 5.651455546813533,
|
|
"grad_norm": 0.18620591358236213,
|
|
"learning_rate": 1.9087924062050235e-05,
|
|
"loss": 0.3273,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 5.654602675059008,
|
|
"grad_norm": 0.194743778672627,
|
|
"learning_rate": 1.9069772129522236e-05,
|
|
"loss": 0.3361,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 5.657749803304485,
|
|
"grad_norm": 0.17439717206736727,
|
|
"learning_rate": 1.9051623257501223e-05,
|
|
"loss": 0.3359,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 5.660896931549961,
|
|
"grad_norm": 0.21536385200503502,
|
|
"learning_rate": 1.9033477464942985e-05,
|
|
"loss": 0.3316,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 5.664044059795437,
|
|
"grad_norm": 0.19927232488706062,
|
|
"learning_rate": 1.9015334770800084e-05,
|
|
"loss": 0.3428,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 5.6671911880409125,
|
|
"grad_norm": 0.18222541338952222,
|
|
"learning_rate": 1.899719519402183e-05,
|
|
"loss": 0.3371,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 5.670338316286388,
|
|
"grad_norm": 0.20776846100700097,
|
|
"learning_rate": 1.897905875355431e-05,
|
|
"loss": 0.3333,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 5.673485444531865,
|
|
"grad_norm": 0.20498121411547227,
|
|
"learning_rate": 1.89609254683403e-05,
|
|
"loss": 0.337,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 5.676632572777341,
|
|
"grad_norm": 0.2057071418903273,
|
|
"learning_rate": 1.8942795357319325e-05,
|
|
"loss": 0.3422,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 5.679779701022817,
|
|
"grad_norm": 0.18447805097886538,
|
|
"learning_rate": 1.892466843942754e-05,
|
|
"loss": 0.3357,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 5.682926829268292,
|
|
"grad_norm": 0.20151427634192418,
|
|
"learning_rate": 1.8906544733597817e-05,
|
|
"loss": 0.3341,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 5.686073957513768,
|
|
"grad_norm": 0.19101557171738096,
|
|
"learning_rate": 1.888842425875964e-05,
|
|
"loss": 0.3396,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 5.689221085759245,
|
|
"grad_norm": 0.2109852771210735,
|
|
"learning_rate": 1.887030703383912e-05,
|
|
"loss": 0.3392,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 5.692368214004721,
|
|
"grad_norm": 0.19115514267570324,
|
|
"learning_rate": 1.885219307775899e-05,
|
|
"loss": 0.3363,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 5.6955153422501965,
|
|
"grad_norm": 0.21293008134701097,
|
|
"learning_rate": 1.8834082409438553e-05,
|
|
"loss": 0.3328,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 5.698662470495672,
|
|
"grad_norm": 0.18526154866614372,
|
|
"learning_rate": 1.8815975047793694e-05,
|
|
"loss": 0.3273,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 5.701809598741149,
|
|
"grad_norm": 0.18777683810128346,
|
|
"learning_rate": 1.8797871011736823e-05,
|
|
"loss": 0.3392,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 5.704956726986625,
|
|
"grad_norm": 0.18257028157402827,
|
|
"learning_rate": 1.87797703201769e-05,
|
|
"loss": 0.3303,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 5.708103855232101,
|
|
"grad_norm": 0.19418106072360075,
|
|
"learning_rate": 1.8761672992019377e-05,
|
|
"loss": 0.3344,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 5.711250983477576,
|
|
"grad_norm": 0.18457444042449656,
|
|
"learning_rate": 1.87435790461662e-05,
|
|
"loss": 0.3278,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 5.714398111723053,
|
|
"grad_norm": 0.18582325962284854,
|
|
"learning_rate": 1.872548850151577e-05,
|
|
"loss": 0.3264,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 5.717545239968529,
|
|
"grad_norm": 0.18103618532916696,
|
|
"learning_rate": 1.8707401376962946e-05,
|
|
"loss": 0.3315,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 5.720692368214005,
|
|
"grad_norm": 0.18382505326981982,
|
|
"learning_rate": 1.8689317691399026e-05,
|
|
"loss": 0.3367,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 5.7238394964594805,
|
|
"grad_norm": 0.1908586561032934,
|
|
"learning_rate": 1.867123746371169e-05,
|
|
"loss": 0.3315,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 5.726986624704956,
|
|
"grad_norm": 0.18359448429174133,
|
|
"learning_rate": 1.865316071278503e-05,
|
|
"loss": 0.3352,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 5.730133752950433,
|
|
"grad_norm": 0.19607618023789522,
|
|
"learning_rate": 1.8635087457499485e-05,
|
|
"loss": 0.3319,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 5.733280881195909,
|
|
"grad_norm": 0.17583841531340308,
|
|
"learning_rate": 1.8617017716731865e-05,
|
|
"loss": 0.334,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 5.736428009441385,
|
|
"grad_norm": 0.1866341211246049,
|
|
"learning_rate": 1.8598951509355293e-05,
|
|
"loss": 0.33,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 5.7395751376868605,
|
|
"grad_norm": 0.18608286547921962,
|
|
"learning_rate": 1.8580888854239213e-05,
|
|
"loss": 0.3361,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 5.742722265932336,
|
|
"grad_norm": 0.18536619850476266,
|
|
"learning_rate": 1.856282977024935e-05,
|
|
"loss": 0.3387,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 5.745869394177813,
|
|
"grad_norm": 0.2015065945779001,
|
|
"learning_rate": 1.85447742762477e-05,
|
|
"loss": 0.3413,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 5.749016522423289,
|
|
"grad_norm": 0.19420792957675226,
|
|
"learning_rate": 1.8526722391092513e-05,
|
|
"loss": 0.3379,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 5.752163650668765,
|
|
"grad_norm": 0.20788168048073424,
|
|
"learning_rate": 1.850867413363827e-05,
|
|
"loss": 0.3299,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 5.755310778914241,
|
|
"grad_norm": 0.20012818061899734,
|
|
"learning_rate": 1.8490629522735658e-05,
|
|
"loss": 0.335,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 5.758457907159717,
|
|
"grad_norm": 0.19094123225319237,
|
|
"learning_rate": 1.8472588577231558e-05,
|
|
"loss": 0.3289,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 5.761605035405193,
|
|
"grad_norm": 0.1903248865857444,
|
|
"learning_rate": 1.8454551315969023e-05,
|
|
"loss": 0.3328,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 5.764752163650669,
|
|
"grad_norm": 0.19908659395090694,
|
|
"learning_rate": 1.8436517757787268e-05,
|
|
"loss": 0.3289,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 5.7678992918961445,
|
|
"grad_norm": 0.19909089376488692,
|
|
"learning_rate": 1.841848792152162e-05,
|
|
"loss": 0.3317,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 5.771046420141621,
|
|
"grad_norm": 0.18638784799057997,
|
|
"learning_rate": 1.8400461826003536e-05,
|
|
"loss": 0.3296,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 5.774193548387097,
|
|
"grad_norm": 0.21107469641971743,
|
|
"learning_rate": 1.8382439490060556e-05,
|
|
"loss": 0.341,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 5.777340676632573,
|
|
"grad_norm": 0.18826238700241835,
|
|
"learning_rate": 1.8364420932516296e-05,
|
|
"loss": 0.3352,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 5.780487804878049,
|
|
"grad_norm": 0.2011868737529004,
|
|
"learning_rate": 1.8346406172190415e-05,
|
|
"loss": 0.3373,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 5.783634933123524,
|
|
"grad_norm": 0.18535453103657012,
|
|
"learning_rate": 1.8328395227898638e-05,
|
|
"loss": 0.3324,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 5.786782061369001,
|
|
"grad_norm": 0.2043412279322104,
|
|
"learning_rate": 1.8310388118452676e-05,
|
|
"loss": 0.3263,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 5.789929189614477,
|
|
"grad_norm": 0.18684348371083476,
|
|
"learning_rate": 1.829238486266023e-05,
|
|
"loss": 0.3286,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 5.793076317859953,
|
|
"grad_norm": 0.20419794528852878,
|
|
"learning_rate": 1.8274385479325003e-05,
|
|
"loss": 0.3272,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 5.7962234461054285,
|
|
"grad_norm": 0.1940073858019324,
|
|
"learning_rate": 1.825638998724663e-05,
|
|
"loss": 0.3332,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 5.799370574350904,
|
|
"grad_norm": 0.19347840957399853,
|
|
"learning_rate": 1.8238398405220693e-05,
|
|
"loss": 0.3351,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 5.802517702596381,
|
|
"grad_norm": 0.1889872990752563,
|
|
"learning_rate": 1.8220410752038683e-05,
|
|
"loss": 0.3316,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 5.805664830841857,
|
|
"grad_norm": 0.18586354781067246,
|
|
"learning_rate": 1.8202427046487998e-05,
|
|
"loss": 0.3341,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 5.808811959087333,
|
|
"grad_norm": 0.1966780805383021,
|
|
"learning_rate": 1.8184447307351892e-05,
|
|
"loss": 0.3355,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 5.811959087332809,
|
|
"grad_norm": 0.18121589168030264,
|
|
"learning_rate": 1.8166471553409515e-05,
|
|
"loss": 0.3383,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 5.815106215578285,
|
|
"grad_norm": 0.20505699055277316,
|
|
"learning_rate": 1.8148499803435814e-05,
|
|
"loss": 0.3398,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 5.818253343823761,
|
|
"grad_norm": 0.18962686805322654,
|
|
"learning_rate": 1.8130532076201567e-05,
|
|
"loss": 0.3265,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 5.821400472069237,
|
|
"grad_norm": 0.18582561112060875,
|
|
"learning_rate": 1.811256839047337e-05,
|
|
"loss": 0.3293,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 5.8245476003147125,
|
|
"grad_norm": 0.19257957858004238,
|
|
"learning_rate": 1.809460876501356e-05,
|
|
"loss": 0.3262,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 5.827694728560189,
|
|
"grad_norm": 0.197182828158111,
|
|
"learning_rate": 1.8076653218580275e-05,
|
|
"loss": 0.3323,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 5.830841856805665,
|
|
"grad_norm": 0.1879475617365794,
|
|
"learning_rate": 1.8058701769927355e-05,
|
|
"loss": 0.334,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 5.833988985051141,
|
|
"grad_norm": 0.1826490417159589,
|
|
"learning_rate": 1.8040754437804394e-05,
|
|
"loss": 0.342,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 5.837136113296617,
|
|
"grad_norm": 0.17633023503177056,
|
|
"learning_rate": 1.8022811240956658e-05,
|
|
"loss": 0.3273,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 5.840283241542092,
|
|
"grad_norm": 0.18968026969150417,
|
|
"learning_rate": 1.800487219812511e-05,
|
|
"loss": 0.346,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 5.843430369787569,
|
|
"grad_norm": 0.1784380943724687,
|
|
"learning_rate": 1.7986937328046367e-05,
|
|
"loss": 0.3303,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 5.846577498033045,
|
|
"grad_norm": 0.1823752757582174,
|
|
"learning_rate": 1.796900664945269e-05,
|
|
"loss": 0.34,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 5.849724626278521,
|
|
"grad_norm": 0.1813510684645491,
|
|
"learning_rate": 1.795108018107197e-05,
|
|
"loss": 0.3412,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 5.8528717545239966,
|
|
"grad_norm": 0.1807432625218474,
|
|
"learning_rate": 1.7933157941627685e-05,
|
|
"loss": 0.3373,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 5.856018882769473,
|
|
"grad_norm": 0.19949499343633706,
|
|
"learning_rate": 1.7915239949838912e-05,
|
|
"loss": 0.3287,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 5.859166011014949,
|
|
"grad_norm": 0.18250218771454552,
|
|
"learning_rate": 1.7897326224420278e-05,
|
|
"loss": 0.3405,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 5.862313139260425,
|
|
"grad_norm": 0.194020691023345,
|
|
"learning_rate": 1.7879416784081964e-05,
|
|
"loss": 0.3346,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 5.865460267505901,
|
|
"grad_norm": 0.18925126458655542,
|
|
"learning_rate": 1.7861511647529673e-05,
|
|
"loss": 0.3364,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 5.868607395751377,
|
|
"grad_norm": 0.1911546369042113,
|
|
"learning_rate": 1.7843610833464605e-05,
|
|
"loss": 0.341,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 5.871754523996853,
|
|
"grad_norm": 0.19577966808550543,
|
|
"learning_rate": 1.782571436058346e-05,
|
|
"loss": 0.3364,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 5.874901652242329,
|
|
"grad_norm": 0.17291565385793384,
|
|
"learning_rate": 1.7807822247578385e-05,
|
|
"loss": 0.3338,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 5.878048780487805,
|
|
"grad_norm": 0.18608030763102454,
|
|
"learning_rate": 1.7789934513136988e-05,
|
|
"loss": 0.3334,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 5.881195908733281,
|
|
"grad_norm": 0.1885179067929735,
|
|
"learning_rate": 1.7772051175942294e-05,
|
|
"loss": 0.3379,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 5.884343036978757,
|
|
"grad_norm": 0.19141910450267036,
|
|
"learning_rate": 1.7754172254672758e-05,
|
|
"loss": 0.3361,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 5.887490165224233,
|
|
"grad_norm": 0.19065019403622055,
|
|
"learning_rate": 1.7736297768002185e-05,
|
|
"loss": 0.3387,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 5.890637293469709,
|
|
"grad_norm": 0.19092741311791903,
|
|
"learning_rate": 1.7718427734599783e-05,
|
|
"loss": 0.3428,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 5.893784421715185,
|
|
"grad_norm": 0.18860250598218833,
|
|
"learning_rate": 1.770056217313009e-05,
|
|
"loss": 0.336,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 5.8969315499606605,
|
|
"grad_norm": 0.18271946305489614,
|
|
"learning_rate": 1.7682701102252972e-05,
|
|
"loss": 0.343,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 5.900078678206137,
|
|
"grad_norm": 0.19868228391520923,
|
|
"learning_rate": 1.7664844540623608e-05,
|
|
"loss": 0.3425,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 5.903225806451613,
|
|
"grad_norm": 0.2011603878538626,
|
|
"learning_rate": 1.764699250689249e-05,
|
|
"loss": 0.3353,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 5.906372934697089,
|
|
"grad_norm": 0.1802992343069088,
|
|
"learning_rate": 1.762914501970534e-05,
|
|
"loss": 0.3409,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 5.909520062942565,
|
|
"grad_norm": 0.20444272040489966,
|
|
"learning_rate": 1.7611302097703157e-05,
|
|
"loss": 0.3347,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 5.912667191188041,
|
|
"grad_norm": 0.1862979242929073,
|
|
"learning_rate": 1.7593463759522168e-05,
|
|
"loss": 0.3314,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 5.915814319433517,
|
|
"grad_norm": 0.20220253909918987,
|
|
"learning_rate": 1.7575630023793816e-05,
|
|
"loss": 0.3377,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 5.918961447678993,
|
|
"grad_norm": 0.18686277913829963,
|
|
"learning_rate": 1.7557800909144728e-05,
|
|
"loss": 0.3384,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 5.922108575924469,
|
|
"grad_norm": 0.22245978325014654,
|
|
"learning_rate": 1.75399764341967e-05,
|
|
"loss": 0.3441,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 5.925255704169945,
|
|
"grad_norm": 0.1959826366457521,
|
|
"learning_rate": 1.7522156617566707e-05,
|
|
"loss": 0.3347,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 5.928402832415421,
|
|
"grad_norm": 0.19789454724861258,
|
|
"learning_rate": 1.7504341477866824e-05,
|
|
"loss": 0.3321,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 5.931549960660897,
|
|
"grad_norm": 0.1924382850939243,
|
|
"learning_rate": 1.7486531033704265e-05,
|
|
"loss": 0.3326,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 5.934697088906373,
|
|
"grad_norm": 0.20590864368684875,
|
|
"learning_rate": 1.7468725303681345e-05,
|
|
"loss": 0.3342,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 5.937844217151849,
|
|
"grad_norm": 0.1797118421700361,
|
|
"learning_rate": 1.7450924306395434e-05,
|
|
"loss": 0.3397,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 5.940991345397325,
|
|
"grad_norm": 0.20148625619435923,
|
|
"learning_rate": 1.7433128060438966e-05,
|
|
"loss": 0.3316,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 5.944138473642801,
|
|
"grad_norm": 0.18364545990645328,
|
|
"learning_rate": 1.741533658439942e-05,
|
|
"loss": 0.3362,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 5.947285601888277,
|
|
"grad_norm": 0.20904376933935323,
|
|
"learning_rate": 1.7397549896859286e-05,
|
|
"loss": 0.3363,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 5.950432730133753,
|
|
"grad_norm": 0.17911675822308773,
|
|
"learning_rate": 1.7379768016396062e-05,
|
|
"loss": 0.3426,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 5.9535798583792285,
|
|
"grad_norm": 0.19232534709476046,
|
|
"learning_rate": 1.736199096158221e-05,
|
|
"loss": 0.3347,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 5.956726986624705,
|
|
"grad_norm": 0.19023265048985652,
|
|
"learning_rate": 1.7344218750985166e-05,
|
|
"loss": 0.3388,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 5.959874114870181,
|
|
"grad_norm": 0.17953463564757774,
|
|
"learning_rate": 1.7326451403167293e-05,
|
|
"loss": 0.3329,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 5.963021243115657,
|
|
"grad_norm": 0.1802520063583937,
|
|
"learning_rate": 1.7308688936685882e-05,
|
|
"loss": 0.3432,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 5.966168371361133,
|
|
"grad_norm": 0.17752047516280367,
|
|
"learning_rate": 1.729093137009314e-05,
|
|
"loss": 0.3333,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 5.969315499606609,
|
|
"grad_norm": 0.1986277401406285,
|
|
"learning_rate": 1.7273178721936128e-05,
|
|
"loss": 0.3368,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 5.972462627852085,
|
|
"grad_norm": 0.18718316187850806,
|
|
"learning_rate": 1.7255431010756785e-05,
|
|
"loss": 0.338,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 5.975609756097561,
|
|
"grad_norm": 0.19078572446696887,
|
|
"learning_rate": 1.7237688255091903e-05,
|
|
"loss": 0.336,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 5.978756884343037,
|
|
"grad_norm": 0.20824480546412605,
|
|
"learning_rate": 1.721995047347308e-05,
|
|
"loss": 0.3451,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 5.9819040125885135,
|
|
"grad_norm": 0.1845164093165284,
|
|
"learning_rate": 1.7202217684426717e-05,
|
|
"loss": 0.3391,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 5.985051140833989,
|
|
"grad_norm": 0.19512142305230165,
|
|
"learning_rate": 1.7184489906474028e-05,
|
|
"loss": 0.3343,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 5.988198269079465,
|
|
"grad_norm": 0.17660675867730372,
|
|
"learning_rate": 1.716676715813096e-05,
|
|
"loss": 0.3434,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 5.991345397324941,
|
|
"grad_norm": 0.190088158411966,
|
|
"learning_rate": 1.7149049457908243e-05,
|
|
"loss": 0.3385,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 5.994492525570417,
|
|
"grad_norm": 0.18592216353409444,
|
|
"learning_rate": 1.713133682431129e-05,
|
|
"loss": 0.3351,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 5.997639653815893,
|
|
"grad_norm": 0.18827936723885594,
|
|
"learning_rate": 1.7113629275840265e-05,
|
|
"loss": 0.3375,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 6.003147128245476,
|
|
"grad_norm": 0.4975044241803055,
|
|
"learning_rate": 1.7095926830989985e-05,
|
|
"loss": 0.6166,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 6.006294256490952,
|
|
"grad_norm": 0.3610313120288688,
|
|
"learning_rate": 1.7078229508249965e-05,
|
|
"loss": 0.276,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 6.009441384736428,
|
|
"grad_norm": 0.38073466389678695,
|
|
"learning_rate": 1.706053732610435e-05,
|
|
"loss": 0.2739,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 6.012588512981904,
|
|
"grad_norm": 0.42338349975031847,
|
|
"learning_rate": 1.704285030303192e-05,
|
|
"loss": 0.2676,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 6.01573564122738,
|
|
"grad_norm": 0.29695193868084807,
|
|
"learning_rate": 1.702516845750608e-05,
|
|
"loss": 0.268,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 6.018882769472856,
|
|
"grad_norm": 0.35818009653270233,
|
|
"learning_rate": 1.700749180799482e-05,
|
|
"loss": 0.2675,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 6.022029897718332,
|
|
"grad_norm": 0.29739443630667606,
|
|
"learning_rate": 1.6989820372960685e-05,
|
|
"loss": 0.2606,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 6.025177025963808,
|
|
"grad_norm": 0.31018644023401853,
|
|
"learning_rate": 1.6972154170860807e-05,
|
|
"loss": 0.2663,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 6.028324154209284,
|
|
"grad_norm": 0.3229186420829361,
|
|
"learning_rate": 1.6954493220146827e-05,
|
|
"loss": 0.2616,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 6.03147128245476,
|
|
"grad_norm": 0.2769240182680597,
|
|
"learning_rate": 1.6936837539264903e-05,
|
|
"loss": 0.2687,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 6.034618410700236,
|
|
"grad_norm": 0.27372353067836536,
|
|
"learning_rate": 1.6919187146655698e-05,
|
|
"loss": 0.2662,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 6.037765538945712,
|
|
"grad_norm": 0.26192196377766025,
|
|
"learning_rate": 1.690154206075435e-05,
|
|
"loss": 0.2641,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 6.040912667191188,
|
|
"grad_norm": 0.2671939261828635,
|
|
"learning_rate": 1.6883902299990452e-05,
|
|
"loss": 0.2705,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 6.044059795436664,
|
|
"grad_norm": 0.2718399605137191,
|
|
"learning_rate": 1.6866267882788042e-05,
|
|
"loss": 0.2622,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 6.04720692368214,
|
|
"grad_norm": 0.2548650251007784,
|
|
"learning_rate": 1.684863882756556e-05,
|
|
"loss": 0.2575,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 6.050354051927616,
|
|
"grad_norm": 0.24781471314240483,
|
|
"learning_rate": 1.683101515273587e-05,
|
|
"loss": 0.2626,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 6.053501180173092,
|
|
"grad_norm": 0.24324637074207814,
|
|
"learning_rate": 1.681339687670618e-05,
|
|
"loss": 0.2624,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 6.056648308418568,
|
|
"grad_norm": 0.2436540725116606,
|
|
"learning_rate": 1.679578401787811e-05,
|
|
"loss": 0.2726,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 6.059795436664044,
|
|
"grad_norm": 0.253809992047894,
|
|
"learning_rate": 1.6778176594647574e-05,
|
|
"loss": 0.2638,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 6.06294256490952,
|
|
"grad_norm": 0.24633420682616938,
|
|
"learning_rate": 1.6760574625404827e-05,
|
|
"loss": 0.2502,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 6.066089693154996,
|
|
"grad_norm": 0.24090371222537213,
|
|
"learning_rate": 1.674297812853444e-05,
|
|
"loss": 0.2653,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 6.069236821400472,
|
|
"grad_norm": 0.24757439171536427,
|
|
"learning_rate": 1.6725387122415253e-05,
|
|
"loss": 0.268,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 6.072383949645948,
|
|
"grad_norm": 0.22694322630286495,
|
|
"learning_rate": 1.6707801625420375e-05,
|
|
"loss": 0.2624,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 6.075531077891424,
|
|
"grad_norm": 0.2549701791259606,
|
|
"learning_rate": 1.669022165591716e-05,
|
|
"loss": 0.2655,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 6.0786782061369005,
|
|
"grad_norm": 0.2245811781702429,
|
|
"learning_rate": 1.6672647232267194e-05,
|
|
"loss": 0.2696,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 6.081825334382376,
|
|
"grad_norm": 0.2711918706767307,
|
|
"learning_rate": 1.6655078372826253e-05,
|
|
"loss": 0.2718,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 6.084972462627852,
|
|
"grad_norm": 0.23302194321945463,
|
|
"learning_rate": 1.663751509594434e-05,
|
|
"loss": 0.2649,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 6.088119590873328,
|
|
"grad_norm": 0.23949565312026969,
|
|
"learning_rate": 1.6619957419965582e-05,
|
|
"loss": 0.2708,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 6.091266719118804,
|
|
"grad_norm": 0.22495551097762426,
|
|
"learning_rate": 1.6602405363228286e-05,
|
|
"loss": 0.2643,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 6.09441384736428,
|
|
"grad_norm": 0.2319845411537025,
|
|
"learning_rate": 1.6584858944064874e-05,
|
|
"loss": 0.2669,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 6.097560975609756,
|
|
"grad_norm": 0.22569793590685644,
|
|
"learning_rate": 1.6567318180801892e-05,
|
|
"loss": 0.2726,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 6.100708103855232,
|
|
"grad_norm": 0.23266554290354272,
|
|
"learning_rate": 1.6549783091759972e-05,
|
|
"loss": 0.2719,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 6.103855232100708,
|
|
"grad_norm": 0.2114326207930978,
|
|
"learning_rate": 1.6532253695253814e-05,
|
|
"loss": 0.2631,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 6.1070023603461845,
|
|
"grad_norm": 0.22853221650998712,
|
|
"learning_rate": 1.651473000959219e-05,
|
|
"loss": 0.2693,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 6.11014948859166,
|
|
"grad_norm": 0.22091900772196124,
|
|
"learning_rate": 1.649721205307788e-05,
|
|
"loss": 0.2614,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 6.113296616837136,
|
|
"grad_norm": 0.21192508829344028,
|
|
"learning_rate": 1.6479699844007706e-05,
|
|
"loss": 0.2662,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 6.116443745082612,
|
|
"grad_norm": 0.21103131436590075,
|
|
"learning_rate": 1.646219340067248e-05,
|
|
"loss": 0.2664,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 6.119590873328088,
|
|
"grad_norm": 0.21105523751019395,
|
|
"learning_rate": 1.644469274135698e-05,
|
|
"loss": 0.2602,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 6.122738001573564,
|
|
"grad_norm": 0.1988689455993957,
|
|
"learning_rate": 1.6427197884339964e-05,
|
|
"loss": 0.2692,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 6.12588512981904,
|
|
"grad_norm": 0.22070030023746054,
|
|
"learning_rate": 1.6409708847894097e-05,
|
|
"loss": 0.2634,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 6.129032258064516,
|
|
"grad_norm": 0.21906464799285424,
|
|
"learning_rate": 1.639222565028601e-05,
|
|
"loss": 0.255,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 6.132179386309992,
|
|
"grad_norm": 0.21158801417145398,
|
|
"learning_rate": 1.637474830977619e-05,
|
|
"loss": 0.2745,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 6.1353265145554685,
|
|
"grad_norm": 0.21986221418279994,
|
|
"learning_rate": 1.6357276844619043e-05,
|
|
"loss": 0.2653,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 6.138473642800944,
|
|
"grad_norm": 0.21102204865676896,
|
|
"learning_rate": 1.633981127306281e-05,
|
|
"loss": 0.2689,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 6.14162077104642,
|
|
"grad_norm": 0.21326607733179273,
|
|
"learning_rate": 1.63223516133496e-05,
|
|
"loss": 0.2716,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 6.144767899291896,
|
|
"grad_norm": 0.2110880192361947,
|
|
"learning_rate": 1.6304897883715324e-05,
|
|
"loss": 0.2666,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 6.147915027537372,
|
|
"grad_norm": 0.22051215932311774,
|
|
"learning_rate": 1.6287450102389725e-05,
|
|
"loss": 0.2618,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 6.151062155782848,
|
|
"grad_norm": 0.21340487458450053,
|
|
"learning_rate": 1.6270008287596305e-05,
|
|
"loss": 0.2669,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 6.154209284028324,
|
|
"grad_norm": 0.2146633102609745,
|
|
"learning_rate": 1.6252572457552366e-05,
|
|
"loss": 0.2644,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 6.1573564122738,
|
|
"grad_norm": 0.2102265150162684,
|
|
"learning_rate": 1.6235142630468928e-05,
|
|
"loss": 0.2684,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 6.160503540519276,
|
|
"grad_norm": 0.21811839270757488,
|
|
"learning_rate": 1.621771882455076e-05,
|
|
"loss": 0.2686,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 6.1636506687647525,
|
|
"grad_norm": 0.21075077208103193,
|
|
"learning_rate": 1.6200301057996337e-05,
|
|
"loss": 0.2655,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 6.166797797010228,
|
|
"grad_norm": 0.218688567959288,
|
|
"learning_rate": 1.6182889348997832e-05,
|
|
"loss": 0.2652,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 6.169944925255704,
|
|
"grad_norm": 0.22171656079314364,
|
|
"learning_rate": 1.6165483715741075e-05,
|
|
"loss": 0.2844,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 6.17309205350118,
|
|
"grad_norm": 0.25323058930729025,
|
|
"learning_rate": 1.6148084176405567e-05,
|
|
"loss": 0.2708,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 6.176239181746656,
|
|
"grad_norm": 0.21655864066814562,
|
|
"learning_rate": 1.6130690749164437e-05,
|
|
"loss": 0.2651,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 6.1793863099921325,
|
|
"grad_norm": 0.21929256246949802,
|
|
"learning_rate": 1.6113303452184434e-05,
|
|
"loss": 0.2761,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 6.182533438237608,
|
|
"grad_norm": 0.21900714816732905,
|
|
"learning_rate": 1.6095922303625902e-05,
|
|
"loss": 0.2731,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 6.185680566483084,
|
|
"grad_norm": 0.23059154637063942,
|
|
"learning_rate": 1.6078547321642758e-05,
|
|
"loss": 0.2702,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 6.18882769472856,
|
|
"grad_norm": 0.21225513261785303,
|
|
"learning_rate": 1.6061178524382483e-05,
|
|
"loss": 0.273,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 6.191974822974037,
|
|
"grad_norm": 0.23067148079801209,
|
|
"learning_rate": 1.6043815929986094e-05,
|
|
"loss": 0.2749,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 6.195121951219512,
|
|
"grad_norm": 0.20922145207589546,
|
|
"learning_rate": 1.602645955658815e-05,
|
|
"loss": 0.2603,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 6.198269079464988,
|
|
"grad_norm": 0.23195506433753524,
|
|
"learning_rate": 1.600910942231668e-05,
|
|
"loss": 0.2641,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 6.201416207710464,
|
|
"grad_norm": 0.20849955610662246,
|
|
"learning_rate": 1.599176554529321e-05,
|
|
"loss": 0.2652,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 6.20456333595594,
|
|
"grad_norm": 0.22725404485600895,
|
|
"learning_rate": 1.597442794363275e-05,
|
|
"loss": 0.2714,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 6.2077104642014165,
|
|
"grad_norm": 0.21709605058575326,
|
|
"learning_rate": 1.595709663544372e-05,
|
|
"loss": 0.2631,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 6.210857592446892,
|
|
"grad_norm": 0.21948985507840815,
|
|
"learning_rate": 1.5939771638827997e-05,
|
|
"loss": 0.2659,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 6.214004720692368,
|
|
"grad_norm": 0.22768102749279248,
|
|
"learning_rate": 1.5922452971880848e-05,
|
|
"loss": 0.267,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 6.217151848937844,
|
|
"grad_norm": 0.22470980313296637,
|
|
"learning_rate": 1.5905140652690935e-05,
|
|
"loss": 0.2751,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 6.220298977183321,
|
|
"grad_norm": 0.21601930900048635,
|
|
"learning_rate": 1.5887834699340288e-05,
|
|
"loss": 0.2687,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 6.223446105428796,
|
|
"grad_norm": 0.23789145044732735,
|
|
"learning_rate": 1.587053512990431e-05,
|
|
"loss": 0.2648,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 6.226593233674272,
|
|
"grad_norm": 0.20645827162898345,
|
|
"learning_rate": 1.5853241962451688e-05,
|
|
"loss": 0.2656,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 6.229740361919748,
|
|
"grad_norm": 0.23749596259128172,
|
|
"learning_rate": 1.5835955215044466e-05,
|
|
"loss": 0.2649,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 6.232887490165224,
|
|
"grad_norm": 0.21313415509044598,
|
|
"learning_rate": 1.581867490573797e-05,
|
|
"loss": 0.2724,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 6.2360346184107005,
|
|
"grad_norm": 0.23951028716861328,
|
|
"learning_rate": 1.580140105258079e-05,
|
|
"loss": 0.2706,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 6.239181746656176,
|
|
"grad_norm": 0.22264880703435372,
|
|
"learning_rate": 1.5784133673614787e-05,
|
|
"loss": 0.2745,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 6.242328874901652,
|
|
"grad_norm": 0.21806089788197308,
|
|
"learning_rate": 1.576687278687504e-05,
|
|
"loss": 0.2714,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 6.245476003147128,
|
|
"grad_norm": 0.21519597916517344,
|
|
"learning_rate": 1.5749618410389884e-05,
|
|
"loss": 0.2749,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 6.248623131392605,
|
|
"grad_norm": 0.23270010761799506,
|
|
"learning_rate": 1.5732370562180826e-05,
|
|
"loss": 0.2656,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 6.25177025963808,
|
|
"grad_norm": 0.20586789811939893,
|
|
"learning_rate": 1.5715129260262556e-05,
|
|
"loss": 0.2695,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 6.254917387883556,
|
|
"grad_norm": 0.2437679513570022,
|
|
"learning_rate": 1.5697894522642928e-05,
|
|
"loss": 0.2748,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 6.258064516129032,
|
|
"grad_norm": 0.2281678596319344,
|
|
"learning_rate": 1.568066636732295e-05,
|
|
"loss": 0.2608,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 6.261211644374509,
|
|
"grad_norm": 0.2330467459753673,
|
|
"learning_rate": 1.566344481229674e-05,
|
|
"loss": 0.2582,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 6.2643587726199845,
|
|
"grad_norm": 0.21485682537512538,
|
|
"learning_rate": 1.564622987555154e-05,
|
|
"loss": 0.2753,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 6.26750590086546,
|
|
"grad_norm": 0.22237525288016594,
|
|
"learning_rate": 1.5629021575067662e-05,
|
|
"loss": 0.2681,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 6.270653029110936,
|
|
"grad_norm": 0.22728442867475201,
|
|
"learning_rate": 1.5611819928818502e-05,
|
|
"loss": 0.2714,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 6.273800157356412,
|
|
"grad_norm": 0.22800474162990042,
|
|
"learning_rate": 1.5594624954770494e-05,
|
|
"loss": 0.2708,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 6.276947285601889,
|
|
"grad_norm": 0.22113656266743126,
|
|
"learning_rate": 1.5577436670883108e-05,
|
|
"loss": 0.2673,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 6.280094413847364,
|
|
"grad_norm": 0.21824707276540103,
|
|
"learning_rate": 1.5560255095108824e-05,
|
|
"loss": 0.2755,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 6.28324154209284,
|
|
"grad_norm": 0.22677192743914226,
|
|
"learning_rate": 1.5543080245393128e-05,
|
|
"loss": 0.2695,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 6.286388670338316,
|
|
"grad_norm": 0.22384148599348552,
|
|
"learning_rate": 1.552591213967446e-05,
|
|
"loss": 0.2693,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 6.289535798583792,
|
|
"grad_norm": 0.2231213061535353,
|
|
"learning_rate": 1.5508750795884222e-05,
|
|
"loss": 0.2743,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 6.2926829268292686,
|
|
"grad_norm": 0.21524551643992698,
|
|
"learning_rate": 1.5491596231946764e-05,
|
|
"loss": 0.2615,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 6.295830055074744,
|
|
"grad_norm": 0.21550805228949638,
|
|
"learning_rate": 1.5474448465779355e-05,
|
|
"loss": 0.2721,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 6.29897718332022,
|
|
"grad_norm": 0.2157668033969489,
|
|
"learning_rate": 1.5457307515292152e-05,
|
|
"loss": 0.268,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 6.302124311565696,
|
|
"grad_norm": 0.21835492722502536,
|
|
"learning_rate": 1.5440173398388202e-05,
|
|
"loss": 0.2667,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 6.305271439811173,
|
|
"grad_norm": 0.2116868929153955,
|
|
"learning_rate": 1.5423046132963407e-05,
|
|
"loss": 0.2646,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 6.3084185680566485,
|
|
"grad_norm": 0.23996177968549967,
|
|
"learning_rate": 1.5405925736906507e-05,
|
|
"loss": 0.2681,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 6.311565696302124,
|
|
"grad_norm": 0.227126159207544,
|
|
"learning_rate": 1.5388812228099105e-05,
|
|
"loss": 0.268,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 6.3147128245476,
|
|
"grad_norm": 0.22790212437206483,
|
|
"learning_rate": 1.5371705624415566e-05,
|
|
"loss": 0.2693,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 6.317859952793077,
|
|
"grad_norm": 0.21628336152215077,
|
|
"learning_rate": 1.535460594372307e-05,
|
|
"loss": 0.2712,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 6.321007081038553,
|
|
"grad_norm": 0.2271127802784349,
|
|
"learning_rate": 1.533751320388154e-05,
|
|
"loss": 0.2687,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 6.324154209284028,
|
|
"grad_norm": 0.21421542101219174,
|
|
"learning_rate": 1.5320427422743685e-05,
|
|
"loss": 0.2718,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 6.327301337529504,
|
|
"grad_norm": 0.24451271038131359,
|
|
"learning_rate": 1.5303348618154915e-05,
|
|
"loss": 0.2623,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 6.33044846577498,
|
|
"grad_norm": 0.20321144790178836,
|
|
"learning_rate": 1.5286276807953365e-05,
|
|
"loss": 0.2693,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 6.333595594020457,
|
|
"grad_norm": 0.24302047081282138,
|
|
"learning_rate": 1.5269212009969868e-05,
|
|
"loss": 0.2725,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 6.3367427222659325,
|
|
"grad_norm": 0.21613725540205522,
|
|
"learning_rate": 1.5252154242027932e-05,
|
|
"loss": 0.2695,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 6.339889850511408,
|
|
"grad_norm": 0.20743059537745503,
|
|
"learning_rate": 1.5235103521943719e-05,
|
|
"loss": 0.2729,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 6.343036978756884,
|
|
"grad_norm": 0.2188470883353957,
|
|
"learning_rate": 1.5218059867526025e-05,
|
|
"loss": 0.2633,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 6.34618410700236,
|
|
"grad_norm": 0.2077384192612987,
|
|
"learning_rate": 1.5201023296576281e-05,
|
|
"loss": 0.2749,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 6.349331235247837,
|
|
"grad_norm": 0.21085206599076245,
|
|
"learning_rate": 1.5183993826888506e-05,
|
|
"loss": 0.28,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 6.352478363493312,
|
|
"grad_norm": 0.20963113069856626,
|
|
"learning_rate": 1.5166971476249299e-05,
|
|
"loss": 0.2699,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 6.355625491738788,
|
|
"grad_norm": 0.21152418642222406,
|
|
"learning_rate": 1.5149956262437848e-05,
|
|
"loss": 0.2691,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 6.358772619984264,
|
|
"grad_norm": 0.21798400408864652,
|
|
"learning_rate": 1.5132948203225866e-05,
|
|
"loss": 0.2701,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 6.361919748229741,
|
|
"grad_norm": 0.212384884342032,
|
|
"learning_rate": 1.5115947316377591e-05,
|
|
"loss": 0.2714,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 6.3650668764752165,
|
|
"grad_norm": 0.21764729442192574,
|
|
"learning_rate": 1.5098953619649779e-05,
|
|
"loss": 0.2706,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 6.368214004720692,
|
|
"grad_norm": 0.20976149383277362,
|
|
"learning_rate": 1.5081967130791672e-05,
|
|
"loss": 0.2715,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 6.371361132966168,
|
|
"grad_norm": 0.21234247868680792,
|
|
"learning_rate": 1.5064987867544982e-05,
|
|
"loss": 0.2665,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 6.374508261211645,
|
|
"grad_norm": 0.21051751829878967,
|
|
"learning_rate": 1.5048015847643887e-05,
|
|
"loss": 0.2672,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 6.377655389457121,
|
|
"grad_norm": 0.20516874410918237,
|
|
"learning_rate": 1.5031051088814982e-05,
|
|
"loss": 0.2634,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 6.380802517702596,
|
|
"grad_norm": 0.1999457276853811,
|
|
"learning_rate": 1.5014093608777294e-05,
|
|
"loss": 0.2738,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 6.383949645948072,
|
|
"grad_norm": 0.21305496050001185,
|
|
"learning_rate": 1.4997143425242229e-05,
|
|
"loss": 0.2737,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 6.387096774193548,
|
|
"grad_norm": 0.20672780819338069,
|
|
"learning_rate": 1.4980200555913586e-05,
|
|
"loss": 0.2718,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 6.390243902439025,
|
|
"grad_norm": 0.21450892009300504,
|
|
"learning_rate": 1.4963265018487523e-05,
|
|
"loss": 0.2654,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 6.3933910306845005,
|
|
"grad_norm": 0.20320195505647742,
|
|
"learning_rate": 1.4946336830652533e-05,
|
|
"loss": 0.2658,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 6.396538158929976,
|
|
"grad_norm": 0.20392713465924406,
|
|
"learning_rate": 1.492941601008945e-05,
|
|
"loss": 0.2746,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 6.399685287175452,
|
|
"grad_norm": 0.20959876954254805,
|
|
"learning_rate": 1.4912502574471384e-05,
|
|
"loss": 0.2747,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 6.402832415420928,
|
|
"grad_norm": 0.2108786736914976,
|
|
"learning_rate": 1.4895596541463771e-05,
|
|
"loss": 0.2701,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 6.405979543666405,
|
|
"grad_norm": 0.22118213608216855,
|
|
"learning_rate": 1.4878697928724273e-05,
|
|
"loss": 0.272,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 6.4091266719118805,
|
|
"grad_norm": 0.203568518365141,
|
|
"learning_rate": 1.486180675390283e-05,
|
|
"loss": 0.2659,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 6.412273800157356,
|
|
"grad_norm": 0.2227618023805204,
|
|
"learning_rate": 1.484492303464161e-05,
|
|
"loss": 0.2717,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 6.415420928402832,
|
|
"grad_norm": 0.21398359499500041,
|
|
"learning_rate": 1.482804678857498e-05,
|
|
"loss": 0.2648,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 6.418568056648309,
|
|
"grad_norm": 0.22308492477910258,
|
|
"learning_rate": 1.4811178033329516e-05,
|
|
"loss": 0.2642,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 6.421715184893785,
|
|
"grad_norm": 0.21249595991484213,
|
|
"learning_rate": 1.4794316786523962e-05,
|
|
"loss": 0.2683,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 6.42486231313926,
|
|
"grad_norm": 0.21068800662577022,
|
|
"learning_rate": 1.4777463065769224e-05,
|
|
"loss": 0.2701,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 6.428009441384736,
|
|
"grad_norm": 0.21437199711857782,
|
|
"learning_rate": 1.4760616888668353e-05,
|
|
"loss": 0.2747,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 6.431156569630213,
|
|
"grad_norm": 0.22330541645842986,
|
|
"learning_rate": 1.4743778272816504e-05,
|
|
"loss": 0.2704,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 6.434303697875689,
|
|
"grad_norm": 0.2062736384947792,
|
|
"learning_rate": 1.4726947235800952e-05,
|
|
"loss": 0.272,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 6.4374508261211645,
|
|
"grad_norm": 0.22521586255432074,
|
|
"learning_rate": 1.4710123795201039e-05,
|
|
"loss": 0.278,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 6.44059795436664,
|
|
"grad_norm": 0.203912107820357,
|
|
"learning_rate": 1.4693307968588194e-05,
|
|
"loss": 0.2711,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 6.443745082612116,
|
|
"grad_norm": 0.21931119328761017,
|
|
"learning_rate": 1.4676499773525863e-05,
|
|
"loss": 0.2663,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 6.446892210857593,
|
|
"grad_norm": 0.2094814854379808,
|
|
"learning_rate": 1.4659699227569566e-05,
|
|
"loss": 0.2695,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 6.450039339103069,
|
|
"grad_norm": 0.23117534470233453,
|
|
"learning_rate": 1.464290634826679e-05,
|
|
"loss": 0.2692,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 6.453186467348544,
|
|
"grad_norm": 0.21379505139811097,
|
|
"learning_rate": 1.4626121153157046e-05,
|
|
"loss": 0.2768,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 6.45633359559402,
|
|
"grad_norm": 0.217663110855076,
|
|
"learning_rate": 1.4609343659771793e-05,
|
|
"loss": 0.2713,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 6.459480723839496,
|
|
"grad_norm": 0.2040638494691249,
|
|
"learning_rate": 1.4592573885634464e-05,
|
|
"loss": 0.2654,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 6.462627852084973,
|
|
"grad_norm": 0.2303747870401908,
|
|
"learning_rate": 1.4575811848260429e-05,
|
|
"loss": 0.2749,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 6.4657749803304485,
|
|
"grad_norm": 0.20958637817569242,
|
|
"learning_rate": 1.4559057565156964e-05,
|
|
"loss": 0.2708,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 6.468922108575924,
|
|
"grad_norm": 0.2224204366720408,
|
|
"learning_rate": 1.4542311053823257e-05,
|
|
"loss": 0.2748,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 6.4720692368214,
|
|
"grad_norm": 0.21278520060079634,
|
|
"learning_rate": 1.4525572331750373e-05,
|
|
"loss": 0.2674,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 6.475216365066877,
|
|
"grad_norm": 0.21995573974288046,
|
|
"learning_rate": 1.4508841416421256e-05,
|
|
"loss": 0.2696,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 6.478363493312353,
|
|
"grad_norm": 0.222067909671715,
|
|
"learning_rate": 1.4492118325310673e-05,
|
|
"loss": 0.2735,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 6.481510621557828,
|
|
"grad_norm": 0.21469400060674615,
|
|
"learning_rate": 1.4475403075885233e-05,
|
|
"loss": 0.2738,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 6.484657749803304,
|
|
"grad_norm": 0.2220259001976218,
|
|
"learning_rate": 1.445869568560335e-05,
|
|
"loss": 0.2655,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 6.487804878048781,
|
|
"grad_norm": 0.22065015324342255,
|
|
"learning_rate": 1.4441996171915241e-05,
|
|
"loss": 0.2703,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 6.490952006294257,
|
|
"grad_norm": 0.2233908978556124,
|
|
"learning_rate": 1.4425304552262876e-05,
|
|
"loss": 0.2749,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 6.4940991345397325,
|
|
"grad_norm": 0.21860192593438782,
|
|
"learning_rate": 1.4408620844079998e-05,
|
|
"loss": 0.2691,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 6.497246262785208,
|
|
"grad_norm": 0.21630356299188297,
|
|
"learning_rate": 1.4391945064792076e-05,
|
|
"loss": 0.2699,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 6.500393391030684,
|
|
"grad_norm": 0.20468468546139096,
|
|
"learning_rate": 1.4375277231816309e-05,
|
|
"loss": 0.2659,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 6.503540519276161,
|
|
"grad_norm": 0.2266198893281483,
|
|
"learning_rate": 1.435861736256158e-05,
|
|
"loss": 0.2636,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 6.506687647521637,
|
|
"grad_norm": 0.210355804203251,
|
|
"learning_rate": 1.4341965474428463e-05,
|
|
"loss": 0.281,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 6.5098347757671124,
|
|
"grad_norm": 0.2104007529783895,
|
|
"learning_rate": 1.4325321584809193e-05,
|
|
"loss": 0.2745,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 6.512981904012588,
|
|
"grad_norm": 0.21454334831641367,
|
|
"learning_rate": 1.4308685711087664e-05,
|
|
"loss": 0.2714,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 6.516129032258064,
|
|
"grad_norm": 0.20461473363605256,
|
|
"learning_rate": 1.4292057870639387e-05,
|
|
"loss": 0.2737,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 6.519276160503541,
|
|
"grad_norm": 0.22229813634194787,
|
|
"learning_rate": 1.4275438080831468e-05,
|
|
"loss": 0.2713,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 6.522423288749017,
|
|
"grad_norm": 0.20801329570201357,
|
|
"learning_rate": 1.4258826359022639e-05,
|
|
"loss": 0.2664,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 6.525570416994492,
|
|
"grad_norm": 0.2141355735304912,
|
|
"learning_rate": 1.4242222722563166e-05,
|
|
"loss": 0.2692,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 6.528717545239968,
|
|
"grad_norm": 0.21674575849736738,
|
|
"learning_rate": 1.4225627188794913e-05,
|
|
"loss": 0.2735,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 6.531864673485445,
|
|
"grad_norm": 0.23378921154557367,
|
|
"learning_rate": 1.4209039775051233e-05,
|
|
"loss": 0.2779,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 6.535011801730921,
|
|
"grad_norm": 0.20405908727514255,
|
|
"learning_rate": 1.4192460498657035e-05,
|
|
"loss": 0.2778,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 6.5381589299763965,
|
|
"grad_norm": 0.2167015673737203,
|
|
"learning_rate": 1.4175889376928717e-05,
|
|
"loss": 0.2674,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 6.541306058221872,
|
|
"grad_norm": 0.22602621445184934,
|
|
"learning_rate": 1.415932642717416e-05,
|
|
"loss": 0.2776,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 6.544453186467349,
|
|
"grad_norm": 0.2103436914815303,
|
|
"learning_rate": 1.4142771666692716e-05,
|
|
"loss": 0.2748,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 6.547600314712825,
|
|
"grad_norm": 0.22284944224802702,
|
|
"learning_rate": 1.4126225112775163e-05,
|
|
"loss": 0.2703,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 6.550747442958301,
|
|
"grad_norm": 0.20859337785195634,
|
|
"learning_rate": 1.4109686782703729e-05,
|
|
"loss": 0.2751,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 6.553894571203776,
|
|
"grad_norm": 0.20898420028915374,
|
|
"learning_rate": 1.4093156693752041e-05,
|
|
"loss": 0.2722,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 6.557041699449252,
|
|
"grad_norm": 0.21921481305234924,
|
|
"learning_rate": 1.407663486318513e-05,
|
|
"loss": 0.2743,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 6.560188827694729,
|
|
"grad_norm": 0.19770871033798984,
|
|
"learning_rate": 1.4060121308259386e-05,
|
|
"loss": 0.2682,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 6.563335955940205,
|
|
"grad_norm": 0.20251135019019187,
|
|
"learning_rate": 1.4043616046222562e-05,
|
|
"loss": 0.2796,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 6.5664830841856805,
|
|
"grad_norm": 0.20589764192052976,
|
|
"learning_rate": 1.4027119094313766e-05,
|
|
"loss": 0.268,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 6.569630212431156,
|
|
"grad_norm": 0.20962471096621335,
|
|
"learning_rate": 1.4010630469763386e-05,
|
|
"loss": 0.2689,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 6.572777340676632,
|
|
"grad_norm": 0.2077048133726809,
|
|
"learning_rate": 1.3994150189793165e-05,
|
|
"loss": 0.2666,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 6.575924468922109,
|
|
"grad_norm": 0.21367970260930136,
|
|
"learning_rate": 1.397767827161608e-05,
|
|
"loss": 0.2668,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 6.579071597167585,
|
|
"grad_norm": 0.20369044452822496,
|
|
"learning_rate": 1.3961214732436407e-05,
|
|
"loss": 0.2717,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 6.58221872541306,
|
|
"grad_norm": 0.20608659783474267,
|
|
"learning_rate": 1.3944759589449657e-05,
|
|
"loss": 0.2662,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 6.585365853658536,
|
|
"grad_norm": 0.21167921476702603,
|
|
"learning_rate": 1.3928312859842592e-05,
|
|
"loss": 0.2548,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 6.588512981904013,
|
|
"grad_norm": 0.2119572678076718,
|
|
"learning_rate": 1.3911874560793149e-05,
|
|
"loss": 0.2686,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 6.591660110149489,
|
|
"grad_norm": 0.23046584602252454,
|
|
"learning_rate": 1.3895444709470485e-05,
|
|
"loss": 0.2691,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 6.5948072383949645,
|
|
"grad_norm": 0.2152670153636726,
|
|
"learning_rate": 1.387902332303494e-05,
|
|
"loss": 0.2789,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 6.59795436664044,
|
|
"grad_norm": 0.21392485708596343,
|
|
"learning_rate": 1.3862610418637988e-05,
|
|
"loss": 0.276,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 6.601101494885917,
|
|
"grad_norm": 0.2251993808341726,
|
|
"learning_rate": 1.384620601342227e-05,
|
|
"loss": 0.2731,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 6.604248623131393,
|
|
"grad_norm": 0.2105054752455257,
|
|
"learning_rate": 1.3829810124521528e-05,
|
|
"loss": 0.2712,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 6.607395751376869,
|
|
"grad_norm": 0.22197663382801497,
|
|
"learning_rate": 1.3813422769060628e-05,
|
|
"loss": 0.269,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 6.610542879622344,
|
|
"grad_norm": 0.21251503803497962,
|
|
"learning_rate": 1.37970439641555e-05,
|
|
"loss": 0.2673,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 6.61369000786782,
|
|
"grad_norm": 0.22088548865063007,
|
|
"learning_rate": 1.3780673726913168e-05,
|
|
"loss": 0.2741,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 6.616837136113297,
|
|
"grad_norm": 0.2100559518908244,
|
|
"learning_rate": 1.37643120744317e-05,
|
|
"loss": 0.2809,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 6.619984264358773,
|
|
"grad_norm": 0.21846715779185052,
|
|
"learning_rate": 1.3747959023800181e-05,
|
|
"loss": 0.2712,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 6.6231313926042485,
|
|
"grad_norm": 0.2051793253015262,
|
|
"learning_rate": 1.3731614592098735e-05,
|
|
"loss": 0.274,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 6.626278520849724,
|
|
"grad_norm": 0.21998385302968826,
|
|
"learning_rate": 1.3715278796398468e-05,
|
|
"loss": 0.2668,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 6.6294256490952,
|
|
"grad_norm": 0.20524755125389635,
|
|
"learning_rate": 1.3698951653761487e-05,
|
|
"loss": 0.2726,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 6.632572777340677,
|
|
"grad_norm": 0.21310125591169676,
|
|
"learning_rate": 1.3682633181240826e-05,
|
|
"loss": 0.2731,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 6.635719905586153,
|
|
"grad_norm": 0.20373328332909574,
|
|
"learning_rate": 1.3666323395880493e-05,
|
|
"loss": 0.2786,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 6.6388670338316285,
|
|
"grad_norm": 0.2194259189312518,
|
|
"learning_rate": 1.3650022314715412e-05,
|
|
"loss": 0.271,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 6.642014162077104,
|
|
"grad_norm": 0.2044629306655923,
|
|
"learning_rate": 1.3633729954771414e-05,
|
|
"loss": 0.2768,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 6.645161290322581,
|
|
"grad_norm": 0.2071870556335287,
|
|
"learning_rate": 1.3617446333065234e-05,
|
|
"loss": 0.2742,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 6.648308418568057,
|
|
"grad_norm": 0.20950878944876208,
|
|
"learning_rate": 1.3601171466604452e-05,
|
|
"loss": 0.274,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 6.651455546813533,
|
|
"grad_norm": 0.2048060795347047,
|
|
"learning_rate": 1.3584905372387542e-05,
|
|
"loss": 0.2744,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 6.654602675059008,
|
|
"grad_norm": 0.20307264689129167,
|
|
"learning_rate": 1.356864806740378e-05,
|
|
"loss": 0.2718,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 6.657749803304485,
|
|
"grad_norm": 0.20870683975446555,
|
|
"learning_rate": 1.3552399568633287e-05,
|
|
"loss": 0.2751,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 6.660896931549961,
|
|
"grad_norm": 0.20074881013703952,
|
|
"learning_rate": 1.3536159893046969e-05,
|
|
"loss": 0.2724,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 6.664044059795437,
|
|
"grad_norm": 0.20014382071858103,
|
|
"learning_rate": 1.3519929057606526e-05,
|
|
"loss": 0.2693,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 6.6671911880409125,
|
|
"grad_norm": 0.20804817092817526,
|
|
"learning_rate": 1.3503707079264432e-05,
|
|
"loss": 0.274,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 6.670338316286388,
|
|
"grad_norm": 0.20493619242075548,
|
|
"learning_rate": 1.348749397496388e-05,
|
|
"loss": 0.2769,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 6.673485444531865,
|
|
"grad_norm": 0.2099027610431233,
|
|
"learning_rate": 1.3471289761638842e-05,
|
|
"loss": 0.2764,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 6.676632572777341,
|
|
"grad_norm": 0.193445232337728,
|
|
"learning_rate": 1.345509445621396e-05,
|
|
"loss": 0.2695,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 6.679779701022817,
|
|
"grad_norm": 0.2096470666573266,
|
|
"learning_rate": 1.34389080756046e-05,
|
|
"loss": 0.2829,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 6.682926829268292,
|
|
"grad_norm": 0.20576106019634247,
|
|
"learning_rate": 1.342273063671678e-05,
|
|
"loss": 0.2756,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 6.686073957513768,
|
|
"grad_norm": 0.21501849543327453,
|
|
"learning_rate": 1.3406562156447211e-05,
|
|
"loss": 0.2727,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 6.689221085759245,
|
|
"grad_norm": 0.1993115840566364,
|
|
"learning_rate": 1.339040265168322e-05,
|
|
"loss": 0.2663,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 6.692368214004721,
|
|
"grad_norm": 0.21226796949929252,
|
|
"learning_rate": 1.337425213930277e-05,
|
|
"loss": 0.2708,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 6.6955153422501965,
|
|
"grad_norm": 0.20469216116105463,
|
|
"learning_rate": 1.3358110636174443e-05,
|
|
"loss": 0.277,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 6.698662470495672,
|
|
"grad_norm": 0.21150553812629214,
|
|
"learning_rate": 1.3341978159157388e-05,
|
|
"loss": 0.2726,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 6.701809598741149,
|
|
"grad_norm": 0.2009979298846876,
|
|
"learning_rate": 1.3325854725101346e-05,
|
|
"loss": 0.2742,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 6.704956726986625,
|
|
"grad_norm": 0.20585970570515025,
|
|
"learning_rate": 1.3309740350846597e-05,
|
|
"loss": 0.2712,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 6.708103855232101,
|
|
"grad_norm": 0.20267136318362497,
|
|
"learning_rate": 1.3293635053223976e-05,
|
|
"loss": 0.2768,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 6.711250983477576,
|
|
"grad_norm": 0.1966034773897379,
|
|
"learning_rate": 1.3277538849054818e-05,
|
|
"loss": 0.2685,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 6.714398111723053,
|
|
"grad_norm": 0.19084672362126345,
|
|
"learning_rate": 1.326145175515098e-05,
|
|
"loss": 0.2707,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 6.717545239968529,
|
|
"grad_norm": 0.21151915236025404,
|
|
"learning_rate": 1.324537378831479e-05,
|
|
"loss": 0.2762,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 6.720692368214005,
|
|
"grad_norm": 0.19161947045159958,
|
|
"learning_rate": 1.3229304965339052e-05,
|
|
"loss": 0.272,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 6.7238394964594805,
|
|
"grad_norm": 0.2057037763555039,
|
|
"learning_rate": 1.3213245303007018e-05,
|
|
"loss": 0.2731,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 6.726986624704956,
|
|
"grad_norm": 0.20303632318163164,
|
|
"learning_rate": 1.3197194818092359e-05,
|
|
"loss": 0.2773,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 6.730133752950433,
|
|
"grad_norm": 0.19044415445481397,
|
|
"learning_rate": 1.318115352735918e-05,
|
|
"loss": 0.2793,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 6.733280881195909,
|
|
"grad_norm": 0.2073975268632104,
|
|
"learning_rate": 1.3165121447561968e-05,
|
|
"loss": 0.2683,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 6.736428009441385,
|
|
"grad_norm": 0.2133081665575485,
|
|
"learning_rate": 1.3149098595445604e-05,
|
|
"loss": 0.2742,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 6.7395751376868605,
|
|
"grad_norm": 0.21038693316237772,
|
|
"learning_rate": 1.313308498774531e-05,
|
|
"loss": 0.2712,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 6.742722265932336,
|
|
"grad_norm": 0.21412237772602838,
|
|
"learning_rate": 1.3117080641186672e-05,
|
|
"loss": 0.2765,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 6.745869394177813,
|
|
"grad_norm": 0.20678305330276317,
|
|
"learning_rate": 1.3101085572485603e-05,
|
|
"loss": 0.2688,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 6.749016522423289,
|
|
"grad_norm": 0.21245293744715033,
|
|
"learning_rate": 1.3085099798348306e-05,
|
|
"loss": 0.2718,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 6.752163650668765,
|
|
"grad_norm": 0.20398550728736917,
|
|
"learning_rate": 1.3069123335471301e-05,
|
|
"loss": 0.2714,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 6.755310778914241,
|
|
"grad_norm": 0.2031385438667128,
|
|
"learning_rate": 1.3053156200541364e-05,
|
|
"loss": 0.2699,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 6.758457907159717,
|
|
"grad_norm": 0.19773088989378942,
|
|
"learning_rate": 1.303719841023553e-05,
|
|
"loss": 0.2635,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 6.761605035405193,
|
|
"grad_norm": 0.21395849634573397,
|
|
"learning_rate": 1.3021249981221086e-05,
|
|
"loss": 0.2771,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 6.764752163650669,
|
|
"grad_norm": 0.19961802457486216,
|
|
"learning_rate": 1.3005310930155544e-05,
|
|
"loss": 0.2709,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 6.7678992918961445,
|
|
"grad_norm": 0.21134776492595922,
|
|
"learning_rate": 1.2989381273686597e-05,
|
|
"loss": 0.2669,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 6.771046420141621,
|
|
"grad_norm": 0.19864060482042745,
|
|
"learning_rate": 1.2973461028452144e-05,
|
|
"loss": 0.2706,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 6.774193548387097,
|
|
"grad_norm": 0.2044619678907636,
|
|
"learning_rate": 1.2957550211080259e-05,
|
|
"loss": 0.2739,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 6.777340676632573,
|
|
"grad_norm": 0.21504368793018358,
|
|
"learning_rate": 1.2941648838189147e-05,
|
|
"loss": 0.2674,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 6.780487804878049,
|
|
"grad_norm": 0.20378150368432318,
|
|
"learning_rate": 1.2925756926387177e-05,
|
|
"loss": 0.2696,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 6.783634933123524,
|
|
"grad_norm": 0.20600148728967427,
|
|
"learning_rate": 1.2909874492272807e-05,
|
|
"loss": 0.2802,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 6.786782061369001,
|
|
"grad_norm": 0.19938138549196283,
|
|
"learning_rate": 1.2894001552434626e-05,
|
|
"loss": 0.2759,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 6.789929189614477,
|
|
"grad_norm": 0.21154201382497265,
|
|
"learning_rate": 1.2878138123451274e-05,
|
|
"loss": 0.2731,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 6.793076317859953,
|
|
"grad_norm": 0.19784577469812065,
|
|
"learning_rate": 1.2862284221891485e-05,
|
|
"loss": 0.2763,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 6.7962234461054285,
|
|
"grad_norm": 0.21310912362182374,
|
|
"learning_rate": 1.2846439864314037e-05,
|
|
"loss": 0.2761,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 6.799370574350904,
|
|
"grad_norm": 0.20695979034558215,
|
|
"learning_rate": 1.283060506726772e-05,
|
|
"loss": 0.2774,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 6.802517702596381,
|
|
"grad_norm": 0.21261086091250228,
|
|
"learning_rate": 1.2814779847291367e-05,
|
|
"loss": 0.2758,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 6.805664830841857,
|
|
"grad_norm": 0.20979015880101165,
|
|
"learning_rate": 1.2798964220913772e-05,
|
|
"loss": 0.2804,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 6.808811959087333,
|
|
"grad_norm": 0.21761044939140664,
|
|
"learning_rate": 1.278315820465376e-05,
|
|
"loss": 0.2769,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 6.811959087332809,
|
|
"grad_norm": 0.2014697385726117,
|
|
"learning_rate": 1.2767361815020065e-05,
|
|
"loss": 0.2783,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 6.815106215578285,
|
|
"grad_norm": 0.21283798980232946,
|
|
"learning_rate": 1.2751575068511408e-05,
|
|
"loss": 0.2657,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 6.818253343823761,
|
|
"grad_norm": 0.19962011480967196,
|
|
"learning_rate": 1.2735797981616407e-05,
|
|
"loss": 0.2806,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 6.821400472069237,
|
|
"grad_norm": 0.21924693121937547,
|
|
"learning_rate": 1.2720030570813608e-05,
|
|
"loss": 0.2746,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 6.8245476003147125,
|
|
"grad_norm": 0.19657838694235807,
|
|
"learning_rate": 1.2704272852571455e-05,
|
|
"loss": 0.2684,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 6.827694728560189,
|
|
"grad_norm": 0.2030249463511617,
|
|
"learning_rate": 1.2688524843348252e-05,
|
|
"loss": 0.2722,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 6.830841856805665,
|
|
"grad_norm": 0.20062136834665203,
|
|
"learning_rate": 1.2672786559592178e-05,
|
|
"loss": 0.2722,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 6.833988985051141,
|
|
"grad_norm": 0.21259980872470255,
|
|
"learning_rate": 1.2657058017741237e-05,
|
|
"loss": 0.2746,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 6.837136113296617,
|
|
"grad_norm": 0.18743513115346688,
|
|
"learning_rate": 1.2641339234223282e-05,
|
|
"loss": 0.2695,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 6.840283241542092,
|
|
"grad_norm": 0.20138411396722927,
|
|
"learning_rate": 1.2625630225455946e-05,
|
|
"loss": 0.2764,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 6.843430369787569,
|
|
"grad_norm": 0.20724943586989145,
|
|
"learning_rate": 1.2609931007846672e-05,
|
|
"loss": 0.28,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 6.846577498033045,
|
|
"grad_norm": 0.20364398935190642,
|
|
"learning_rate": 1.2594241597792678e-05,
|
|
"loss": 0.2742,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 6.849724626278521,
|
|
"grad_norm": 0.21697947841842968,
|
|
"learning_rate": 1.2578562011680914e-05,
|
|
"loss": 0.2722,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 6.8528717545239966,
|
|
"grad_norm": 0.20438816521590877,
|
|
"learning_rate": 1.2562892265888116e-05,
|
|
"loss": 0.2742,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 6.856018882769473,
|
|
"grad_norm": 0.2160342522382541,
|
|
"learning_rate": 1.2547232376780687e-05,
|
|
"loss": 0.2757,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 6.859166011014949,
|
|
"grad_norm": 0.20593020428643655,
|
|
"learning_rate": 1.2531582360714775e-05,
|
|
"loss": 0.2675,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 6.862313139260425,
|
|
"grad_norm": 0.21437695083001138,
|
|
"learning_rate": 1.251594223403619e-05,
|
|
"loss": 0.2693,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 6.865460267505901,
|
|
"grad_norm": 0.2119697416305465,
|
|
"learning_rate": 1.2500312013080444e-05,
|
|
"loss": 0.2669,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 6.868607395751377,
|
|
"grad_norm": 0.2060511460509206,
|
|
"learning_rate": 1.2484691714172663e-05,
|
|
"loss": 0.2861,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 6.871754523996853,
|
|
"grad_norm": 0.19849969915264076,
|
|
"learning_rate": 1.246908135362764e-05,
|
|
"loss": 0.2758,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 6.874901652242329,
|
|
"grad_norm": 0.20718364482758267,
|
|
"learning_rate": 1.2453480947749785e-05,
|
|
"loss": 0.2746,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 6.878048780487805,
|
|
"grad_norm": 0.2009159233212597,
|
|
"learning_rate": 1.2437890512833089e-05,
|
|
"loss": 0.2804,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 6.881195908733281,
|
|
"grad_norm": 0.20361754932690013,
|
|
"learning_rate": 1.2422310065161162e-05,
|
|
"loss": 0.265,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 6.884343036978757,
|
|
"grad_norm": 0.20703457454924554,
|
|
"learning_rate": 1.240673962100715e-05,
|
|
"loss": 0.2686,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 6.887490165224233,
|
|
"grad_norm": 0.20068143636302968,
|
|
"learning_rate": 1.2391179196633776e-05,
|
|
"loss": 0.2763,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 6.890637293469709,
|
|
"grad_norm": 0.20358246660033522,
|
|
"learning_rate": 1.2375628808293274e-05,
|
|
"loss": 0.2792,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 6.893784421715185,
|
|
"grad_norm": 0.2036764238431656,
|
|
"learning_rate": 1.2360088472227418e-05,
|
|
"loss": 0.2737,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 6.8969315499606605,
|
|
"grad_norm": 0.20147681379716198,
|
|
"learning_rate": 1.2344558204667475e-05,
|
|
"loss": 0.2725,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 6.900078678206137,
|
|
"grad_norm": 0.2071864222789661,
|
|
"learning_rate": 1.2329038021834193e-05,
|
|
"loss": 0.2709,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 6.903225806451613,
|
|
"grad_norm": 0.2043649714329845,
|
|
"learning_rate": 1.231352793993779e-05,
|
|
"loss": 0.2738,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 6.906372934697089,
|
|
"grad_norm": 0.20125299342080424,
|
|
"learning_rate": 1.2298027975177926e-05,
|
|
"loss": 0.2636,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 6.909520062942565,
|
|
"grad_norm": 0.20814571779048338,
|
|
"learning_rate": 1.2282538143743712e-05,
|
|
"loss": 0.2771,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 6.912667191188041,
|
|
"grad_norm": 0.20030130841003688,
|
|
"learning_rate": 1.2267058461813649e-05,
|
|
"loss": 0.2694,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 6.915814319433517,
|
|
"grad_norm": 0.21200310708703735,
|
|
"learning_rate": 1.2251588945555666e-05,
|
|
"loss": 0.2725,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 6.918961447678993,
|
|
"grad_norm": 0.20922436094459726,
|
|
"learning_rate": 1.2236129611127045e-05,
|
|
"loss": 0.2726,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 6.922108575924469,
|
|
"grad_norm": 0.19704648497715532,
|
|
"learning_rate": 1.2220680474674458e-05,
|
|
"loss": 0.2741,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 6.925255704169945,
|
|
"grad_norm": 0.21570986491875785,
|
|
"learning_rate": 1.2205241552333922e-05,
|
|
"loss": 0.2716,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 6.928402832415421,
|
|
"grad_norm": 0.20000918248634075,
|
|
"learning_rate": 1.218981286023077e-05,
|
|
"loss": 0.2791,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 6.931549960660897,
|
|
"grad_norm": 0.1997584203723108,
|
|
"learning_rate": 1.2174394414479667e-05,
|
|
"loss": 0.2783,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 6.934697088906373,
|
|
"grad_norm": 0.20118408426733328,
|
|
"learning_rate": 1.215898623118456e-05,
|
|
"loss": 0.2736,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 6.937844217151849,
|
|
"grad_norm": 0.19493904766515557,
|
|
"learning_rate": 1.2143588326438697e-05,
|
|
"loss": 0.2734,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 6.940991345397325,
|
|
"grad_norm": 0.21288261790118718,
|
|
"learning_rate": 1.2128200716324566e-05,
|
|
"loss": 0.2768,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 6.944138473642801,
|
|
"grad_norm": 0.1976856689237112,
|
|
"learning_rate": 1.2112823416913936e-05,
|
|
"loss": 0.2747,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 6.947285601888277,
|
|
"grad_norm": 0.2130261035735836,
|
|
"learning_rate": 1.2097456444267771e-05,
|
|
"loss": 0.2677,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 6.950432730133753,
|
|
"grad_norm": 0.19662526671296285,
|
|
"learning_rate": 1.208209981443627e-05,
|
|
"loss": 0.2717,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 6.9535798583792285,
|
|
"grad_norm": 0.20222706552909867,
|
|
"learning_rate": 1.2066753543458835e-05,
|
|
"loss": 0.2711,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 6.956726986624705,
|
|
"grad_norm": 0.2069577717265713,
|
|
"learning_rate": 1.2051417647364021e-05,
|
|
"loss": 0.2793,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 6.959874114870181,
|
|
"grad_norm": 0.203212313831048,
|
|
"learning_rate": 1.2036092142169582e-05,
|
|
"loss": 0.2763,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 6.963021243115657,
|
|
"grad_norm": 0.20509305087697424,
|
|
"learning_rate": 1.2020777043882386e-05,
|
|
"loss": 0.2759,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 6.966168371361133,
|
|
"grad_norm": 0.1989672235984331,
|
|
"learning_rate": 1.2005472368498457e-05,
|
|
"loss": 0.2713,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 6.969315499606609,
|
|
"grad_norm": 0.22138011183206288,
|
|
"learning_rate": 1.1990178132002913e-05,
|
|
"loss": 0.2692,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 6.972462627852085,
|
|
"grad_norm": 0.2094379431636141,
|
|
"learning_rate": 1.1974894350369981e-05,
|
|
"loss": 0.2788,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 6.975609756097561,
|
|
"grad_norm": 0.20436997575009863,
|
|
"learning_rate": 1.195962103956298e-05,
|
|
"loss": 0.2759,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 6.978756884343037,
|
|
"grad_norm": 0.204009380946763,
|
|
"learning_rate": 1.1944358215534258e-05,
|
|
"loss": 0.2701,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 6.9819040125885135,
|
|
"grad_norm": 0.20886713208010613,
|
|
"learning_rate": 1.1929105894225248e-05,
|
|
"loss": 0.2687,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 6.985051140833989,
|
|
"grad_norm": 0.20522061292808225,
|
|
"learning_rate": 1.1913864091566372e-05,
|
|
"loss": 0.2628,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 6.988198269079465,
|
|
"grad_norm": 0.202811286102291,
|
|
"learning_rate": 1.1898632823477121e-05,
|
|
"loss": 0.2757,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 6.991345397324941,
|
|
"grad_norm": 0.2192831752403655,
|
|
"learning_rate": 1.1883412105865925e-05,
|
|
"loss": 0.2698,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 6.994492525570417,
|
|
"grad_norm": 0.19233397196404134,
|
|
"learning_rate": 1.1868201954630238e-05,
|
|
"loss": 0.2723,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 6.997639653815893,
|
|
"grad_norm": 0.210128078621367,
|
|
"learning_rate": 1.185300238565645e-05,
|
|
"loss": 0.2774,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 7.003147128245476,
|
|
"grad_norm": 0.5714481470801931,
|
|
"learning_rate": 1.183781341481991e-05,
|
|
"loss": 0.4569,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 7.006294256490952,
|
|
"grad_norm": 0.4250625166239754,
|
|
"learning_rate": 1.1822635057984906e-05,
|
|
"loss": 0.2112,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 7.009441384736428,
|
|
"grad_norm": 0.2691450770861746,
|
|
"learning_rate": 1.1807467331004619e-05,
|
|
"loss": 0.2138,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 7.012588512981904,
|
|
"grad_norm": 0.6396419703643319,
|
|
"learning_rate": 1.179231024972115e-05,
|
|
"loss": 0.2188,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 7.01573564122738,
|
|
"grad_norm": 0.3063619660764144,
|
|
"learning_rate": 1.177716382996546e-05,
|
|
"loss": 0.2141,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 7.018882769472856,
|
|
"grad_norm": 0.37212547491407416,
|
|
"learning_rate": 1.1762028087557393e-05,
|
|
"loss": 0.207,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 7.022029897718332,
|
|
"grad_norm": 0.381130447699765,
|
|
"learning_rate": 1.1746903038305626e-05,
|
|
"loss": 0.2121,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 7.025177025963808,
|
|
"grad_norm": 0.3005238930589458,
|
|
"learning_rate": 1.1731788698007675e-05,
|
|
"loss": 0.2127,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 7.028324154209284,
|
|
"grad_norm": 0.3300998957168307,
|
|
"learning_rate": 1.1716685082449879e-05,
|
|
"loss": 0.2237,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 7.03147128245476,
|
|
"grad_norm": 0.38114455718692064,
|
|
"learning_rate": 1.1701592207407355e-05,
|
|
"loss": 0.2176,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 7.034618410700236,
|
|
"grad_norm": 0.30283344304928583,
|
|
"learning_rate": 1.1686510088644014e-05,
|
|
"loss": 0.2086,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 7.037765538945712,
|
|
"grad_norm": 0.3331464112562727,
|
|
"learning_rate": 1.167143874191254e-05,
|
|
"loss": 0.2075,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 7.040912667191188,
|
|
"grad_norm": 0.29800299120189844,
|
|
"learning_rate": 1.1656378182954357e-05,
|
|
"loss": 0.2052,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 7.044059795436664,
|
|
"grad_norm": 0.2894375393244374,
|
|
"learning_rate": 1.1641328427499614e-05,
|
|
"loss": 0.2071,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 7.04720692368214,
|
|
"grad_norm": 0.3155503383974769,
|
|
"learning_rate": 1.1626289491267197e-05,
|
|
"loss": 0.2161,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 7.050354051927616,
|
|
"grad_norm": 0.26776854986829085,
|
|
"learning_rate": 1.161126138996467e-05,
|
|
"loss": 0.2022,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 7.053501180173092,
|
|
"grad_norm": 0.27601227009065155,
|
|
"learning_rate": 1.1596244139288286e-05,
|
|
"loss": 0.2066,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 7.056648308418568,
|
|
"grad_norm": 0.27656033063047314,
|
|
"learning_rate": 1.1581237754922984e-05,
|
|
"loss": 0.2104,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 7.059795436664044,
|
|
"grad_norm": 0.2449882270902797,
|
|
"learning_rate": 1.1566242252542325e-05,
|
|
"loss": 0.2073,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 7.06294256490952,
|
|
"grad_norm": 0.2768374274550793,
|
|
"learning_rate": 1.1551257647808524e-05,
|
|
"loss": 0.2102,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 7.066089693154996,
|
|
"grad_norm": 0.26011823472626777,
|
|
"learning_rate": 1.1536283956372402e-05,
|
|
"loss": 0.2142,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 7.069236821400472,
|
|
"grad_norm": 0.25819042597992947,
|
|
"learning_rate": 1.1521321193873395e-05,
|
|
"loss": 0.208,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 7.072383949645948,
|
|
"grad_norm": 0.24665055592686272,
|
|
"learning_rate": 1.1506369375939506e-05,
|
|
"loss": 0.208,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 7.075531077891424,
|
|
"grad_norm": 0.24751137987885644,
|
|
"learning_rate": 1.1491428518187321e-05,
|
|
"loss": 0.2092,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 7.0786782061369005,
|
|
"grad_norm": 0.24624896065268903,
|
|
"learning_rate": 1.1476498636221978e-05,
|
|
"loss": 0.2087,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 7.081825334382376,
|
|
"grad_norm": 0.25982701212215953,
|
|
"learning_rate": 1.1461579745637143e-05,
|
|
"loss": 0.2063,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 7.084972462627852,
|
|
"grad_norm": 0.2397365742144504,
|
|
"learning_rate": 1.1446671862015013e-05,
|
|
"loss": 0.2151,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 7.088119590873328,
|
|
"grad_norm": 0.23751859174229084,
|
|
"learning_rate": 1.1431775000926272e-05,
|
|
"loss": 0.2067,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 7.091266719118804,
|
|
"grad_norm": 0.23996775925549246,
|
|
"learning_rate": 1.1416889177930113e-05,
|
|
"loss": 0.2113,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 7.09441384736428,
|
|
"grad_norm": 0.24150863328124383,
|
|
"learning_rate": 1.1402014408574177e-05,
|
|
"loss": 0.2125,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 7.097560975609756,
|
|
"grad_norm": 0.24163504765540855,
|
|
"learning_rate": 1.1387150708394586e-05,
|
|
"loss": 0.1962,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 7.100708103855232,
|
|
"grad_norm": 0.24010609549944184,
|
|
"learning_rate": 1.1372298092915868e-05,
|
|
"loss": 0.2141,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 7.103855232100708,
|
|
"grad_norm": 0.2454335372395361,
|
|
"learning_rate": 1.1357456577651007e-05,
|
|
"loss": 0.2105,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 7.1070023603461845,
|
|
"grad_norm": 0.23394725001346658,
|
|
"learning_rate": 1.1342626178101374e-05,
|
|
"loss": 0.2079,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 7.11014948859166,
|
|
"grad_norm": 0.23777743303747212,
|
|
"learning_rate": 1.132780690975673e-05,
|
|
"loss": 0.2114,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 7.113296616837136,
|
|
"grad_norm": 0.22606234526414365,
|
|
"learning_rate": 1.131299878809522e-05,
|
|
"loss": 0.2081,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 7.116443745082612,
|
|
"grad_norm": 0.2418578090305854,
|
|
"learning_rate": 1.1298201828583332e-05,
|
|
"loss": 0.2066,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 7.119590873328088,
|
|
"grad_norm": 0.23113427714810778,
|
|
"learning_rate": 1.1283416046675916e-05,
|
|
"loss": 0.2102,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 7.122738001573564,
|
|
"grad_norm": 0.2381266978689901,
|
|
"learning_rate": 1.1268641457816117e-05,
|
|
"loss": 0.207,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 7.12588512981904,
|
|
"grad_norm": 0.2361934040445735,
|
|
"learning_rate": 1.1253878077435436e-05,
|
|
"loss": 0.2158,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 7.129032258064516,
|
|
"grad_norm": 0.21836833345649923,
|
|
"learning_rate": 1.1239125920953615e-05,
|
|
"loss": 0.2134,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 7.132179386309992,
|
|
"grad_norm": 0.24198498582441896,
|
|
"learning_rate": 1.122438500377871e-05,
|
|
"loss": 0.2042,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 7.1353265145554685,
|
|
"grad_norm": 0.22713295487622734,
|
|
"learning_rate": 1.1209655341307024e-05,
|
|
"loss": 0.2117,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 7.138473642800944,
|
|
"grad_norm": 0.2343503065926964,
|
|
"learning_rate": 1.1194936948923103e-05,
|
|
"loss": 0.2098,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 7.14162077104642,
|
|
"grad_norm": 0.22909025791500967,
|
|
"learning_rate": 1.1180229841999726e-05,
|
|
"loss": 0.2106,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 7.144767899291896,
|
|
"grad_norm": 0.227228211003999,
|
|
"learning_rate": 1.1165534035897881e-05,
|
|
"loss": 0.2192,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 7.147915027537372,
|
|
"grad_norm": 0.22905608109888015,
|
|
"learning_rate": 1.1150849545966766e-05,
|
|
"loss": 0.2085,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 7.151062155782848,
|
|
"grad_norm": 0.21727537194341173,
|
|
"learning_rate": 1.1136176387543736e-05,
|
|
"loss": 0.2122,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 7.154209284028324,
|
|
"grad_norm": 0.23840050117066902,
|
|
"learning_rate": 1.1121514575954327e-05,
|
|
"loss": 0.2149,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 7.1573564122738,
|
|
"grad_norm": 0.22511280292668318,
|
|
"learning_rate": 1.1106864126512233e-05,
|
|
"loss": 0.2026,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 7.160503540519276,
|
|
"grad_norm": 0.2319999499213673,
|
|
"learning_rate": 1.109222505451925e-05,
|
|
"loss": 0.2045,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 7.1636506687647525,
|
|
"grad_norm": 0.22621753505730435,
|
|
"learning_rate": 1.1077597375265325e-05,
|
|
"loss": 0.2024,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 7.166797797010228,
|
|
"grad_norm": 0.22239947016703665,
|
|
"learning_rate": 1.1062981104028479e-05,
|
|
"loss": 0.2096,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 7.169944925255704,
|
|
"grad_norm": 0.23183568916578862,
|
|
"learning_rate": 1.1048376256074831e-05,
|
|
"loss": 0.2046,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 7.17309205350118,
|
|
"grad_norm": 0.23415363186834018,
|
|
"learning_rate": 1.1033782846658567e-05,
|
|
"loss": 0.2126,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 7.176239181746656,
|
|
"grad_norm": 0.21870216739732176,
|
|
"learning_rate": 1.1019200891021932e-05,
|
|
"loss": 0.201,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 7.1793863099921325,
|
|
"grad_norm": 0.23538602690513877,
|
|
"learning_rate": 1.1004630404395193e-05,
|
|
"loss": 0.2138,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 7.182533438237608,
|
|
"grad_norm": 0.2278346344735737,
|
|
"learning_rate": 1.0990071401996647e-05,
|
|
"loss": 0.2097,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 7.185680566483084,
|
|
"grad_norm": 0.22286482289133921,
|
|
"learning_rate": 1.0975523899032603e-05,
|
|
"loss": 0.2082,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 7.18882769472856,
|
|
"grad_norm": 0.21831126634768455,
|
|
"learning_rate": 1.0960987910697338e-05,
|
|
"loss": 0.2098,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 7.191974822974037,
|
|
"grad_norm": 0.23172372964239146,
|
|
"learning_rate": 1.0946463452173135e-05,
|
|
"loss": 0.2096,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 7.195121951219512,
|
|
"grad_norm": 0.22322237214331764,
|
|
"learning_rate": 1.0931950538630199e-05,
|
|
"loss": 0.2132,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 7.198269079464988,
|
|
"grad_norm": 0.22598175395258135,
|
|
"learning_rate": 1.0917449185226702e-05,
|
|
"loss": 0.2108,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 7.201416207710464,
|
|
"grad_norm": 0.22815280201115662,
|
|
"learning_rate": 1.090295940710873e-05,
|
|
"loss": 0.2135,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 7.20456333595594,
|
|
"grad_norm": 0.22423887146026672,
|
|
"learning_rate": 1.0888481219410286e-05,
|
|
"loss": 0.2155,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 7.2077104642014165,
|
|
"grad_norm": 0.23680769002713375,
|
|
"learning_rate": 1.087401463725326e-05,
|
|
"loss": 0.2115,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 7.210857592446892,
|
|
"grad_norm": 0.22339424747961625,
|
|
"learning_rate": 1.0859559675747427e-05,
|
|
"loss": 0.2073,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 7.214004720692368,
|
|
"grad_norm": 0.22958762279363118,
|
|
"learning_rate": 1.0845116349990418e-05,
|
|
"loss": 0.2102,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 7.217151848937844,
|
|
"grad_norm": 0.21905123849431263,
|
|
"learning_rate": 1.083068467506772e-05,
|
|
"loss": 0.2096,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 7.220298977183321,
|
|
"grad_norm": 0.2299465638743488,
|
|
"learning_rate": 1.0816264666052652e-05,
|
|
"loss": 0.2103,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 7.223446105428796,
|
|
"grad_norm": 0.22978612951320251,
|
|
"learning_rate": 1.0801856338006323e-05,
|
|
"loss": 0.2155,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 7.226593233674272,
|
|
"grad_norm": 0.22975189777607816,
|
|
"learning_rate": 1.0787459705977681e-05,
|
|
"loss": 0.2114,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 7.229740361919748,
|
|
"grad_norm": 0.22933284295055767,
|
|
"learning_rate": 1.0773074785003426e-05,
|
|
"loss": 0.2108,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 7.232887490165224,
|
|
"grad_norm": 0.23052487817754658,
|
|
"learning_rate": 1.0758701590108039e-05,
|
|
"loss": 0.2054,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 7.2360346184107005,
|
|
"grad_norm": 0.22513890179226442,
|
|
"learning_rate": 1.0744340136303765e-05,
|
|
"loss": 0.2069,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 7.239181746656176,
|
|
"grad_norm": 0.22537210978153835,
|
|
"learning_rate": 1.0729990438590558e-05,
|
|
"loss": 0.2154,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 7.242328874901652,
|
|
"grad_norm": 0.22711701620016747,
|
|
"learning_rate": 1.0715652511956122e-05,
|
|
"loss": 0.2117,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 7.245476003147128,
|
|
"grad_norm": 0.21447246807326145,
|
|
"learning_rate": 1.0701326371375842e-05,
|
|
"loss": 0.2099,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 7.248623131392605,
|
|
"grad_norm": 0.22924666327151738,
|
|
"learning_rate": 1.0687012031812818e-05,
|
|
"loss": 0.2059,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 7.25177025963808,
|
|
"grad_norm": 0.2161633462452467,
|
|
"learning_rate": 1.0672709508217796e-05,
|
|
"loss": 0.2071,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 7.254917387883556,
|
|
"grad_norm": 0.24671002964793948,
|
|
"learning_rate": 1.0658418815529204e-05,
|
|
"loss": 0.2194,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 7.258064516129032,
|
|
"grad_norm": 0.21221688808795114,
|
|
"learning_rate": 1.0644139968673101e-05,
|
|
"loss": 0.2182,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 7.261211644374509,
|
|
"grad_norm": 0.22568981603880797,
|
|
"learning_rate": 1.062987298256318e-05,
|
|
"loss": 0.2159,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 7.2643587726199845,
|
|
"grad_norm": 0.22104241538483152,
|
|
"learning_rate": 1.0615617872100752e-05,
|
|
"loss": 0.2041,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 7.26750590086546,
|
|
"grad_norm": 0.22669047159973574,
|
|
"learning_rate": 1.06013746521747e-05,
|
|
"loss": 0.2078,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 7.270653029110936,
|
|
"grad_norm": 0.2253716719320985,
|
|
"learning_rate": 1.0587143337661516e-05,
|
|
"loss": 0.2125,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 7.273800157356412,
|
|
"grad_norm": 0.22803877169918388,
|
|
"learning_rate": 1.0572923943425234e-05,
|
|
"loss": 0.2092,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 7.276947285601889,
|
|
"grad_norm": 0.2213507250976366,
|
|
"learning_rate": 1.0558716484317456e-05,
|
|
"loss": 0.2108,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 7.280094413847364,
|
|
"grad_norm": 0.2273540787687723,
|
|
"learning_rate": 1.05445209751773e-05,
|
|
"loss": 0.2134,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 7.28324154209284,
|
|
"grad_norm": 0.2138902730182339,
|
|
"learning_rate": 1.053033743083142e-05,
|
|
"loss": 0.2089,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 7.286388670338316,
|
|
"grad_norm": 0.23680899817380743,
|
|
"learning_rate": 1.0516165866093974e-05,
|
|
"loss": 0.2108,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 7.289535798583792,
|
|
"grad_norm": 0.22183476822950635,
|
|
"learning_rate": 1.0502006295766589e-05,
|
|
"loss": 0.2174,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 7.2926829268292686,
|
|
"grad_norm": 0.22004800892442652,
|
|
"learning_rate": 1.0487858734638385e-05,
|
|
"loss": 0.2151,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 7.295830055074744,
|
|
"grad_norm": 0.22181045274812225,
|
|
"learning_rate": 1.0473723197485914e-05,
|
|
"loss": 0.2025,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 7.29897718332022,
|
|
"grad_norm": 0.21908332323352983,
|
|
"learning_rate": 1.0459599699073206e-05,
|
|
"loss": 0.2162,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 7.302124311565696,
|
|
"grad_norm": 0.21884697231931952,
|
|
"learning_rate": 1.044548825415168e-05,
|
|
"loss": 0.2129,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 7.305271439811173,
|
|
"grad_norm": 0.2187517231572296,
|
|
"learning_rate": 1.043138887746018e-05,
|
|
"loss": 0.2092,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 7.3084185680566485,
|
|
"grad_norm": 0.22546922277138795,
|
|
"learning_rate": 1.041730158372496e-05,
|
|
"loss": 0.2062,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 7.311565696302124,
|
|
"grad_norm": 0.22614767597501462,
|
|
"learning_rate": 1.0403226387659628e-05,
|
|
"loss": 0.2141,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 7.3147128245476,
|
|
"grad_norm": 0.22707234003611404,
|
|
"learning_rate": 1.0389163303965186e-05,
|
|
"loss": 0.2122,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 7.317859952793077,
|
|
"grad_norm": 0.23186259964324954,
|
|
"learning_rate": 1.0375112347329946e-05,
|
|
"loss": 0.2146,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 7.321007081038553,
|
|
"grad_norm": 0.23276792906716168,
|
|
"learning_rate": 1.0361073532429594e-05,
|
|
"loss": 0.2103,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 7.324154209284028,
|
|
"grad_norm": 0.2074352547542711,
|
|
"learning_rate": 1.0347046873927104e-05,
|
|
"loss": 0.2104,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 7.327301337529504,
|
|
"grad_norm": 0.2236327394327096,
|
|
"learning_rate": 1.0333032386472775e-05,
|
|
"loss": 0.2155,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 7.33044846577498,
|
|
"grad_norm": 0.221050234723865,
|
|
"learning_rate": 1.0319030084704175e-05,
|
|
"loss": 0.2214,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 7.333595594020457,
|
|
"grad_norm": 0.2249617592191245,
|
|
"learning_rate": 1.0305039983246159e-05,
|
|
"loss": 0.2054,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 7.3367427222659325,
|
|
"grad_norm": 0.22698815261155295,
|
|
"learning_rate": 1.0291062096710837e-05,
|
|
"loss": 0.2071,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 7.339889850511408,
|
|
"grad_norm": 0.2268711614187744,
|
|
"learning_rate": 1.0277096439697552e-05,
|
|
"loss": 0.2145,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 7.343036978756884,
|
|
"grad_norm": 0.215143567561118,
|
|
"learning_rate": 1.0263143026792883e-05,
|
|
"loss": 0.207,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 7.34618410700236,
|
|
"grad_norm": 0.22328803868837543,
|
|
"learning_rate": 1.0249201872570614e-05,
|
|
"loss": 0.2183,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 7.349331235247837,
|
|
"grad_norm": 0.2218308643421254,
|
|
"learning_rate": 1.0235272991591732e-05,
|
|
"loss": 0.2099,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 7.352478363493312,
|
|
"grad_norm": 0.23227296918591858,
|
|
"learning_rate": 1.0221356398404398e-05,
|
|
"loss": 0.2096,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 7.355625491738788,
|
|
"grad_norm": 0.2387762137802973,
|
|
"learning_rate": 1.0207452107543955e-05,
|
|
"loss": 0.2065,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 7.358772619984264,
|
|
"grad_norm": 0.22570367340945718,
|
|
"learning_rate": 1.0193560133532868e-05,
|
|
"loss": 0.2131,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 7.361919748229741,
|
|
"grad_norm": 0.2306105201682074,
|
|
"learning_rate": 1.017968049088076e-05,
|
|
"loss": 0.2166,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 7.3650668764752165,
|
|
"grad_norm": 0.2247866318155448,
|
|
"learning_rate": 1.0165813194084375e-05,
|
|
"loss": 0.2065,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 7.368214004720692,
|
|
"grad_norm": 0.22844131668659315,
|
|
"learning_rate": 1.0151958257627541e-05,
|
|
"loss": 0.2094,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 7.371361132966168,
|
|
"grad_norm": 0.23333574403162458,
|
|
"learning_rate": 1.0138115695981207e-05,
|
|
"loss": 0.213,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 7.374508261211645,
|
|
"grad_norm": 0.21257237150019098,
|
|
"learning_rate": 1.0124285523603365e-05,
|
|
"loss": 0.2187,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 7.377655389457121,
|
|
"grad_norm": 0.22969384430433795,
|
|
"learning_rate": 1.01104677549391e-05,
|
|
"loss": 0.2108,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 7.380802517702596,
|
|
"grad_norm": 0.23754367381929004,
|
|
"learning_rate": 1.0096662404420501e-05,
|
|
"loss": 0.2132,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 7.383949645948072,
|
|
"grad_norm": 0.22700013636080565,
|
|
"learning_rate": 1.0082869486466729e-05,
|
|
"loss": 0.2067,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 7.387096774193548,
|
|
"grad_norm": 0.23919755857430938,
|
|
"learning_rate": 1.006908901548394e-05,
|
|
"loss": 0.2117,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 7.390243902439025,
|
|
"grad_norm": 0.227136733402989,
|
|
"learning_rate": 1.0055321005865277e-05,
|
|
"loss": 0.2162,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 7.3933910306845005,
|
|
"grad_norm": 0.23525073363793073,
|
|
"learning_rate": 1.0041565471990897e-05,
|
|
"loss": 0.2112,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 7.396538158929976,
|
|
"grad_norm": 0.2321185458009399,
|
|
"learning_rate": 1.0027822428227889e-05,
|
|
"loss": 0.215,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 7.399685287175452,
|
|
"grad_norm": 0.23528217492361306,
|
|
"learning_rate": 1.0014091888930344e-05,
|
|
"loss": 0.2142,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 7.402832415420928,
|
|
"grad_norm": 0.22749689788373387,
|
|
"learning_rate": 1.0000373868439248e-05,
|
|
"loss": 0.2158,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 7.405979543666405,
|
|
"grad_norm": 0.2404493638710273,
|
|
"learning_rate": 9.986668381082545e-06,
|
|
"loss": 0.2168,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 7.4091266719118805,
|
|
"grad_norm": 0.22585072391780345,
|
|
"learning_rate": 9.972975441175057e-06,
|
|
"loss": 0.2164,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 7.412273800157356,
|
|
"grad_norm": 0.23795916213633916,
|
|
"learning_rate": 9.959295063018526e-06,
|
|
"loss": 0.215,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 7.415420928402832,
|
|
"grad_norm": 0.23204552138933593,
|
|
"learning_rate": 9.945627260901571e-06,
|
|
"loss": 0.2174,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 7.418568056648309,
|
|
"grad_norm": 0.22824560817611173,
|
|
"learning_rate": 9.93197204909966e-06,
|
|
"loss": 0.2111,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 7.421715184893785,
|
|
"grad_norm": 0.2358749174129253,
|
|
"learning_rate": 9.918329441875129e-06,
|
|
"loss": 0.2132,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 7.42486231313926,
|
|
"grad_norm": 0.23304655894118764,
|
|
"learning_rate": 9.904699453477136e-06,
|
|
"loss": 0.2121,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 7.428009441384736,
|
|
"grad_norm": 0.2305516088388655,
|
|
"learning_rate": 9.891082098141667e-06,
|
|
"loss": 0.2165,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 7.431156569630213,
|
|
"grad_norm": 0.23079140563064027,
|
|
"learning_rate": 9.877477390091509e-06,
|
|
"loss": 0.2141,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 7.434303697875689,
|
|
"grad_norm": 0.22387025416375533,
|
|
"learning_rate": 9.863885343536238e-06,
|
|
"loss": 0.2121,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 7.4374508261211645,
|
|
"grad_norm": 0.22787402873623003,
|
|
"learning_rate": 9.850305972672214e-06,
|
|
"loss": 0.2203,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 7.44059795436664,
|
|
"grad_norm": 0.22535783554358702,
|
|
"learning_rate": 9.836739291682543e-06,
|
|
"loss": 0.2154,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 7.443745082612116,
|
|
"grad_norm": 0.22981126911531366,
|
|
"learning_rate": 9.823185314737104e-06,
|
|
"loss": 0.2156,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 7.446892210857593,
|
|
"grad_norm": 0.23338901289009809,
|
|
"learning_rate": 9.809644055992471e-06,
|
|
"loss": 0.2112,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 7.450039339103069,
|
|
"grad_norm": 0.24023663975496,
|
|
"learning_rate": 9.796115529591967e-06,
|
|
"loss": 0.2093,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 7.453186467348544,
|
|
"grad_norm": 0.22580520597689485,
|
|
"learning_rate": 9.78259974966559e-06,
|
|
"loss": 0.2175,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 7.45633359559402,
|
|
"grad_norm": 0.2221577009585905,
|
|
"learning_rate": 9.769096730330047e-06,
|
|
"loss": 0.2128,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 7.459480723839496,
|
|
"grad_norm": 0.2314324525926755,
|
|
"learning_rate": 9.755606485688695e-06,
|
|
"loss": 0.2064,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 7.462627852084973,
|
|
"grad_norm": 0.2234171977467309,
|
|
"learning_rate": 9.742129029831569e-06,
|
|
"loss": 0.2137,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 7.4657749803304485,
|
|
"grad_norm": 0.23731134897981873,
|
|
"learning_rate": 9.728664376835343e-06,
|
|
"loss": 0.2134,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 7.468922108575924,
|
|
"grad_norm": 0.21962718713348828,
|
|
"learning_rate": 9.7152125407633e-06,
|
|
"loss": 0.2108,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 7.4720692368214,
|
|
"grad_norm": 0.2207798183423775,
|
|
"learning_rate": 9.701773535665366e-06,
|
|
"loss": 0.2101,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 7.475216365066877,
|
|
"grad_norm": 0.23437916694512362,
|
|
"learning_rate": 9.688347375578033e-06,
|
|
"loss": 0.2154,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 7.478363493312353,
|
|
"grad_norm": 0.22408835369735966,
|
|
"learning_rate": 9.674934074524411e-06,
|
|
"loss": 0.2172,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 7.481510621557828,
|
|
"grad_norm": 0.22121992831685067,
|
|
"learning_rate": 9.661533646514142e-06,
|
|
"loss": 0.2088,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 7.484657749803304,
|
|
"grad_norm": 0.21478252709139647,
|
|
"learning_rate": 9.648146105543457e-06,
|
|
"loss": 0.213,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 7.487804878048781,
|
|
"grad_norm": 0.22236538402387201,
|
|
"learning_rate": 9.634771465595109e-06,
|
|
"loss": 0.2146,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 7.490952006294257,
|
|
"grad_norm": 0.2329798548119093,
|
|
"learning_rate": 9.62140974063838e-06,
|
|
"loss": 0.2147,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 7.4940991345397325,
|
|
"grad_norm": 0.20764196366436552,
|
|
"learning_rate": 9.608060944629065e-06,
|
|
"loss": 0.2158,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 7.497246262785208,
|
|
"grad_norm": 0.22039448738264225,
|
|
"learning_rate": 9.59472509150945e-06,
|
|
"loss": 0.2131,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 7.500393391030684,
|
|
"grad_norm": 0.21967232307742138,
|
|
"learning_rate": 9.581402195208307e-06,
|
|
"loss": 0.2155,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 7.503540519276161,
|
|
"grad_norm": 0.23165634584475214,
|
|
"learning_rate": 9.568092269640867e-06,
|
|
"loss": 0.2058,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 7.506687647521637,
|
|
"grad_norm": 0.21342819732195714,
|
|
"learning_rate": 9.554795328708833e-06,
|
|
"loss": 0.2212,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 7.5098347757671124,
|
|
"grad_norm": 0.21653855605412423,
|
|
"learning_rate": 9.541511386300321e-06,
|
|
"loss": 0.2184,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 7.512981904012588,
|
|
"grad_norm": 0.212878668638118,
|
|
"learning_rate": 9.528240456289887e-06,
|
|
"loss": 0.2191,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 7.516129032258064,
|
|
"grad_norm": 0.22346959923074045,
|
|
"learning_rate": 9.5149825525385e-06,
|
|
"loss": 0.214,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 7.519276160503541,
|
|
"grad_norm": 0.22445356716908701,
|
|
"learning_rate": 9.5017376888935e-06,
|
|
"loss": 0.2115,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 7.522423288749017,
|
|
"grad_norm": 0.22584124484887466,
|
|
"learning_rate": 9.488505879188638e-06,
|
|
"loss": 0.2104,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 7.525570416994492,
|
|
"grad_norm": 0.2278568808064928,
|
|
"learning_rate": 9.475287137244006e-06,
|
|
"loss": 0.2119,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 7.528717545239968,
|
|
"grad_norm": 0.22171779725801655,
|
|
"learning_rate": 9.462081476866061e-06,
|
|
"loss": 0.2092,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 7.531864673485445,
|
|
"grad_norm": 0.22622915992130538,
|
|
"learning_rate": 9.44888891184758e-06,
|
|
"loss": 0.2116,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 7.535011801730921,
|
|
"grad_norm": 0.21608386506577046,
|
|
"learning_rate": 9.435709455967696e-06,
|
|
"loss": 0.2125,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 7.5381589299763965,
|
|
"grad_norm": 0.2190850436150323,
|
|
"learning_rate": 9.422543122991816e-06,
|
|
"loss": 0.215,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 7.541306058221872,
|
|
"grad_norm": 0.22266706339169948,
|
|
"learning_rate": 9.409389926671652e-06,
|
|
"loss": 0.2231,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 7.544453186467349,
|
|
"grad_norm": 0.223823120767086,
|
|
"learning_rate": 9.396249880745208e-06,
|
|
"loss": 0.2096,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 7.547600314712825,
|
|
"grad_norm": 0.21474334984155782,
|
|
"learning_rate": 9.383122998936728e-06,
|
|
"loss": 0.2211,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 7.550747442958301,
|
|
"grad_norm": 0.22017470608910686,
|
|
"learning_rate": 9.370009294956731e-06,
|
|
"loss": 0.2127,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 7.553894571203776,
|
|
"grad_norm": 0.22623985048590417,
|
|
"learning_rate": 9.356908782501953e-06,
|
|
"loss": 0.2079,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 7.557041699449252,
|
|
"grad_norm": 0.21790079455618502,
|
|
"learning_rate": 9.34382147525537e-06,
|
|
"loss": 0.2084,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 7.560188827694729,
|
|
"grad_norm": 0.23038553545405369,
|
|
"learning_rate": 9.330747386886145e-06,
|
|
"loss": 0.2144,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 7.563335955940205,
|
|
"grad_norm": 0.22823441881568057,
|
|
"learning_rate": 9.317686531049651e-06,
|
|
"loss": 0.2155,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 7.5664830841856805,
|
|
"grad_norm": 0.22875720062537966,
|
|
"learning_rate": 9.30463892138744e-06,
|
|
"loss": 0.2163,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 7.569630212431156,
|
|
"grad_norm": 0.2289164728738839,
|
|
"learning_rate": 9.291604571527218e-06,
|
|
"loss": 0.2136,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 7.572777340676632,
|
|
"grad_norm": 0.215177642884113,
|
|
"learning_rate": 9.27858349508285e-06,
|
|
"loss": 0.2091,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 7.575924468922109,
|
|
"grad_norm": 0.2274883491248114,
|
|
"learning_rate": 9.265575705654322e-06,
|
|
"loss": 0.2109,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 7.579071597167585,
|
|
"grad_norm": 0.23063099015253677,
|
|
"learning_rate": 9.252581216827778e-06,
|
|
"loss": 0.2007,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 7.58221872541306,
|
|
"grad_norm": 0.2218214636743055,
|
|
"learning_rate": 9.23960004217543e-06,
|
|
"loss": 0.2054,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 7.585365853658536,
|
|
"grad_norm": 0.23479587301596305,
|
|
"learning_rate": 9.226632195255612e-06,
|
|
"loss": 0.2109,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 7.588512981904013,
|
|
"grad_norm": 0.22498307321621322,
|
|
"learning_rate": 9.213677689612714e-06,
|
|
"loss": 0.2105,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 7.591660110149489,
|
|
"grad_norm": 0.23788611222524667,
|
|
"learning_rate": 9.200736538777214e-06,
|
|
"loss": 0.2082,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 7.5948072383949645,
|
|
"grad_norm": 0.21636929208162226,
|
|
"learning_rate": 9.18780875626563e-06,
|
|
"loss": 0.2097,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 7.59795436664044,
|
|
"grad_norm": 0.22912436986509432,
|
|
"learning_rate": 9.174894355580514e-06,
|
|
"loss": 0.208,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 7.601101494885917,
|
|
"grad_norm": 0.22835007748766775,
|
|
"learning_rate": 9.161993350210457e-06,
|
|
"loss": 0.2086,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 7.604248623131393,
|
|
"grad_norm": 0.22707280711620756,
|
|
"learning_rate": 9.149105753630033e-06,
|
|
"loss": 0.2137,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 7.607395751376869,
|
|
"grad_norm": 0.23071685677491993,
|
|
"learning_rate": 9.136231579299843e-06,
|
|
"loss": 0.2116,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 7.610542879622344,
|
|
"grad_norm": 0.21489892065441007,
|
|
"learning_rate": 9.123370840666437e-06,
|
|
"loss": 0.2108,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 7.61369000786782,
|
|
"grad_norm": 0.22666390058768177,
|
|
"learning_rate": 9.110523551162355e-06,
|
|
"loss": 0.2129,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 7.616837136113297,
|
|
"grad_norm": 0.22108033103329094,
|
|
"learning_rate": 9.097689724206085e-06,
|
|
"loss": 0.2147,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 7.619984264358773,
|
|
"grad_norm": 0.22927754219244445,
|
|
"learning_rate": 9.084869373202036e-06,
|
|
"loss": 0.2122,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 7.6231313926042485,
|
|
"grad_norm": 0.21679471833735905,
|
|
"learning_rate": 9.072062511540583e-06,
|
|
"loss": 0.2118,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 7.626278520849724,
|
|
"grad_norm": 0.22320372473610817,
|
|
"learning_rate": 9.059269152597964e-06,
|
|
"loss": 0.2146,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 7.6294256490952,
|
|
"grad_norm": 0.22512553465766197,
|
|
"learning_rate": 9.046489309736348e-06,
|
|
"loss": 0.212,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 7.632572777340677,
|
|
"grad_norm": 0.2183847269049812,
|
|
"learning_rate": 9.033722996303768e-06,
|
|
"loss": 0.2158,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 7.635719905586153,
|
|
"grad_norm": 0.22036959109568996,
|
|
"learning_rate": 9.020970225634136e-06,
|
|
"loss": 0.2164,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 7.6388670338316285,
|
|
"grad_norm": 0.22687740918307078,
|
|
"learning_rate": 9.008231011047213e-06,
|
|
"loss": 0.2146,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 7.642014162077104,
|
|
"grad_norm": 0.22039894721380168,
|
|
"learning_rate": 8.995505365848605e-06,
|
|
"loss": 0.2133,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 7.645161290322581,
|
|
"grad_norm": 0.22178747844096786,
|
|
"learning_rate": 8.982793303329751e-06,
|
|
"loss": 0.218,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 7.648308418568057,
|
|
"grad_norm": 0.21692147792936287,
|
|
"learning_rate": 8.970094836767888e-06,
|
|
"loss": 0.222,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 7.651455546813533,
|
|
"grad_norm": 0.2138257230931039,
|
|
"learning_rate": 8.957409979426072e-06,
|
|
"loss": 0.2089,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 7.654602675059008,
|
|
"grad_norm": 0.2265413919770675,
|
|
"learning_rate": 8.944738744553121e-06,
|
|
"loss": 0.2172,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 7.657749803304485,
|
|
"grad_norm": 0.22297257163502948,
|
|
"learning_rate": 8.93208114538365e-06,
|
|
"loss": 0.2121,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 7.660896931549961,
|
|
"grad_norm": 0.22148672698728822,
|
|
"learning_rate": 8.91943719513801e-06,
|
|
"loss": 0.2088,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 7.664044059795437,
|
|
"grad_norm": 0.21736370577760047,
|
|
"learning_rate": 8.906806907022311e-06,
|
|
"loss": 0.2153,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 7.6671911880409125,
|
|
"grad_norm": 0.22466767010680358,
|
|
"learning_rate": 8.894190294228391e-06,
|
|
"loss": 0.21,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 7.670338316286388,
|
|
"grad_norm": 0.22112902451894795,
|
|
"learning_rate": 8.881587369933799e-06,
|
|
"loss": 0.2175,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 7.673485444531865,
|
|
"grad_norm": 0.2214421346522819,
|
|
"learning_rate": 8.8689981473018e-06,
|
|
"loss": 0.214,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 7.676632572777341,
|
|
"grad_norm": 0.21631613538316005,
|
|
"learning_rate": 8.856422639481324e-06,
|
|
"loss": 0.2084,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 7.679779701022817,
|
|
"grad_norm": 0.22609337537385527,
|
|
"learning_rate": 8.843860859607001e-06,
|
|
"loss": 0.2147,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 7.682926829268292,
|
|
"grad_norm": 0.22739242141285246,
|
|
"learning_rate": 8.831312820799108e-06,
|
|
"loss": 0.2177,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 7.686073957513768,
|
|
"grad_norm": 0.22087226671865368,
|
|
"learning_rate": 8.81877853616358e-06,
|
|
"loss": 0.215,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 7.689221085759245,
|
|
"grad_norm": 0.226834895900349,
|
|
"learning_rate": 8.80625801879197e-06,
|
|
"loss": 0.212,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 7.692368214004721,
|
|
"grad_norm": 0.23470020627965657,
|
|
"learning_rate": 8.793751281761473e-06,
|
|
"loss": 0.215,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 7.6955153422501965,
|
|
"grad_norm": 0.21858002999304377,
|
|
"learning_rate": 8.781258338134882e-06,
|
|
"loss": 0.2195,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 7.698662470495672,
|
|
"grad_norm": 0.22766506836176625,
|
|
"learning_rate": 8.768779200960573e-06,
|
|
"loss": 0.2141,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 7.701809598741149,
|
|
"grad_norm": 0.2416124281891341,
|
|
"learning_rate": 8.756313883272518e-06,
|
|
"loss": 0.206,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 7.704956726986625,
|
|
"grad_norm": 0.22684959206739022,
|
|
"learning_rate": 8.74386239809024e-06,
|
|
"loss": 0.217,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 7.708103855232101,
|
|
"grad_norm": 0.22333064815479248,
|
|
"learning_rate": 8.731424758418837e-06,
|
|
"loss": 0.2238,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 7.711250983477576,
|
|
"grad_norm": 0.23426086091700454,
|
|
"learning_rate": 8.719000977248909e-06,
|
|
"loss": 0.2159,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 7.714398111723053,
|
|
"grad_norm": 0.23296607438783742,
|
|
"learning_rate": 8.706591067556625e-06,
|
|
"loss": 0.2149,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 7.717545239968529,
|
|
"grad_norm": 0.21675819624949147,
|
|
"learning_rate": 8.694195042303631e-06,
|
|
"loss": 0.2143,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 7.720692368214005,
|
|
"grad_norm": 0.21790836958624593,
|
|
"learning_rate": 8.681812914437088e-06,
|
|
"loss": 0.2163,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 7.7238394964594805,
|
|
"grad_norm": 0.2274002177866238,
|
|
"learning_rate": 8.669444696889645e-06,
|
|
"loss": 0.2132,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 7.726986624704956,
|
|
"grad_norm": 0.22632872665252599,
|
|
"learning_rate": 8.657090402579406e-06,
|
|
"loss": 0.2117,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 7.730133752950433,
|
|
"grad_norm": 0.22423037534942736,
|
|
"learning_rate": 8.64475004440995e-06,
|
|
"loss": 0.2147,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 7.733280881195909,
|
|
"grad_norm": 0.21721365887439806,
|
|
"learning_rate": 8.632423635270284e-06,
|
|
"loss": 0.213,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 7.736428009441385,
|
|
"grad_norm": 0.22467979551975023,
|
|
"learning_rate": 8.620111188034862e-06,
|
|
"loss": 0.2131,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 7.7395751376868605,
|
|
"grad_norm": 0.21890044167587794,
|
|
"learning_rate": 8.60781271556354e-06,
|
|
"loss": 0.2233,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 7.742722265932336,
|
|
"grad_norm": 0.23103144172132506,
|
|
"learning_rate": 8.595528230701591e-06,
|
|
"loss": 0.2125,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 7.745869394177813,
|
|
"grad_norm": 0.21442112221294987,
|
|
"learning_rate": 8.583257746279678e-06,
|
|
"loss": 0.2132,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 7.749016522423289,
|
|
"grad_norm": 0.23258158194634532,
|
|
"learning_rate": 8.571001275113825e-06,
|
|
"loss": 0.2121,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 7.752163650668765,
|
|
"grad_norm": 0.218808737474262,
|
|
"learning_rate": 8.55875883000544e-06,
|
|
"loss": 0.2099,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 7.755310778914241,
|
|
"grad_norm": 0.22662779438337177,
|
|
"learning_rate": 8.546530423741258e-06,
|
|
"loss": 0.2139,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 7.758457907159717,
|
|
"grad_norm": 0.22023777628464106,
|
|
"learning_rate": 8.534316069093385e-06,
|
|
"loss": 0.2198,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 7.761605035405193,
|
|
"grad_norm": 0.22730348731809383,
|
|
"learning_rate": 8.52211577881922e-06,
|
|
"loss": 0.2203,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 7.764752163650669,
|
|
"grad_norm": 0.224778305802601,
|
|
"learning_rate": 8.509929565661486e-06,
|
|
"loss": 0.2144,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 7.7678992918961445,
|
|
"grad_norm": 0.21387975638987688,
|
|
"learning_rate": 8.497757442348194e-06,
|
|
"loss": 0.2193,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 7.771046420141621,
|
|
"grad_norm": 0.22076113317664492,
|
|
"learning_rate": 8.485599421592648e-06,
|
|
"loss": 0.2212,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 7.774193548387097,
|
|
"grad_norm": 0.2191624701650899,
|
|
"learning_rate": 8.473455516093427e-06,
|
|
"loss": 0.2194,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 7.777340676632573,
|
|
"grad_norm": 0.22080484859112698,
|
|
"learning_rate": 8.461325738534349e-06,
|
|
"loss": 0.2166,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 7.780487804878049,
|
|
"grad_norm": 0.2286105390431359,
|
|
"learning_rate": 8.449210101584495e-06,
|
|
"loss": 0.2101,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 7.783634933123524,
|
|
"grad_norm": 0.22181973516008022,
|
|
"learning_rate": 8.43710861789816e-06,
|
|
"loss": 0.2111,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 7.786782061369001,
|
|
"grad_norm": 0.22834807098550453,
|
|
"learning_rate": 8.42502130011487e-06,
|
|
"loss": 0.2203,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 7.789929189614477,
|
|
"grad_norm": 0.2155756950754227,
|
|
"learning_rate": 8.412948160859346e-06,
|
|
"loss": 0.2078,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 7.793076317859953,
|
|
"grad_norm": 0.22460211874784158,
|
|
"learning_rate": 8.400889212741506e-06,
|
|
"loss": 0.2138,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 7.7962234461054285,
|
|
"grad_norm": 0.23266223147738663,
|
|
"learning_rate": 8.388844468356447e-06,
|
|
"loss": 0.2082,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 7.799370574350904,
|
|
"grad_norm": 0.22066055467773144,
|
|
"learning_rate": 8.37681394028442e-06,
|
|
"loss": 0.2167,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 7.802517702596381,
|
|
"grad_norm": 0.22436154174278092,
|
|
"learning_rate": 8.364797641090839e-06,
|
|
"loss": 0.2219,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 7.805664830841857,
|
|
"grad_norm": 0.22160888608683596,
|
|
"learning_rate": 8.352795583326255e-06,
|
|
"loss": 0.2205,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 7.808811959087333,
|
|
"grad_norm": 0.2226551886278477,
|
|
"learning_rate": 8.340807779526345e-06,
|
|
"loss": 0.2176,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 7.811959087332809,
|
|
"grad_norm": 0.21580763083372614,
|
|
"learning_rate": 8.328834242211887e-06,
|
|
"loss": 0.2163,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 7.815106215578285,
|
|
"grad_norm": 0.22040811507283187,
|
|
"learning_rate": 8.316874983888774e-06,
|
|
"loss": 0.2107,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 7.818253343823761,
|
|
"grad_norm": 0.2291989714865517,
|
|
"learning_rate": 8.304930017047969e-06,
|
|
"loss": 0.2032,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 7.821400472069237,
|
|
"grad_norm": 0.22911848156820763,
|
|
"learning_rate": 8.292999354165525e-06,
|
|
"loss": 0.2082,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 7.8245476003147125,
|
|
"grad_norm": 0.23926926990020336,
|
|
"learning_rate": 8.281083007702546e-06,
|
|
"loss": 0.2095,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 7.827694728560189,
|
|
"grad_norm": 0.22512534340178053,
|
|
"learning_rate": 8.26918099010518e-06,
|
|
"loss": 0.2173,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 7.830841856805665,
|
|
"grad_norm": 0.2167774809962824,
|
|
"learning_rate": 8.25729331380462e-06,
|
|
"loss": 0.2151,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 7.833988985051141,
|
|
"grad_norm": 0.2129622004627707,
|
|
"learning_rate": 8.245419991217063e-06,
|
|
"loss": 0.2175,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 7.837136113296617,
|
|
"grad_norm": 0.2234270903188792,
|
|
"learning_rate": 8.233561034743737e-06,
|
|
"loss": 0.2117,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 7.840283241542092,
|
|
"grad_norm": 0.22836568327537848,
|
|
"learning_rate": 8.221716456770838e-06,
|
|
"loss": 0.2136,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 7.843430369787569,
|
|
"grad_norm": 0.21719647596939415,
|
|
"learning_rate": 8.209886269669569e-06,
|
|
"loss": 0.216,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 7.846577498033045,
|
|
"grad_norm": 0.2266420312585156,
|
|
"learning_rate": 8.198070485796087e-06,
|
|
"loss": 0.2156,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 7.849724626278521,
|
|
"grad_norm": 0.2231981024827487,
|
|
"learning_rate": 8.186269117491515e-06,
|
|
"loss": 0.2078,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 7.8528717545239966,
|
|
"grad_norm": 0.2216098578898215,
|
|
"learning_rate": 8.174482177081914e-06,
|
|
"loss": 0.2098,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 7.856018882769473,
|
|
"grad_norm": 0.22092053778425194,
|
|
"learning_rate": 8.162709676878274e-06,
|
|
"loss": 0.2149,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 7.859166011014949,
|
|
"grad_norm": 0.2181787736211054,
|
|
"learning_rate": 8.15095162917651e-06,
|
|
"loss": 0.2147,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 7.862313139260425,
|
|
"grad_norm": 0.21707933494315532,
|
|
"learning_rate": 8.13920804625743e-06,
|
|
"loss": 0.2144,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 7.865460267505901,
|
|
"grad_norm": 0.21885761883840674,
|
|
"learning_rate": 8.12747894038675e-06,
|
|
"loss": 0.2176,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 7.868607395751377,
|
|
"grad_norm": 0.21519099154642782,
|
|
"learning_rate": 8.115764323815047e-06,
|
|
"loss": 0.2092,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 7.871754523996853,
|
|
"grad_norm": 0.2218096537191133,
|
|
"learning_rate": 8.10406420877778e-06,
|
|
"loss": 0.2142,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 7.874901652242329,
|
|
"grad_norm": 0.2160268035618521,
|
|
"learning_rate": 8.092378607495259e-06,
|
|
"loss": 0.2128,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 7.878048780487805,
|
|
"grad_norm": 0.21717472587214887,
|
|
"learning_rate": 8.080707532172621e-06,
|
|
"loss": 0.2089,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 7.881195908733281,
|
|
"grad_norm": 0.22978681303101012,
|
|
"learning_rate": 8.069050994999859e-06,
|
|
"loss": 0.2159,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 7.884343036978757,
|
|
"grad_norm": 0.2145432190672201,
|
|
"learning_rate": 8.057409008151747e-06,
|
|
"loss": 0.2191,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 7.887490165224233,
|
|
"grad_norm": 0.21225002883741487,
|
|
"learning_rate": 8.04578158378789e-06,
|
|
"loss": 0.213,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 7.890637293469709,
|
|
"grad_norm": 0.2238498839813341,
|
|
"learning_rate": 8.034168734052665e-06,
|
|
"loss": 0.2166,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 7.893784421715185,
|
|
"grad_norm": 0.20967970929834082,
|
|
"learning_rate": 8.022570471075239e-06,
|
|
"loss": 0.221,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 7.8969315499606605,
|
|
"grad_norm": 0.21708326506558323,
|
|
"learning_rate": 8.010986806969536e-06,
|
|
"loss": 0.2168,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 7.900078678206137,
|
|
"grad_norm": 0.2194455638945262,
|
|
"learning_rate": 7.999417753834237e-06,
|
|
"loss": 0.2159,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 7.903225806451613,
|
|
"grad_norm": 0.22999330162628176,
|
|
"learning_rate": 7.987863323752768e-06,
|
|
"loss": 0.2152,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 7.906372934697089,
|
|
"grad_norm": 0.23123822749524514,
|
|
"learning_rate": 7.976323528793253e-06,
|
|
"loss": 0.2114,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 7.909520062942565,
|
|
"grad_norm": 0.20876015592600447,
|
|
"learning_rate": 7.964798381008572e-06,
|
|
"loss": 0.2187,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 7.912667191188041,
|
|
"grad_norm": 0.23282553158404024,
|
|
"learning_rate": 7.95328789243627e-06,
|
|
"loss": 0.2141,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 7.915814319433517,
|
|
"grad_norm": 0.21665198107506672,
|
|
"learning_rate": 7.941792075098607e-06,
|
|
"loss": 0.22,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 7.918961447678993,
|
|
"grad_norm": 0.21353778052651864,
|
|
"learning_rate": 7.930310941002498e-06,
|
|
"loss": 0.2139,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 7.922108575924469,
|
|
"grad_norm": 0.21878104539924628,
|
|
"learning_rate": 7.918844502139542e-06,
|
|
"loss": 0.2178,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 7.925255704169945,
|
|
"grad_norm": 0.22382902656974832,
|
|
"learning_rate": 7.907392770485981e-06,
|
|
"loss": 0.2182,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 7.928402832415421,
|
|
"grad_norm": 0.21896656616114246,
|
|
"learning_rate": 7.895955758002692e-06,
|
|
"loss": 0.2046,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 7.931549960660897,
|
|
"grad_norm": 0.23683519616131976,
|
|
"learning_rate": 7.884533476635183e-06,
|
|
"loss": 0.2152,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 7.934697088906373,
|
|
"grad_norm": 0.2166277071545748,
|
|
"learning_rate": 7.873125938313572e-06,
|
|
"loss": 0.2107,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 7.937844217151849,
|
|
"grad_norm": 0.21951761687518714,
|
|
"learning_rate": 7.86173315495258e-06,
|
|
"loss": 0.2118,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 7.940991345397325,
|
|
"grad_norm": 0.2309665891777021,
|
|
"learning_rate": 7.850355138451522e-06,
|
|
"loss": 0.2136,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 7.944138473642801,
|
|
"grad_norm": 0.22022379458796373,
|
|
"learning_rate": 7.83899190069429e-06,
|
|
"loss": 0.2173,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 7.947285601888277,
|
|
"grad_norm": 0.2174782268352602,
|
|
"learning_rate": 7.827643453549325e-06,
|
|
"loss": 0.2192,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 7.950432730133753,
|
|
"grad_norm": 0.22169879590437622,
|
|
"learning_rate": 7.816309808869637e-06,
|
|
"loss": 0.2184,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 7.9535798583792285,
|
|
"grad_norm": 0.22065198802434965,
|
|
"learning_rate": 7.804990978492774e-06,
|
|
"loss": 0.2114,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 7.956726986624705,
|
|
"grad_norm": 0.2165885857788757,
|
|
"learning_rate": 7.793686974240795e-06,
|
|
"loss": 0.2132,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 7.959874114870181,
|
|
"grad_norm": 0.22336276390335888,
|
|
"learning_rate": 7.782397807920297e-06,
|
|
"loss": 0.2137,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 7.963021243115657,
|
|
"grad_norm": 0.21596192334489273,
|
|
"learning_rate": 7.771123491322353e-06,
|
|
"loss": 0.2162,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 7.966168371361133,
|
|
"grad_norm": 0.2122590682034009,
|
|
"learning_rate": 7.759864036222556e-06,
|
|
"loss": 0.2154,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 7.969315499606609,
|
|
"grad_norm": 0.2247749539500636,
|
|
"learning_rate": 7.748619454380947e-06,
|
|
"loss": 0.2143,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 7.972462627852085,
|
|
"grad_norm": 0.22164707239632309,
|
|
"learning_rate": 7.737389757542051e-06,
|
|
"loss": 0.22,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 7.975609756097561,
|
|
"grad_norm": 0.21942953911808707,
|
|
"learning_rate": 7.72617495743485e-06,
|
|
"loss": 0.2142,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 7.978756884343037,
|
|
"grad_norm": 0.2169333255982246,
|
|
"learning_rate": 7.714975065772747e-06,
|
|
"loss": 0.2167,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 7.9819040125885135,
|
|
"grad_norm": 0.226087767263674,
|
|
"learning_rate": 7.70379009425359e-06,
|
|
"loss": 0.219,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 7.985051140833989,
|
|
"grad_norm": 0.22119104996323627,
|
|
"learning_rate": 7.692620054559641e-06,
|
|
"loss": 0.2148,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 7.988198269079465,
|
|
"grad_norm": 0.22663737097763387,
|
|
"learning_rate": 7.681464958357565e-06,
|
|
"loss": 0.2134,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 7.991345397324941,
|
|
"grad_norm": 0.22890196789986905,
|
|
"learning_rate": 7.670324817298414e-06,
|
|
"loss": 0.2118,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 7.994492525570417,
|
|
"grad_norm": 0.21331469668678782,
|
|
"learning_rate": 7.659199643017628e-06,
|
|
"loss": 0.2174,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 7.997639653815893,
|
|
"grad_norm": 0.2216721447509053,
|
|
"learning_rate": 7.648089447135005e-06,
|
|
"loss": 0.2133,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 8.003147128245477,
|
|
"grad_norm": 0.4349977764263755,
|
|
"learning_rate": 7.63699424125471e-06,
|
|
"loss": 0.3762,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 8.006294256490952,
|
|
"grad_norm": 0.26742441223502816,
|
|
"learning_rate": 7.62591403696525e-06,
|
|
"loss": 0.1644,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 8.009441384736428,
|
|
"grad_norm": 0.388106689437148,
|
|
"learning_rate": 7.614848845839449e-06,
|
|
"loss": 0.1651,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 8.012588512981903,
|
|
"grad_norm": 0.5323255528110843,
|
|
"learning_rate": 7.603798679434472e-06,
|
|
"loss": 0.1682,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 8.01573564122738,
|
|
"grad_norm": 0.2923949539924144,
|
|
"learning_rate": 7.592763549291768e-06,
|
|
"loss": 0.1656,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 8.018882769472857,
|
|
"grad_norm": 0.38423574858459186,
|
|
"learning_rate": 7.58174346693711e-06,
|
|
"loss": 0.1721,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 8.022029897718332,
|
|
"grad_norm": 0.3833329737299957,
|
|
"learning_rate": 7.570738443880521e-06,
|
|
"loss": 0.1648,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 8.025177025963808,
|
|
"grad_norm": 0.30621817890570635,
|
|
"learning_rate": 7.559748491616319e-06,
|
|
"loss": 0.1675,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 8.028324154209285,
|
|
"grad_norm": 0.3082779392109665,
|
|
"learning_rate": 7.54877362162308e-06,
|
|
"loss": 0.1712,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 8.03147128245476,
|
|
"grad_norm": 0.3706427459165308,
|
|
"learning_rate": 7.537813845363604e-06,
|
|
"loss": 0.1665,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 8.034618410700237,
|
|
"grad_norm": 0.32385321291038643,
|
|
"learning_rate": 7.5268691742849665e-06,
|
|
"loss": 0.1657,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 8.037765538945711,
|
|
"grad_norm": 0.25807590952385684,
|
|
"learning_rate": 7.5159396198184246e-06,
|
|
"loss": 0.1641,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 8.040912667191188,
|
|
"grad_norm": 0.2867986489695364,
|
|
"learning_rate": 7.505025193379478e-06,
|
|
"loss": 0.1646,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 8.044059795436665,
|
|
"grad_norm": 0.29169617877555604,
|
|
"learning_rate": 7.494125906367801e-06,
|
|
"loss": 0.1613,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 8.04720692368214,
|
|
"grad_norm": 0.2527672771226682,
|
|
"learning_rate": 7.48324177016728e-06,
|
|
"loss": 0.1572,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 8.050354051927616,
|
|
"grad_norm": 0.2836106342803346,
|
|
"learning_rate": 7.47237279614595e-06,
|
|
"loss": 0.1687,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 8.053501180173091,
|
|
"grad_norm": 0.2975001770525274,
|
|
"learning_rate": 7.461518995656034e-06,
|
|
"loss": 0.164,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 8.056648308418568,
|
|
"grad_norm": 0.2735520742422293,
|
|
"learning_rate": 7.450680380033897e-06,
|
|
"loss": 0.1683,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 8.059795436664045,
|
|
"grad_norm": 0.2449222310949691,
|
|
"learning_rate": 7.439856960600038e-06,
|
|
"loss": 0.1634,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 8.06294256490952,
|
|
"grad_norm": 0.26957950448412327,
|
|
"learning_rate": 7.429048748659098e-06,
|
|
"loss": 0.164,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 8.066089693154996,
|
|
"grad_norm": 0.2468154213899654,
|
|
"learning_rate": 7.418255755499817e-06,
|
|
"loss": 0.1635,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 8.069236821400471,
|
|
"grad_norm": 0.24374149361084566,
|
|
"learning_rate": 7.407477992395058e-06,
|
|
"loss": 0.1653,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 8.072383949645948,
|
|
"grad_norm": 0.2639345175746216,
|
|
"learning_rate": 7.396715470601759e-06,
|
|
"loss": 0.1654,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 8.075531077891425,
|
|
"grad_norm": 0.2592056292755547,
|
|
"learning_rate": 7.385968201360953e-06,
|
|
"loss": 0.17,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 8.0786782061369,
|
|
"grad_norm": 0.24098620754378253,
|
|
"learning_rate": 7.375236195897737e-06,
|
|
"loss": 0.1598,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 8.081825334382376,
|
|
"grad_norm": 0.2392626040847066,
|
|
"learning_rate": 7.364519465421265e-06,
|
|
"loss": 0.1664,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 8.084972462627853,
|
|
"grad_norm": 0.23598245288642505,
|
|
"learning_rate": 7.353818021124745e-06,
|
|
"loss": 0.1676,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 8.088119590873328,
|
|
"grad_norm": 0.2434889824957857,
|
|
"learning_rate": 7.343131874185396e-06,
|
|
"loss": 0.1528,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 8.091266719118805,
|
|
"grad_norm": 0.23185343146126583,
|
|
"learning_rate": 7.332461035764492e-06,
|
|
"loss": 0.162,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 8.09441384736428,
|
|
"grad_norm": 0.24759550291938406,
|
|
"learning_rate": 7.32180551700729e-06,
|
|
"loss": 0.1643,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 8.097560975609756,
|
|
"grad_norm": 0.24247858902191025,
|
|
"learning_rate": 7.311165329043064e-06,
|
|
"loss": 0.1676,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 8.100708103855233,
|
|
"grad_norm": 0.23072668662855816,
|
|
"learning_rate": 7.300540482985061e-06,
|
|
"loss": 0.1659,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 8.103855232100708,
|
|
"grad_norm": 0.23644287711658252,
|
|
"learning_rate": 7.289930989930518e-06,
|
|
"loss": 0.1628,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 8.107002360346184,
|
|
"grad_norm": 0.22627740006753835,
|
|
"learning_rate": 7.279336860960633e-06,
|
|
"loss": 0.1606,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 8.11014948859166,
|
|
"grad_norm": 0.2291150949130993,
|
|
"learning_rate": 7.26875810714055e-06,
|
|
"loss": 0.1654,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 8.113296616837136,
|
|
"grad_norm": 0.28213249790664724,
|
|
"learning_rate": 7.25819473951936e-06,
|
|
"loss": 0.1754,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 8.116443745082613,
|
|
"grad_norm": 0.22998099293791194,
|
|
"learning_rate": 7.247646769130079e-06,
|
|
"loss": 0.1657,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 8.119590873328088,
|
|
"grad_norm": 0.228994767720687,
|
|
"learning_rate": 7.237114206989646e-06,
|
|
"loss": 0.1612,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 8.122738001573564,
|
|
"grad_norm": 0.22828083602687194,
|
|
"learning_rate": 7.226597064098905e-06,
|
|
"loss": 0.1579,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 8.12588512981904,
|
|
"grad_norm": 0.2296606678197253,
|
|
"learning_rate": 7.216095351442604e-06,
|
|
"loss": 0.164,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 8.129032258064516,
|
|
"grad_norm": 0.22801101601758597,
|
|
"learning_rate": 7.205609079989353e-06,
|
|
"loss": 0.1659,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 8.132179386309993,
|
|
"grad_norm": 0.2375397466097386,
|
|
"learning_rate": 7.195138260691652e-06,
|
|
"loss": 0.1615,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 8.135326514555468,
|
|
"grad_norm": 0.24227397129094566,
|
|
"learning_rate": 7.184682904485862e-06,
|
|
"loss": 0.1659,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 8.138473642800944,
|
|
"grad_norm": 0.23277206582166304,
|
|
"learning_rate": 7.1742430222921834e-06,
|
|
"loss": 0.1593,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 8.141620771046421,
|
|
"grad_norm": 0.22865926008781765,
|
|
"learning_rate": 7.163818625014662e-06,
|
|
"loss": 0.1615,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 8.144767899291896,
|
|
"grad_norm": 0.2294968243718884,
|
|
"learning_rate": 7.1534097235411674e-06,
|
|
"loss": 0.1643,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 8.147915027537373,
|
|
"grad_norm": 0.22695319257199334,
|
|
"learning_rate": 7.143016328743384e-06,
|
|
"loss": 0.1676,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 8.151062155782848,
|
|
"grad_norm": 0.2305261334568713,
|
|
"learning_rate": 7.132638451476801e-06,
|
|
"loss": 0.1716,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 8.154209284028324,
|
|
"grad_norm": 0.22658443236517037,
|
|
"learning_rate": 7.122276102580698e-06,
|
|
"loss": 0.1693,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 8.157356412273801,
|
|
"grad_norm": 0.23678326060091193,
|
|
"learning_rate": 7.111929292878147e-06,
|
|
"loss": 0.1659,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 8.160503540519276,
|
|
"grad_norm": 0.23144801338294188,
|
|
"learning_rate": 7.101598033175973e-06,
|
|
"loss": 0.1667,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 8.163650668764753,
|
|
"grad_norm": 0.2255681221831429,
|
|
"learning_rate": 7.091282334264773e-06,
|
|
"loss": 0.1684,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 8.166797797010227,
|
|
"grad_norm": 0.23550516194771806,
|
|
"learning_rate": 7.080982206918873e-06,
|
|
"loss": 0.1624,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 8.169944925255704,
|
|
"grad_norm": 0.23560675125042624,
|
|
"learning_rate": 7.070697661896368e-06,
|
|
"loss": 0.1597,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 8.17309205350118,
|
|
"grad_norm": 0.231816404825124,
|
|
"learning_rate": 7.060428709939047e-06,
|
|
"loss": 0.1648,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 8.176239181746656,
|
|
"grad_norm": 0.23355018538739725,
|
|
"learning_rate": 7.050175361772427e-06,
|
|
"loss": 0.1626,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 8.179386309992132,
|
|
"grad_norm": 0.24417933186787055,
|
|
"learning_rate": 7.039937628105717e-06,
|
|
"loss": 0.1651,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 8.182533438237607,
|
|
"grad_norm": 0.22574700897136932,
|
|
"learning_rate": 7.029715519631832e-06,
|
|
"loss": 0.1671,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 8.185680566483084,
|
|
"grad_norm": 0.2304324967468494,
|
|
"learning_rate": 7.019509047027362e-06,
|
|
"loss": 0.1672,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 8.18882769472856,
|
|
"grad_norm": 0.2287503066121252,
|
|
"learning_rate": 7.0093182209525525e-06,
|
|
"loss": 0.1627,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 8.191974822974036,
|
|
"grad_norm": 0.2242905190122735,
|
|
"learning_rate": 6.9991430520513306e-06,
|
|
"loss": 0.1577,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 8.195121951219512,
|
|
"grad_norm": 0.2475609011866222,
|
|
"learning_rate": 6.988983550951245e-06,
|
|
"loss": 0.1644,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 8.198269079464989,
|
|
"grad_norm": 0.23890330471937485,
|
|
"learning_rate": 6.9788397282635044e-06,
|
|
"loss": 0.1644,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 8.201416207710464,
|
|
"grad_norm": 0.2386410693285585,
|
|
"learning_rate": 6.968711594582919e-06,
|
|
"loss": 0.164,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 8.20456333595594,
|
|
"grad_norm": 0.2389681606873896,
|
|
"learning_rate": 6.958599160487927e-06,
|
|
"loss": 0.1623,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 8.207710464201416,
|
|
"grad_norm": 0.22261291520501994,
|
|
"learning_rate": 6.948502436540572e-06,
|
|
"loss": 0.159,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 8.210857592446892,
|
|
"grad_norm": 0.22847054610493028,
|
|
"learning_rate": 6.93842143328647e-06,
|
|
"loss": 0.1602,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 8.214004720692369,
|
|
"grad_norm": 0.233317294983424,
|
|
"learning_rate": 6.928356161254845e-06,
|
|
"loss": 0.162,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 8.217151848937844,
|
|
"grad_norm": 0.23035769556977229,
|
|
"learning_rate": 6.91830663095846e-06,
|
|
"loss": 0.1653,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 8.22029897718332,
|
|
"grad_norm": 0.2348863615456292,
|
|
"learning_rate": 6.908272852893666e-06,
|
|
"loss": 0.1708,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 8.223446105428796,
|
|
"grad_norm": 0.22854836342550924,
|
|
"learning_rate": 6.898254837540333e-06,
|
|
"loss": 0.169,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 8.226593233674272,
|
|
"grad_norm": 0.22732100111251707,
|
|
"learning_rate": 6.888252595361895e-06,
|
|
"loss": 0.164,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 8.229740361919749,
|
|
"grad_norm": 0.2211306149730069,
|
|
"learning_rate": 6.878266136805284e-06,
|
|
"loss": 0.1649,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 8.232887490165224,
|
|
"grad_norm": 0.23144901547851038,
|
|
"learning_rate": 6.86829547230097e-06,
|
|
"loss": 0.1672,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 8.2360346184107,
|
|
"grad_norm": 0.23413830665280178,
|
|
"learning_rate": 6.858340612262916e-06,
|
|
"loss": 0.1644,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 8.239181746656175,
|
|
"grad_norm": 0.2245328322035716,
|
|
"learning_rate": 6.848401567088575e-06,
|
|
"loss": 0.1623,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 8.242328874901652,
|
|
"grad_norm": 0.2351914752245803,
|
|
"learning_rate": 6.838478347158893e-06,
|
|
"loss": 0.1568,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 8.245476003147129,
|
|
"grad_norm": 0.22741744869087863,
|
|
"learning_rate": 6.828570962838271e-06,
|
|
"loss": 0.1647,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 8.248623131392604,
|
|
"grad_norm": 0.234581482295964,
|
|
"learning_rate": 6.81867942447459e-06,
|
|
"loss": 0.1625,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 8.25177025963808,
|
|
"grad_norm": 0.23787978249548633,
|
|
"learning_rate": 6.808803742399162e-06,
|
|
"loss": 0.1643,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 8.254917387883557,
|
|
"grad_norm": 0.22558874923524821,
|
|
"learning_rate": 6.798943926926748e-06,
|
|
"loss": 0.1655,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 8.258064516129032,
|
|
"grad_norm": 0.23066847653534014,
|
|
"learning_rate": 6.7890999883555365e-06,
|
|
"loss": 0.1598,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 8.261211644374509,
|
|
"grad_norm": 0.24121585112670477,
|
|
"learning_rate": 6.779271936967129e-06,
|
|
"loss": 0.1671,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 8.264358772619984,
|
|
"grad_norm": 0.23631017657037634,
|
|
"learning_rate": 6.769459783026544e-06,
|
|
"loss": 0.1662,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 8.26750590086546,
|
|
"grad_norm": 0.2391507536037999,
|
|
"learning_rate": 6.759663536782177e-06,
|
|
"loss": 0.1666,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 8.270653029110937,
|
|
"grad_norm": 0.22647269595449845,
|
|
"learning_rate": 6.74988320846583e-06,
|
|
"loss": 0.1646,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 8.273800157356412,
|
|
"grad_norm": 0.23536140779260983,
|
|
"learning_rate": 6.740118808292657e-06,
|
|
"loss": 0.174,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 8.276947285601889,
|
|
"grad_norm": 0.2186614203495139,
|
|
"learning_rate": 6.730370346461198e-06,
|
|
"loss": 0.1717,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 8.280094413847364,
|
|
"grad_norm": 0.2322550042532507,
|
|
"learning_rate": 6.720637833153325e-06,
|
|
"loss": 0.1659,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 8.28324154209284,
|
|
"grad_norm": 0.23867083019820706,
|
|
"learning_rate": 6.710921278534269e-06,
|
|
"loss": 0.164,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 8.286388670338317,
|
|
"grad_norm": 0.23732190600347555,
|
|
"learning_rate": 6.7012206927525926e-06,
|
|
"loss": 0.1683,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 8.289535798583792,
|
|
"grad_norm": 0.23792842922753502,
|
|
"learning_rate": 6.69153608594016e-06,
|
|
"loss": 0.1552,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 8.292682926829269,
|
|
"grad_norm": 0.2310163348962354,
|
|
"learning_rate": 6.681867468212171e-06,
|
|
"loss": 0.1669,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 8.295830055074743,
|
|
"grad_norm": 0.22593964960603174,
|
|
"learning_rate": 6.672214849667107e-06,
|
|
"loss": 0.1649,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 8.29897718332022,
|
|
"grad_norm": 0.23274451068025068,
|
|
"learning_rate": 6.66257824038675e-06,
|
|
"loss": 0.1644,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 8.302124311565697,
|
|
"grad_norm": 0.2276653833702071,
|
|
"learning_rate": 6.652957650436149e-06,
|
|
"loss": 0.1631,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 8.305271439811172,
|
|
"grad_norm": 0.22912711462824803,
|
|
"learning_rate": 6.643353089863644e-06,
|
|
"loss": 0.1673,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 8.308418568056648,
|
|
"grad_norm": 0.2374453905619608,
|
|
"learning_rate": 6.633764568700805e-06,
|
|
"loss": 0.1633,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 8.311565696302125,
|
|
"grad_norm": 0.23454962799092569,
|
|
"learning_rate": 6.624192096962468e-06,
|
|
"loss": 0.1578,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 8.3147128245476,
|
|
"grad_norm": 0.22400278973493876,
|
|
"learning_rate": 6.614635684646704e-06,
|
|
"loss": 0.1665,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 8.317859952793077,
|
|
"grad_norm": 0.2367047312346527,
|
|
"learning_rate": 6.6050953417348e-06,
|
|
"loss": 0.1659,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 8.321007081038552,
|
|
"grad_norm": 0.2396724707707527,
|
|
"learning_rate": 6.595571078191273e-06,
|
|
"loss": 0.1618,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 8.324154209284028,
|
|
"grad_norm": 0.23140731829793698,
|
|
"learning_rate": 6.586062903963832e-06,
|
|
"loss": 0.1653,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 8.327301337529505,
|
|
"grad_norm": 0.2402337835141489,
|
|
"learning_rate": 6.576570828983397e-06,
|
|
"loss": 0.1685,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 8.33044846577498,
|
|
"grad_norm": 0.24165802205944656,
|
|
"learning_rate": 6.5670948631640575e-06,
|
|
"loss": 0.1714,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 8.333595594020457,
|
|
"grad_norm": 0.22826464282932363,
|
|
"learning_rate": 6.557635016403086e-06,
|
|
"loss": 0.1655,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 8.336742722265932,
|
|
"grad_norm": 0.232516141417856,
|
|
"learning_rate": 6.548191298580923e-06,
|
|
"loss": 0.1644,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 8.339889850511408,
|
|
"grad_norm": 0.22800084761954714,
|
|
"learning_rate": 6.538763719561149e-06,
|
|
"loss": 0.1725,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 8.343036978756885,
|
|
"grad_norm": 0.23124756998898505,
|
|
"learning_rate": 6.529352289190507e-06,
|
|
"loss": 0.1669,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 8.34618410700236,
|
|
"grad_norm": 0.23164406300800636,
|
|
"learning_rate": 6.51995701729885e-06,
|
|
"loss": 0.1606,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 8.349331235247837,
|
|
"grad_norm": 0.23456181103996557,
|
|
"learning_rate": 6.510577913699186e-06,
|
|
"loss": 0.1626,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 8.352478363493312,
|
|
"grad_norm": 0.22874955849477083,
|
|
"learning_rate": 6.501214988187601e-06,
|
|
"loss": 0.1624,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 8.355625491738788,
|
|
"grad_norm": 0.23854686972912492,
|
|
"learning_rate": 6.491868250543312e-06,
|
|
"loss": 0.1642,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 8.358772619984265,
|
|
"grad_norm": 0.2372645112138056,
|
|
"learning_rate": 6.4825377105286044e-06,
|
|
"loss": 0.1655,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 8.36191974822974,
|
|
"grad_norm": 0.2319500615137312,
|
|
"learning_rate": 6.473223377888865e-06,
|
|
"loss": 0.1701,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 8.365066876475217,
|
|
"grad_norm": 0.23635738733339692,
|
|
"learning_rate": 6.463925262352549e-06,
|
|
"loss": 0.1648,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 8.368214004720693,
|
|
"grad_norm": 0.22916613347850073,
|
|
"learning_rate": 6.454643373631161e-06,
|
|
"loss": 0.167,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 8.371361132966168,
|
|
"grad_norm": 0.2343271870579212,
|
|
"learning_rate": 6.445377721419274e-06,
|
|
"loss": 0.1687,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 8.374508261211645,
|
|
"grad_norm": 0.23188613838557393,
|
|
"learning_rate": 6.436128315394487e-06,
|
|
"loss": 0.1626,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 8.37765538945712,
|
|
"grad_norm": 0.23834493012382535,
|
|
"learning_rate": 6.426895165217448e-06,
|
|
"loss": 0.17,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 8.380802517702596,
|
|
"grad_norm": 0.23537207044394082,
|
|
"learning_rate": 6.417678280531808e-06,
|
|
"loss": 0.1623,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 8.383949645948073,
|
|
"grad_norm": 0.23842873789085556,
|
|
"learning_rate": 6.408477670964244e-06,
|
|
"loss": 0.1671,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 8.387096774193548,
|
|
"grad_norm": 0.22315005761815868,
|
|
"learning_rate": 6.399293346124427e-06,
|
|
"loss": 0.1648,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 8.390243902439025,
|
|
"grad_norm": 0.2369572676127876,
|
|
"learning_rate": 6.390125315605016e-06,
|
|
"loss": 0.1669,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 8.3933910306845,
|
|
"grad_norm": 0.23656957128379635,
|
|
"learning_rate": 6.380973588981662e-06,
|
|
"loss": 0.1658,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 8.396538158929976,
|
|
"grad_norm": 0.23549710286212458,
|
|
"learning_rate": 6.371838175812977e-06,
|
|
"loss": 0.165,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 8.399685287175453,
|
|
"grad_norm": 0.23821077849443947,
|
|
"learning_rate": 6.362719085640544e-06,
|
|
"loss": 0.1644,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 8.402832415420928,
|
|
"grad_norm": 0.23812845227381751,
|
|
"learning_rate": 6.353616327988885e-06,
|
|
"loss": 0.1695,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 8.405979543666405,
|
|
"grad_norm": 0.23151702473551566,
|
|
"learning_rate": 6.344529912365477e-06,
|
|
"loss": 0.1664,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 8.40912667191188,
|
|
"grad_norm": 0.23515427535640315,
|
|
"learning_rate": 6.335459848260712e-06,
|
|
"loss": 0.1628,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 8.412273800157356,
|
|
"grad_norm": 0.24148276293516086,
|
|
"learning_rate": 6.326406145147919e-06,
|
|
"loss": 0.165,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 8.415420928402833,
|
|
"grad_norm": 0.22961657881825784,
|
|
"learning_rate": 6.3173688124833354e-06,
|
|
"loss": 0.1566,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 8.418568056648308,
|
|
"grad_norm": 0.23330366260196786,
|
|
"learning_rate": 6.3083478597060895e-06,
|
|
"loss": 0.1679,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 8.421715184893785,
|
|
"grad_norm": 0.23139753589023687,
|
|
"learning_rate": 6.299343296238215e-06,
|
|
"loss": 0.1715,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 8.424862313139261,
|
|
"grad_norm": 0.23233022652711008,
|
|
"learning_rate": 6.290355131484619e-06,
|
|
"loss": 0.1625,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 8.428009441384736,
|
|
"grad_norm": 0.23192930432672745,
|
|
"learning_rate": 6.281383374833088e-06,
|
|
"loss": 0.1661,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 8.431156569630213,
|
|
"grad_norm": 0.237960894230701,
|
|
"learning_rate": 6.272428035654258e-06,
|
|
"loss": 0.1664,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 8.434303697875688,
|
|
"grad_norm": 0.23781605768359015,
|
|
"learning_rate": 6.263489123301633e-06,
|
|
"loss": 0.1682,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 8.437450826121164,
|
|
"grad_norm": 0.23308152261123055,
|
|
"learning_rate": 6.254566647111552e-06,
|
|
"loss": 0.1684,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 8.440597954366641,
|
|
"grad_norm": 0.24050735086700006,
|
|
"learning_rate": 6.2456606164031865e-06,
|
|
"loss": 0.1691,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 8.443745082612116,
|
|
"grad_norm": 0.2332745071775824,
|
|
"learning_rate": 6.23677104047854e-06,
|
|
"loss": 0.1684,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 8.446892210857593,
|
|
"grad_norm": 0.23439482464949807,
|
|
"learning_rate": 6.22789792862241e-06,
|
|
"loss": 0.1644,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 8.450039339103068,
|
|
"grad_norm": 0.22619429799525462,
|
|
"learning_rate": 6.219041290102423e-06,
|
|
"loss": 0.1633,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 8.453186467348544,
|
|
"grad_norm": 0.23111017539558812,
|
|
"learning_rate": 6.210201134168976e-06,
|
|
"loss": 0.1686,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 8.456333595594021,
|
|
"grad_norm": 0.23077386898729263,
|
|
"learning_rate": 6.201377470055274e-06,
|
|
"loss": 0.1643,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 8.459480723839496,
|
|
"grad_norm": 0.2314358817989027,
|
|
"learning_rate": 6.192570306977274e-06,
|
|
"loss": 0.1659,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 8.462627852084973,
|
|
"grad_norm": 0.2429161146850131,
|
|
"learning_rate": 6.183779654133711e-06,
|
|
"loss": 0.1658,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 8.465774980330448,
|
|
"grad_norm": 0.22921232924755053,
|
|
"learning_rate": 6.175005520706083e-06,
|
|
"loss": 0.1753,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 8.468922108575924,
|
|
"grad_norm": 0.23660956898638807,
|
|
"learning_rate": 6.166247915858612e-06,
|
|
"loss": 0.1641,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 8.472069236821401,
|
|
"grad_norm": 0.23359602867885043,
|
|
"learning_rate": 6.157506848738281e-06,
|
|
"loss": 0.1663,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 8.475216365066876,
|
|
"grad_norm": 0.22916515054110842,
|
|
"learning_rate": 6.148782328474779e-06,
|
|
"loss": 0.1681,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 8.478363493312353,
|
|
"grad_norm": 0.2341655050876696,
|
|
"learning_rate": 6.1400743641805295e-06,
|
|
"loss": 0.1637,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 8.48151062155783,
|
|
"grad_norm": 0.2326657826844374,
|
|
"learning_rate": 6.131382964950646e-06,
|
|
"loss": 0.1714,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 8.484657749803304,
|
|
"grad_norm": 0.22139519017417883,
|
|
"learning_rate": 6.122708139862964e-06,
|
|
"loss": 0.1644,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 8.487804878048781,
|
|
"grad_norm": 0.2358515865230787,
|
|
"learning_rate": 6.114049897977987e-06,
|
|
"loss": 0.1678,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 8.490952006294256,
|
|
"grad_norm": 0.24072618363900117,
|
|
"learning_rate": 6.105408248338907e-06,
|
|
"loss": 0.1652,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 8.494099134539733,
|
|
"grad_norm": 0.23833730230772185,
|
|
"learning_rate": 6.0967831999715895e-06,
|
|
"loss": 0.1653,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 8.49724626278521,
|
|
"grad_norm": 0.23569737870390947,
|
|
"learning_rate": 6.088174761884547e-06,
|
|
"loss": 0.1676,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 8.500393391030684,
|
|
"grad_norm": 0.2362096527557548,
|
|
"learning_rate": 6.079582943068963e-06,
|
|
"loss": 0.1613,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 8.50354051927616,
|
|
"grad_norm": 0.2276250499406588,
|
|
"learning_rate": 6.07100775249864e-06,
|
|
"loss": 0.1679,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 8.506687647521636,
|
|
"grad_norm": 0.23255051930242077,
|
|
"learning_rate": 6.062449199130038e-06,
|
|
"loss": 0.158,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 8.509834775767112,
|
|
"grad_norm": 0.23823745767581703,
|
|
"learning_rate": 6.053907291902215e-06,
|
|
"loss": 0.1675,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 8.51298190401259,
|
|
"grad_norm": 0.22780299459891643,
|
|
"learning_rate": 6.04538203973686e-06,
|
|
"loss": 0.167,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 8.516129032258064,
|
|
"grad_norm": 0.23195173113273695,
|
|
"learning_rate": 6.036873451538268e-06,
|
|
"loss": 0.1604,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 8.51927616050354,
|
|
"grad_norm": 0.22611245875521443,
|
|
"learning_rate": 6.02838153619331e-06,
|
|
"loss": 0.1672,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 8.522423288749017,
|
|
"grad_norm": 0.22775959921131073,
|
|
"learning_rate": 6.019906302571467e-06,
|
|
"loss": 0.1641,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 8.525570416994492,
|
|
"grad_norm": 0.23041588221663856,
|
|
"learning_rate": 6.011447759524776e-06,
|
|
"loss": 0.1683,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 8.528717545239969,
|
|
"grad_norm": 0.22838434499944277,
|
|
"learning_rate": 6.003005915887853e-06,
|
|
"loss": 0.1637,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 8.531864673485444,
|
|
"grad_norm": 0.23218601962132057,
|
|
"learning_rate": 5.99458078047787e-06,
|
|
"loss": 0.1701,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 8.53501180173092,
|
|
"grad_norm": 0.2416174473030719,
|
|
"learning_rate": 5.986172362094551e-06,
|
|
"loss": 0.1653,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 8.538158929976397,
|
|
"grad_norm": 0.232552051298067,
|
|
"learning_rate": 5.977780669520149e-06,
|
|
"loss": 0.1673,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 8.541306058221872,
|
|
"grad_norm": 0.2345354384269151,
|
|
"learning_rate": 5.96940571151946e-06,
|
|
"loss": 0.1597,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 8.544453186467349,
|
|
"grad_norm": 0.2352724122845352,
|
|
"learning_rate": 5.961047496839797e-06,
|
|
"loss": 0.17,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 8.547600314712824,
|
|
"grad_norm": 0.2347397790865505,
|
|
"learning_rate": 5.952706034210978e-06,
|
|
"loss": 0.1654,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 8.5507474429583,
|
|
"grad_norm": 0.23099011233464176,
|
|
"learning_rate": 5.944381332345337e-06,
|
|
"loss": 0.1693,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 8.553894571203777,
|
|
"grad_norm": 0.23436885807183397,
|
|
"learning_rate": 5.93607339993769e-06,
|
|
"loss": 0.1625,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 8.557041699449252,
|
|
"grad_norm": 0.23460704187792045,
|
|
"learning_rate": 5.92778224566535e-06,
|
|
"loss": 0.1661,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 8.560188827694729,
|
|
"grad_norm": 0.23592554127140355,
|
|
"learning_rate": 5.919507878188092e-06,
|
|
"loss": 0.1681,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 8.563335955940204,
|
|
"grad_norm": 0.22436344128981697,
|
|
"learning_rate": 5.9112503061481685e-06,
|
|
"loss": 0.1681,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 8.56648308418568,
|
|
"grad_norm": 0.23478075715615096,
|
|
"learning_rate": 5.903009538170289e-06,
|
|
"loss": 0.1697,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 8.569630212431157,
|
|
"grad_norm": 0.23791774157827272,
|
|
"learning_rate": 5.894785582861606e-06,
|
|
"loss": 0.1679,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 8.572777340676632,
|
|
"grad_norm": 0.2411394751490439,
|
|
"learning_rate": 5.886578448811714e-06,
|
|
"loss": 0.167,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 8.575924468922109,
|
|
"grad_norm": 0.22846188636305548,
|
|
"learning_rate": 5.878388144592642e-06,
|
|
"loss": 0.1691,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 8.579071597167584,
|
|
"grad_norm": 0.2385881186153001,
|
|
"learning_rate": 5.8702146787588435e-06,
|
|
"loss": 0.1655,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 8.58221872541306,
|
|
"grad_norm": 0.24030991985714578,
|
|
"learning_rate": 5.862058059847169e-06,
|
|
"loss": 0.1724,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 8.585365853658537,
|
|
"grad_norm": 0.22562331859076523,
|
|
"learning_rate": 5.8539182963768935e-06,
|
|
"loss": 0.1673,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 8.588512981904012,
|
|
"grad_norm": 0.22799397594939672,
|
|
"learning_rate": 5.845795396849671e-06,
|
|
"loss": 0.1625,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 8.591660110149489,
|
|
"grad_norm": 0.22658460250777282,
|
|
"learning_rate": 5.837689369749554e-06,
|
|
"loss": 0.1672,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 8.594807238394965,
|
|
"grad_norm": 0.23287534822002212,
|
|
"learning_rate": 5.829600223542965e-06,
|
|
"loss": 0.167,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 8.59795436664044,
|
|
"grad_norm": 0.23513451530619064,
|
|
"learning_rate": 5.821527966678693e-06,
|
|
"loss": 0.1604,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 8.601101494885917,
|
|
"grad_norm": 0.24331885939351217,
|
|
"learning_rate": 5.8134726075878965e-06,
|
|
"loss": 0.1669,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 8.604248623131392,
|
|
"grad_norm": 0.23480791746582516,
|
|
"learning_rate": 5.805434154684075e-06,
|
|
"loss": 0.1631,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 8.607395751376869,
|
|
"grad_norm": 0.2313813224235784,
|
|
"learning_rate": 5.797412616363077e-06,
|
|
"loss": 0.1718,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 8.610542879622345,
|
|
"grad_norm": 0.23469324508654915,
|
|
"learning_rate": 5.789408001003079e-06,
|
|
"loss": 0.1645,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 8.61369000786782,
|
|
"grad_norm": 0.22766690046481194,
|
|
"learning_rate": 5.781420316964586e-06,
|
|
"loss": 0.1641,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 8.616837136113297,
|
|
"grad_norm": 0.22244068111472892,
|
|
"learning_rate": 5.773449572590417e-06,
|
|
"loss": 0.1677,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 8.619984264358772,
|
|
"grad_norm": 0.22626928201892044,
|
|
"learning_rate": 5.7654957762056994e-06,
|
|
"loss": 0.1658,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 8.623131392604249,
|
|
"grad_norm": 0.23558057122663417,
|
|
"learning_rate": 5.7575589361178645e-06,
|
|
"loss": 0.1623,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 8.626278520849725,
|
|
"grad_norm": 0.22952982394885552,
|
|
"learning_rate": 5.749639060616618e-06,
|
|
"loss": 0.1654,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 8.6294256490952,
|
|
"grad_norm": 0.2260057004402793,
|
|
"learning_rate": 5.74173615797396e-06,
|
|
"loss": 0.1611,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 8.632572777340677,
|
|
"grad_norm": 0.22660148535255212,
|
|
"learning_rate": 5.733850236444161e-06,
|
|
"loss": 0.1654,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 8.635719905586154,
|
|
"grad_norm": 0.2275035237099505,
|
|
"learning_rate": 5.725981304263756e-06,
|
|
"loss": 0.1704,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 8.638867033831628,
|
|
"grad_norm": 0.23670395896446667,
|
|
"learning_rate": 5.718129369651524e-06,
|
|
"loss": 0.1683,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 8.642014162077105,
|
|
"grad_norm": 0.2373653142814522,
|
|
"learning_rate": 5.710294440808507e-06,
|
|
"loss": 0.1721,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 8.64516129032258,
|
|
"grad_norm": 0.23104346401189593,
|
|
"learning_rate": 5.702476525917979e-06,
|
|
"loss": 0.1663,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 8.648308418568057,
|
|
"grad_norm": 0.23283488730211302,
|
|
"learning_rate": 5.6946756331454354e-06,
|
|
"loss": 0.1668,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 8.651455546813533,
|
|
"grad_norm": 0.2311838977095239,
|
|
"learning_rate": 5.6868917706386105e-06,
|
|
"loss": 0.1747,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 8.654602675059008,
|
|
"grad_norm": 0.2241606956720152,
|
|
"learning_rate": 5.67912494652743e-06,
|
|
"loss": 0.1641,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 8.657749803304485,
|
|
"grad_norm": 0.23193312446144088,
|
|
"learning_rate": 5.671375168924041e-06,
|
|
"loss": 0.1696,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 8.66089693154996,
|
|
"grad_norm": 0.23447145957791274,
|
|
"learning_rate": 5.663642445922777e-06,
|
|
"loss": 0.1699,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 8.664044059795437,
|
|
"grad_norm": 0.22987054795106973,
|
|
"learning_rate": 5.655926785600158e-06,
|
|
"loss": 0.1612,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 8.667191188040913,
|
|
"grad_norm": 0.2352484784071021,
|
|
"learning_rate": 5.648228196014888e-06,
|
|
"loss": 0.1674,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 8.670338316286388,
|
|
"grad_norm": 0.23150853204839367,
|
|
"learning_rate": 5.640546685207842e-06,
|
|
"loss": 0.1677,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 8.673485444531865,
|
|
"grad_norm": 0.2319763405157472,
|
|
"learning_rate": 5.632882261202054e-06,
|
|
"loss": 0.1627,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 8.67663257277734,
|
|
"grad_norm": 0.23040198433732714,
|
|
"learning_rate": 5.625234932002706e-06,
|
|
"loss": 0.1641,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 8.679779701022817,
|
|
"grad_norm": 0.24040413406804584,
|
|
"learning_rate": 5.617604705597136e-06,
|
|
"loss": 0.166,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 8.682926829268293,
|
|
"grad_norm": 0.23110146532134304,
|
|
"learning_rate": 5.609991589954809e-06,
|
|
"loss": 0.1683,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 8.686073957513768,
|
|
"grad_norm": 0.22419932903394638,
|
|
"learning_rate": 5.602395593027327e-06,
|
|
"loss": 0.1716,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 8.689221085759245,
|
|
"grad_norm": 0.22482368034008485,
|
|
"learning_rate": 5.594816722748403e-06,
|
|
"loss": 0.1612,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 8.69236821400472,
|
|
"grad_norm": 0.22947954480527974,
|
|
"learning_rate": 5.58725498703387e-06,
|
|
"loss": 0.1703,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 8.695515342250197,
|
|
"grad_norm": 0.234698861914943,
|
|
"learning_rate": 5.579710393781666e-06,
|
|
"loss": 0.168,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 8.698662470495673,
|
|
"grad_norm": 0.24098945022071477,
|
|
"learning_rate": 5.5721829508718095e-06,
|
|
"loss": 0.1665,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 8.701809598741148,
|
|
"grad_norm": 0.2371662907560327,
|
|
"learning_rate": 5.564672666166425e-06,
|
|
"loss": 0.1667,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 8.704956726986625,
|
|
"grad_norm": 0.23429888837701635,
|
|
"learning_rate": 5.557179547509703e-06,
|
|
"loss": 0.1718,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 8.708103855232102,
|
|
"grad_norm": 0.23129876881925154,
|
|
"learning_rate": 5.549703602727912e-06,
|
|
"loss": 0.1746,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 8.711250983477576,
|
|
"grad_norm": 0.23080453324233027,
|
|
"learning_rate": 5.542244839629379e-06,
|
|
"loss": 0.1654,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 8.714398111723053,
|
|
"grad_norm": 0.23544727546366193,
|
|
"learning_rate": 5.534803266004491e-06,
|
|
"loss": 0.1698,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 8.717545239968528,
|
|
"grad_norm": 0.23363093531020357,
|
|
"learning_rate": 5.527378889625668e-06,
|
|
"loss": 0.1647,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 8.720692368214005,
|
|
"grad_norm": 0.23435894467290583,
|
|
"learning_rate": 5.519971718247384e-06,
|
|
"loss": 0.163,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 8.723839496459481,
|
|
"grad_norm": 0.23005445022151622,
|
|
"learning_rate": 5.512581759606137e-06,
|
|
"loss": 0.1648,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 8.726986624704956,
|
|
"grad_norm": 0.22957271730336687,
|
|
"learning_rate": 5.50520902142044e-06,
|
|
"loss": 0.1666,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 8.730133752950433,
|
|
"grad_norm": 0.22954716917821802,
|
|
"learning_rate": 5.497853511390836e-06,
|
|
"loss": 0.1688,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 8.733280881195908,
|
|
"grad_norm": 0.22392571792472526,
|
|
"learning_rate": 5.490515237199852e-06,
|
|
"loss": 0.17,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 8.736428009441385,
|
|
"grad_norm": 0.2355921505287895,
|
|
"learning_rate": 5.483194206512034e-06,
|
|
"loss": 0.1662,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 8.739575137686861,
|
|
"grad_norm": 0.23974943062847195,
|
|
"learning_rate": 5.475890426973903e-06,
|
|
"loss": 0.1694,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 8.742722265932336,
|
|
"grad_norm": 0.2338669519112205,
|
|
"learning_rate": 5.46860390621397e-06,
|
|
"loss": 0.1659,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 8.745869394177813,
|
|
"grad_norm": 0.23127783986823608,
|
|
"learning_rate": 5.461334651842721e-06,
|
|
"loss": 0.1664,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 8.74901652242329,
|
|
"grad_norm": 0.23190110954348572,
|
|
"learning_rate": 5.454082671452597e-06,
|
|
"loss": 0.1676,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 8.752163650668765,
|
|
"grad_norm": 0.23152867010137812,
|
|
"learning_rate": 5.446847972618009e-06,
|
|
"loss": 0.1635,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 8.755310778914241,
|
|
"grad_norm": 0.23464418647093355,
|
|
"learning_rate": 5.439630562895311e-06,
|
|
"loss": 0.1601,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 8.758457907159716,
|
|
"grad_norm": 0.22904054716255184,
|
|
"learning_rate": 5.43243044982281e-06,
|
|
"loss": 0.1658,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 8.761605035405193,
|
|
"grad_norm": 0.22867471399286618,
|
|
"learning_rate": 5.425247640920726e-06,
|
|
"loss": 0.1677,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 8.76475216365067,
|
|
"grad_norm": 0.22791012026392768,
|
|
"learning_rate": 5.418082143691229e-06,
|
|
"loss": 0.1732,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 8.767899291896144,
|
|
"grad_norm": 0.21856098390948892,
|
|
"learning_rate": 5.410933965618389e-06,
|
|
"loss": 0.1648,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 8.771046420141621,
|
|
"grad_norm": 0.22405701257784277,
|
|
"learning_rate": 5.4038031141682e-06,
|
|
"loss": 0.1597,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 8.774193548387096,
|
|
"grad_norm": 0.23307681739252337,
|
|
"learning_rate": 5.396689596788556e-06,
|
|
"loss": 0.1675,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 8.777340676632573,
|
|
"grad_norm": 0.23277974511466729,
|
|
"learning_rate": 5.389593420909237e-06,
|
|
"loss": 0.1657,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 8.78048780487805,
|
|
"grad_norm": 0.2249371763607525,
|
|
"learning_rate": 5.382514593941926e-06,
|
|
"loss": 0.1667,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 8.783634933123524,
|
|
"grad_norm": 0.23060309789801092,
|
|
"learning_rate": 5.375453123280171e-06,
|
|
"loss": 0.1567,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 8.786782061369001,
|
|
"grad_norm": 0.22431101983410784,
|
|
"learning_rate": 5.368409016299404e-06,
|
|
"loss": 0.1646,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 8.789929189614476,
|
|
"grad_norm": 0.23707669405318257,
|
|
"learning_rate": 5.36138228035691e-06,
|
|
"loss": 0.1686,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 8.793076317859953,
|
|
"grad_norm": 0.22826592786983121,
|
|
"learning_rate": 5.3543729227918375e-06,
|
|
"loss": 0.167,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 8.79622344610543,
|
|
"grad_norm": 0.22647434359880703,
|
|
"learning_rate": 5.34738095092519e-06,
|
|
"loss": 0.1671,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 8.799370574350904,
|
|
"grad_norm": 0.23277753509076599,
|
|
"learning_rate": 5.340406372059793e-06,
|
|
"loss": 0.1694,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 8.802517702596381,
|
|
"grad_norm": 0.22966481306572614,
|
|
"learning_rate": 5.33344919348033e-06,
|
|
"loss": 0.1677,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 8.805664830841856,
|
|
"grad_norm": 0.2345330156945199,
|
|
"learning_rate": 5.3265094224532925e-06,
|
|
"loss": 0.1662,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 8.808811959087333,
|
|
"grad_norm": 0.24220581762981963,
|
|
"learning_rate": 5.319587066227e-06,
|
|
"loss": 0.1616,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 8.81195908733281,
|
|
"grad_norm": 0.23003977776252021,
|
|
"learning_rate": 5.312682132031575e-06,
|
|
"loss": 0.1692,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 8.815106215578284,
|
|
"grad_norm": 0.22663651876262159,
|
|
"learning_rate": 5.3057946270789504e-06,
|
|
"loss": 0.171,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 8.818253343823761,
|
|
"grad_norm": 0.23957359702108452,
|
|
"learning_rate": 5.298924558562852e-06,
|
|
"loss": 0.1653,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 8.821400472069238,
|
|
"grad_norm": 0.229995530933423,
|
|
"learning_rate": 5.292071933658794e-06,
|
|
"loss": 0.1608,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 8.824547600314713,
|
|
"grad_norm": 0.23340637605795178,
|
|
"learning_rate": 5.2852367595240735e-06,
|
|
"loss": 0.1727,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 8.82769472856019,
|
|
"grad_norm": 0.22650780265142523,
|
|
"learning_rate": 5.278419043297756e-06,
|
|
"loss": 0.1727,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 8.830841856805664,
|
|
"grad_norm": 0.23469188183263542,
|
|
"learning_rate": 5.271618792100679e-06,
|
|
"loss": 0.1595,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 8.83398898505114,
|
|
"grad_norm": 0.22902689381057262,
|
|
"learning_rate": 5.264836013035435e-06,
|
|
"loss": 0.1666,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 8.837136113296618,
|
|
"grad_norm": 0.24601196499502342,
|
|
"learning_rate": 5.25807071318637e-06,
|
|
"loss": 0.1675,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 8.840283241542092,
|
|
"grad_norm": 0.23570422675260183,
|
|
"learning_rate": 5.251322899619565e-06,
|
|
"loss": 0.1632,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 8.84343036978757,
|
|
"grad_norm": 0.23861546188691643,
|
|
"learning_rate": 5.2445925793828504e-06,
|
|
"loss": 0.1711,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 8.846577498033044,
|
|
"grad_norm": 0.22082821092360364,
|
|
"learning_rate": 5.237879759505778e-06,
|
|
"loss": 0.1712,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 8.84972462627852,
|
|
"grad_norm": 0.23120537316015438,
|
|
"learning_rate": 5.2311844469996205e-06,
|
|
"loss": 0.1664,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 8.852871754523997,
|
|
"grad_norm": 0.23260665872741915,
|
|
"learning_rate": 5.224506648857374e-06,
|
|
"loss": 0.1557,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 8.856018882769472,
|
|
"grad_norm": 0.2310320953143728,
|
|
"learning_rate": 5.217846372053722e-06,
|
|
"loss": 0.1701,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 8.859166011014949,
|
|
"grad_norm": 0.23800631732996927,
|
|
"learning_rate": 5.211203623545071e-06,
|
|
"loss": 0.166,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 8.862313139260426,
|
|
"grad_norm": 0.2230982585460558,
|
|
"learning_rate": 5.204578410269503e-06,
|
|
"loss": 0.1748,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 8.8654602675059,
|
|
"grad_norm": 0.23177916380091554,
|
|
"learning_rate": 5.197970739146792e-06,
|
|
"loss": 0.1667,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 8.868607395751377,
|
|
"grad_norm": 0.23008230747581662,
|
|
"learning_rate": 5.191380617078389e-06,
|
|
"loss": 0.1702,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 8.871754523996852,
|
|
"grad_norm": 0.22716372034675994,
|
|
"learning_rate": 5.184808050947413e-06,
|
|
"loss": 0.1627,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 8.874901652242329,
|
|
"grad_norm": 0.23335158310105836,
|
|
"learning_rate": 5.178253047618657e-06,
|
|
"loss": 0.1723,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 8.878048780487806,
|
|
"grad_norm": 0.22629652030237907,
|
|
"learning_rate": 5.171715613938553e-06,
|
|
"loss": 0.1665,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 8.88119590873328,
|
|
"grad_norm": 0.23433706877773142,
|
|
"learning_rate": 5.165195756735199e-06,
|
|
"loss": 0.1742,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 8.884343036978757,
|
|
"grad_norm": 0.2410492157482833,
|
|
"learning_rate": 5.158693482818321e-06,
|
|
"loss": 0.173,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 8.887490165224232,
|
|
"grad_norm": 0.2352594527132892,
|
|
"learning_rate": 5.152208798979295e-06,
|
|
"loss": 0.1581,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 8.890637293469709,
|
|
"grad_norm": 0.23014025030306132,
|
|
"learning_rate": 5.145741711991104e-06,
|
|
"loss": 0.1674,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 8.893784421715186,
|
|
"grad_norm": 0.23167065606455844,
|
|
"learning_rate": 5.139292228608378e-06,
|
|
"loss": 0.1646,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 8.89693154996066,
|
|
"grad_norm": 0.2399892401864134,
|
|
"learning_rate": 5.1328603555673375e-06,
|
|
"loss": 0.1614,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 8.900078678206137,
|
|
"grad_norm": 0.24240253361943384,
|
|
"learning_rate": 5.126446099585824e-06,
|
|
"loss": 0.1671,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 8.903225806451612,
|
|
"grad_norm": 0.23174387029737067,
|
|
"learning_rate": 5.120049467363275e-06,
|
|
"loss": 0.1625,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 8.906372934697089,
|
|
"grad_norm": 0.23155008484418904,
|
|
"learning_rate": 5.1136704655807145e-06,
|
|
"loss": 0.1689,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 8.909520062942565,
|
|
"grad_norm": 0.2340628919114927,
|
|
"learning_rate": 5.107309100900762e-06,
|
|
"loss": 0.1623,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 8.91266719118804,
|
|
"grad_norm": 0.22856119137017192,
|
|
"learning_rate": 5.100965379967606e-06,
|
|
"loss": 0.1634,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 8.915814319433517,
|
|
"grad_norm": 0.238656910481426,
|
|
"learning_rate": 5.094639309407021e-06,
|
|
"loss": 0.1654,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 8.918961447678992,
|
|
"grad_norm": 0.23793602636902197,
|
|
"learning_rate": 5.0883308958263255e-06,
|
|
"loss": 0.1695,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 8.922108575924469,
|
|
"grad_norm": 0.23552997928550543,
|
|
"learning_rate": 5.082040145814413e-06,
|
|
"loss": 0.1634,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 8.925255704169945,
|
|
"grad_norm": 0.2345591111424614,
|
|
"learning_rate": 5.075767065941728e-06,
|
|
"loss": 0.1712,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 8.92840283241542,
|
|
"grad_norm": 0.23815624814766417,
|
|
"learning_rate": 5.069511662760245e-06,
|
|
"loss": 0.1653,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 8.931549960660897,
|
|
"grad_norm": 0.2339825850851606,
|
|
"learning_rate": 5.063273942803491e-06,
|
|
"loss": 0.1713,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 8.934697088906374,
|
|
"grad_norm": 0.23772485751502045,
|
|
"learning_rate": 5.057053912586512e-06,
|
|
"loss": 0.1616,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 8.937844217151849,
|
|
"grad_norm": 0.23732466399568317,
|
|
"learning_rate": 5.050851578605892e-06,
|
|
"loss": 0.1699,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 8.940991345397325,
|
|
"grad_norm": 0.23247387261686783,
|
|
"learning_rate": 5.044666947339716e-06,
|
|
"loss": 0.1677,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 8.9441384736428,
|
|
"grad_norm": 0.22590208575695253,
|
|
"learning_rate": 5.038500025247589e-06,
|
|
"loss": 0.1676,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 8.947285601888277,
|
|
"grad_norm": 0.23597275765287926,
|
|
"learning_rate": 5.032350818770616e-06,
|
|
"loss": 0.1677,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 8.950432730133754,
|
|
"grad_norm": 0.23033651705619715,
|
|
"learning_rate": 5.0262193343314e-06,
|
|
"loss": 0.1686,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 8.953579858379229,
|
|
"grad_norm": 0.2298946051218498,
|
|
"learning_rate": 5.020105578334038e-06,
|
|
"loss": 0.1693,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 8.956726986624705,
|
|
"grad_norm": 0.22515833749826317,
|
|
"learning_rate": 5.014009557164099e-06,
|
|
"loss": 0.1658,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 8.95987411487018,
|
|
"grad_norm": 0.22997911948352323,
|
|
"learning_rate": 5.0079312771886425e-06,
|
|
"loss": 0.1709,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 8.963021243115657,
|
|
"grad_norm": 0.23065097782750338,
|
|
"learning_rate": 5.001870744756182e-06,
|
|
"loss": 0.1645,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 8.966168371361134,
|
|
"grad_norm": 0.23243456881790153,
|
|
"learning_rate": 4.995827966196714e-06,
|
|
"loss": 0.1715,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 8.969315499606608,
|
|
"grad_norm": 0.22702087926588302,
|
|
"learning_rate": 4.9898029478216735e-06,
|
|
"loss": 0.1656,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 8.972462627852085,
|
|
"grad_norm": 0.23667845742674357,
|
|
"learning_rate": 4.983795695923958e-06,
|
|
"loss": 0.1665,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 8.975609756097562,
|
|
"grad_norm": 0.23376278457824037,
|
|
"learning_rate": 4.977806216777904e-06,
|
|
"loss": 0.1649,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 8.978756884343037,
|
|
"grad_norm": 0.22714146711985553,
|
|
"learning_rate": 4.971834516639281e-06,
|
|
"loss": 0.17,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 8.981904012588513,
|
|
"grad_norm": 0.23000407639648165,
|
|
"learning_rate": 4.965880601745301e-06,
|
|
"loss": 0.1658,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 8.985051140833988,
|
|
"grad_norm": 0.23490571123631973,
|
|
"learning_rate": 4.959944478314586e-06,
|
|
"loss": 0.1637,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 8.988198269079465,
|
|
"grad_norm": 0.2357626917407318,
|
|
"learning_rate": 4.954026152547187e-06,
|
|
"loss": 0.1643,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 8.991345397324942,
|
|
"grad_norm": 0.22454926371991393,
|
|
"learning_rate": 4.948125630624556e-06,
|
|
"loss": 0.1712,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 8.994492525570417,
|
|
"grad_norm": 0.22383263709497628,
|
|
"learning_rate": 4.9422429187095586e-06,
|
|
"loss": 0.1707,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 8.997639653815893,
|
|
"grad_norm": 0.23402521859638922,
|
|
"learning_rate": 4.936378022946449e-06,
|
|
"loss": 0.1627,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 9.003147128245477,
|
|
"grad_norm": 0.7259930729062384,
|
|
"learning_rate": 4.930530949460883e-06,
|
|
"loss": 0.3053,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 9.006294256490952,
|
|
"grad_norm": 0.2562706446384713,
|
|
"learning_rate": 4.924701704359899e-06,
|
|
"loss": 0.1322,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 9.009441384736428,
|
|
"grad_norm": 0.2854052200203414,
|
|
"learning_rate": 4.918890293731908e-06,
|
|
"loss": 0.1333,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 9.012588512981903,
|
|
"grad_norm": 0.4691429203587427,
|
|
"learning_rate": 4.9130967236467026e-06,
|
|
"loss": 0.1374,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 9.01573564122738,
|
|
"grad_norm": 0.4028184182051083,
|
|
"learning_rate": 4.907321000155432e-06,
|
|
"loss": 0.1364,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 9.018882769472857,
|
|
"grad_norm": 0.27399671132513376,
|
|
"learning_rate": 4.901563129290619e-06,
|
|
"loss": 0.137,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 9.022029897718332,
|
|
"grad_norm": 0.3360542252051378,
|
|
"learning_rate": 4.895823117066122e-06,
|
|
"loss": 0.1385,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 9.025177025963808,
|
|
"grad_norm": 0.3362833473207967,
|
|
"learning_rate": 4.890100969477159e-06,
|
|
"loss": 0.1308,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 9.028324154209285,
|
|
"grad_norm": 0.280838008338128,
|
|
"learning_rate": 4.884396692500293e-06,
|
|
"loss": 0.1274,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 9.03147128245476,
|
|
"grad_norm": 0.27276504949498936,
|
|
"learning_rate": 4.878710292093409e-06,
|
|
"loss": 0.1293,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 9.034618410700237,
|
|
"grad_norm": 0.30411776357879683,
|
|
"learning_rate": 4.8730417741957306e-06,
|
|
"loss": 0.1315,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 9.037765538945711,
|
|
"grad_norm": 0.3071835435610242,
|
|
"learning_rate": 4.867391144727798e-06,
|
|
"loss": 0.1292,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 9.040912667191188,
|
|
"grad_norm": 0.2774412987717509,
|
|
"learning_rate": 4.861758409591474e-06,
|
|
"loss": 0.1352,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 9.044059795436665,
|
|
"grad_norm": 0.24821320889899118,
|
|
"learning_rate": 4.8561435746699224e-06,
|
|
"loss": 0.132,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 9.04720692368214,
|
|
"grad_norm": 0.260501036335888,
|
|
"learning_rate": 4.85054664582762e-06,
|
|
"loss": 0.1298,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 9.050354051927616,
|
|
"grad_norm": 0.25950354132313963,
|
|
"learning_rate": 4.844967628910332e-06,
|
|
"loss": 0.1382,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 9.053501180173091,
|
|
"grad_norm": 0.254911463794064,
|
|
"learning_rate": 4.839406529745122e-06,
|
|
"loss": 0.1331,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 9.056648308418568,
|
|
"grad_norm": 0.2568698001440897,
|
|
"learning_rate": 4.833863354140345e-06,
|
|
"loss": 0.1313,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 9.059795436664045,
|
|
"grad_norm": 0.25834630400276143,
|
|
"learning_rate": 4.828338107885621e-06,
|
|
"loss": 0.1339,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 9.06294256490952,
|
|
"grad_norm": 0.2626115567381949,
|
|
"learning_rate": 4.822830796751856e-06,
|
|
"loss": 0.1398,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 9.066089693154996,
|
|
"grad_norm": 0.23847349899918344,
|
|
"learning_rate": 4.817341426491213e-06,
|
|
"loss": 0.1304,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 9.069236821400471,
|
|
"grad_norm": 0.25139369715829973,
|
|
"learning_rate": 4.811870002837126e-06,
|
|
"loss": 0.1309,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 9.072383949645948,
|
|
"grad_norm": 0.2453860537308011,
|
|
"learning_rate": 4.806416531504274e-06,
|
|
"loss": 0.135,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 9.075531077891425,
|
|
"grad_norm": 0.25404710592240104,
|
|
"learning_rate": 4.800981018188602e-06,
|
|
"loss": 0.1266,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 9.0786782061369,
|
|
"grad_norm": 0.22754008276241747,
|
|
"learning_rate": 4.79556346856728e-06,
|
|
"loss": 0.1357,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 9.081825334382376,
|
|
"grad_norm": 0.25168887931836287,
|
|
"learning_rate": 4.79016388829873e-06,
|
|
"loss": 0.1341,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 9.084972462627853,
|
|
"grad_norm": 0.2502383435030883,
|
|
"learning_rate": 4.784782283022597e-06,
|
|
"loss": 0.1352,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 9.088119590873328,
|
|
"grad_norm": 0.25986754177545734,
|
|
"learning_rate": 4.7794186583597544e-06,
|
|
"loss": 0.132,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 9.091266719118805,
|
|
"grad_norm": 0.22662605665615443,
|
|
"learning_rate": 4.774073019912298e-06,
|
|
"loss": 0.1366,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 9.09441384736428,
|
|
"grad_norm": 0.2304079612052606,
|
|
"learning_rate": 4.7687453732635305e-06,
|
|
"loss": 0.131,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 9.097560975609756,
|
|
"grad_norm": 0.23596467115393177,
|
|
"learning_rate": 4.763435723977974e-06,
|
|
"loss": 0.1311,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 9.100708103855233,
|
|
"grad_norm": 0.23983207466582687,
|
|
"learning_rate": 4.7581440776013425e-06,
|
|
"loss": 0.1295,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 9.103855232100708,
|
|
"grad_norm": 0.22975847317016954,
|
|
"learning_rate": 4.752870439660551e-06,
|
|
"loss": 0.1321,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 9.107002360346184,
|
|
"grad_norm": 0.23955053071281487,
|
|
"learning_rate": 4.747614815663711e-06,
|
|
"loss": 0.1355,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 9.11014948859166,
|
|
"grad_norm": 0.2342749597252554,
|
|
"learning_rate": 4.742377211100105e-06,
|
|
"loss": 0.1302,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 9.113296616837136,
|
|
"grad_norm": 0.24153308674358212,
|
|
"learning_rate": 4.7371576314402135e-06,
|
|
"loss": 0.1264,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 9.116443745082613,
|
|
"grad_norm": 0.2321425269869848,
|
|
"learning_rate": 4.731956082135669e-06,
|
|
"loss": 0.1268,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 9.119590873328088,
|
|
"grad_norm": 0.23212954304734737,
|
|
"learning_rate": 4.726772568619297e-06,
|
|
"loss": 0.1325,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 9.122738001573564,
|
|
"grad_norm": 0.23275541822697182,
|
|
"learning_rate": 4.721607096305063e-06,
|
|
"loss": 0.1239,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 9.12588512981904,
|
|
"grad_norm": 0.23593940968545418,
|
|
"learning_rate": 4.716459670588102e-06,
|
|
"loss": 0.1332,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 9.129032258064516,
|
|
"grad_norm": 0.24159409253703298,
|
|
"learning_rate": 4.711330296844695e-06,
|
|
"loss": 0.1337,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 9.132179386309993,
|
|
"grad_norm": 0.225298246971766,
|
|
"learning_rate": 4.706218980432269e-06,
|
|
"loss": 0.1332,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 9.135326514555468,
|
|
"grad_norm": 0.23345137024396634,
|
|
"learning_rate": 4.701125726689394e-06,
|
|
"loss": 0.1289,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 9.138473642800944,
|
|
"grad_norm": 0.22765284284624834,
|
|
"learning_rate": 4.69605054093577e-06,
|
|
"loss": 0.1332,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 9.141620771046421,
|
|
"grad_norm": 0.2365079564611352,
|
|
"learning_rate": 4.690993428472231e-06,
|
|
"loss": 0.1353,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 9.144767899291896,
|
|
"grad_norm": 0.22894145213619913,
|
|
"learning_rate": 4.685954394580723e-06,
|
|
"loss": 0.1316,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 9.147915027537373,
|
|
"grad_norm": 0.2316844787210736,
|
|
"learning_rate": 4.680933444524327e-06,
|
|
"loss": 0.1319,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 9.151062155782848,
|
|
"grad_norm": 0.2295088666150164,
|
|
"learning_rate": 4.675930583547219e-06,
|
|
"loss": 0.1352,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 9.154209284028324,
|
|
"grad_norm": 0.22737542176781833,
|
|
"learning_rate": 4.670945816874691e-06,
|
|
"loss": 0.1362,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 9.157356412273801,
|
|
"grad_norm": 0.23483198051992082,
|
|
"learning_rate": 4.66597914971314e-06,
|
|
"loss": 0.123,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 9.160503540519276,
|
|
"grad_norm": 0.23493971809693678,
|
|
"learning_rate": 4.661030587250045e-06,
|
|
"loss": 0.1345,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 9.163650668764753,
|
|
"grad_norm": 0.23898547282251156,
|
|
"learning_rate": 4.656100134653988e-06,
|
|
"loss": 0.1289,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 9.166797797010227,
|
|
"grad_norm": 0.22591766013667208,
|
|
"learning_rate": 4.65118779707463e-06,
|
|
"loss": 0.1365,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 9.169944925255704,
|
|
"grad_norm": 0.22651931489444632,
|
|
"learning_rate": 4.646293579642716e-06,
|
|
"loss": 0.1372,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 9.17309205350118,
|
|
"grad_norm": 0.22780380035435008,
|
|
"learning_rate": 4.641417487470058e-06,
|
|
"loss": 0.135,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 9.176239181746656,
|
|
"grad_norm": 0.23276681424595513,
|
|
"learning_rate": 4.636559525649546e-06,
|
|
"loss": 0.1362,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 9.179386309992132,
|
|
"grad_norm": 0.23869234341896103,
|
|
"learning_rate": 4.631719699255123e-06,
|
|
"loss": 0.1352,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 9.182533438237607,
|
|
"grad_norm": 0.23080606084113706,
|
|
"learning_rate": 4.626898013341801e-06,
|
|
"loss": 0.1347,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 9.185680566483084,
|
|
"grad_norm": 0.23836084114678538,
|
|
"learning_rate": 4.622094472945639e-06,
|
|
"loss": 0.1246,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 9.18882769472856,
|
|
"grad_norm": 0.2404240676609406,
|
|
"learning_rate": 4.6173090830837434e-06,
|
|
"loss": 0.1325,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 9.191974822974036,
|
|
"grad_norm": 0.2123536854744844,
|
|
"learning_rate": 4.612541848754265e-06,
|
|
"loss": 0.1355,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 9.195121951219512,
|
|
"grad_norm": 0.23581558553764673,
|
|
"learning_rate": 4.60779277493639e-06,
|
|
"loss": 0.1314,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 9.198269079464989,
|
|
"grad_norm": 0.23688342974144003,
|
|
"learning_rate": 4.6030618665903425e-06,
|
|
"loss": 0.1317,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 9.201416207710464,
|
|
"grad_norm": 0.22422105201679832,
|
|
"learning_rate": 4.598349128657362e-06,
|
|
"loss": 0.1276,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 9.20456333595594,
|
|
"grad_norm": 0.22578943792556588,
|
|
"learning_rate": 4.593654566059721e-06,
|
|
"loss": 0.1339,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 9.207710464201416,
|
|
"grad_norm": 0.24065645526761678,
|
|
"learning_rate": 4.588978183700705e-06,
|
|
"loss": 0.1265,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 9.210857592446892,
|
|
"grad_norm": 0.22356993270115313,
|
|
"learning_rate": 4.584319986464608e-06,
|
|
"loss": 0.1282,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 9.214004720692369,
|
|
"grad_norm": 0.22840256425011418,
|
|
"learning_rate": 4.579679979216736e-06,
|
|
"loss": 0.1354,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 9.217151848937844,
|
|
"grad_norm": 0.22189263858930341,
|
|
"learning_rate": 4.575058166803388e-06,
|
|
"loss": 0.1292,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 9.22029897718332,
|
|
"grad_norm": 0.2373676925550199,
|
|
"learning_rate": 4.570454554051869e-06,
|
|
"loss": 0.1308,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 9.223446105428796,
|
|
"grad_norm": 0.23466096466526548,
|
|
"learning_rate": 4.565869145770464e-06,
|
|
"loss": 0.1307,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 9.226593233674272,
|
|
"grad_norm": 0.22341580256810417,
|
|
"learning_rate": 4.561301946748457e-06,
|
|
"loss": 0.1356,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 9.229740361919749,
|
|
"grad_norm": 0.2337076552757127,
|
|
"learning_rate": 4.5567529617561015e-06,
|
|
"loss": 0.1351,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 9.232887490165224,
|
|
"grad_norm": 0.22744541315055633,
|
|
"learning_rate": 4.552222195544636e-06,
|
|
"loss": 0.1312,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 9.2360346184107,
|
|
"grad_norm": 0.23338172402658214,
|
|
"learning_rate": 4.547709652846264e-06,
|
|
"loss": 0.1284,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 9.239181746656175,
|
|
"grad_norm": 0.23508676122090486,
|
|
"learning_rate": 4.543215338374159e-06,
|
|
"loss": 0.1317,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 9.242328874901652,
|
|
"grad_norm": 0.24601704848722142,
|
|
"learning_rate": 4.538739256822453e-06,
|
|
"loss": 0.1338,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 9.245476003147129,
|
|
"grad_norm": 0.22389912494058156,
|
|
"learning_rate": 4.5342814128662376e-06,
|
|
"loss": 0.1316,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 9.248623131392604,
|
|
"grad_norm": 0.2263022423534222,
|
|
"learning_rate": 4.529841811161555e-06,
|
|
"loss": 0.1321,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 9.25177025963808,
|
|
"grad_norm": 0.23229249077068176,
|
|
"learning_rate": 4.5254204563453866e-06,
|
|
"loss": 0.1347,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 9.254917387883557,
|
|
"grad_norm": 0.2310347838411131,
|
|
"learning_rate": 4.521017353035675e-06,
|
|
"loss": 0.131,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 9.258064516129032,
|
|
"grad_norm": 0.23080904694053458,
|
|
"learning_rate": 4.5166325058312745e-06,
|
|
"loss": 0.1358,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 9.261211644374509,
|
|
"grad_norm": 0.22383054119713772,
|
|
"learning_rate": 4.512265919311992e-06,
|
|
"loss": 0.1348,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 9.264358772619984,
|
|
"grad_norm": 0.23282253286973853,
|
|
"learning_rate": 4.5079175980385546e-06,
|
|
"loss": 0.1291,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 9.26750590086546,
|
|
"grad_norm": 0.22462772007805537,
|
|
"learning_rate": 4.503587546552607e-06,
|
|
"loss": 0.1326,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 9.270653029110937,
|
|
"grad_norm": 0.2404642052007977,
|
|
"learning_rate": 4.49927576937672e-06,
|
|
"loss": 0.1353,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 9.273800157356412,
|
|
"grad_norm": 0.22056179286469124,
|
|
"learning_rate": 4.494982271014371e-06,
|
|
"loss": 0.1327,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 9.276947285601889,
|
|
"grad_norm": 0.2366854429506929,
|
|
"learning_rate": 4.490707055949954e-06,
|
|
"loss": 0.1324,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 9.280094413847364,
|
|
"grad_norm": 0.2334841351792365,
|
|
"learning_rate": 4.4864501286487574e-06,
|
|
"loss": 0.1303,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 9.28324154209284,
|
|
"grad_norm": 0.2411585813043938,
|
|
"learning_rate": 4.482211493556974e-06,
|
|
"loss": 0.1346,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 9.286388670338317,
|
|
"grad_norm": 0.21989262346519506,
|
|
"learning_rate": 4.4779911551016934e-06,
|
|
"loss": 0.1316,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 9.289535798583792,
|
|
"grad_norm": 0.22995367913581893,
|
|
"learning_rate": 4.473789117690887e-06,
|
|
"loss": 0.1277,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 9.292682926829269,
|
|
"grad_norm": 0.22756421143574948,
|
|
"learning_rate": 4.469605385713421e-06,
|
|
"loss": 0.1351,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 9.295830055074743,
|
|
"grad_norm": 0.23592901330387175,
|
|
"learning_rate": 4.465439963539034e-06,
|
|
"loss": 0.1289,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 9.29897718332022,
|
|
"grad_norm": 0.22539025532476353,
|
|
"learning_rate": 4.4612928555183486e-06,
|
|
"loss": 0.1348,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 9.302124311565697,
|
|
"grad_norm": 0.22791556449603953,
|
|
"learning_rate": 4.45716406598285e-06,
|
|
"loss": 0.1372,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 9.305271439811172,
|
|
"grad_norm": 0.23492214436922185,
|
|
"learning_rate": 4.453053599244903e-06,
|
|
"loss": 0.1378,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 9.308418568056648,
|
|
"grad_norm": 0.2245331885362177,
|
|
"learning_rate": 4.448961459597719e-06,
|
|
"loss": 0.1334,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 9.311565696302125,
|
|
"grad_norm": 0.22809869975483035,
|
|
"learning_rate": 4.444887651315381e-06,
|
|
"loss": 0.1296,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 9.3147128245476,
|
|
"grad_norm": 0.2324760160461881,
|
|
"learning_rate": 4.440832178652819e-06,
|
|
"loss": 0.1334,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 9.317859952793077,
|
|
"grad_norm": 0.2446173081810758,
|
|
"learning_rate": 4.436795045845812e-06,
|
|
"loss": 0.1313,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 9.321007081038552,
|
|
"grad_norm": 0.2320500946229002,
|
|
"learning_rate": 4.432776257110989e-06,
|
|
"loss": 0.1356,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 9.324154209284028,
|
|
"grad_norm": 0.2448068713490391,
|
|
"learning_rate": 4.428775816645813e-06,
|
|
"loss": 0.1329,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 9.327301337529505,
|
|
"grad_norm": 0.22982431355817806,
|
|
"learning_rate": 4.424793728628586e-06,
|
|
"loss": 0.134,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 9.33044846577498,
|
|
"grad_norm": 0.22806790138954225,
|
|
"learning_rate": 4.420829997218441e-06,
|
|
"loss": 0.1362,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 9.333595594020457,
|
|
"grad_norm": 0.23824485969121148,
|
|
"learning_rate": 4.416884626555339e-06,
|
|
"loss": 0.1308,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 9.336742722265932,
|
|
"grad_norm": 0.23917326850947188,
|
|
"learning_rate": 4.412957620760065e-06,
|
|
"loss": 0.1235,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 9.339889850511408,
|
|
"grad_norm": 0.23389654883715155,
|
|
"learning_rate": 4.409048983934219e-06,
|
|
"loss": 0.1384,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 9.343036978756885,
|
|
"grad_norm": 0.2410002101605033,
|
|
"learning_rate": 4.405158720160217e-06,
|
|
"loss": 0.1391,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 9.34618410700236,
|
|
"grad_norm": 0.2463484092587633,
|
|
"learning_rate": 4.4012868335012865e-06,
|
|
"loss": 0.1313,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 9.349331235247837,
|
|
"grad_norm": 0.23216314413671288,
|
|
"learning_rate": 4.3974333280014605e-06,
|
|
"loss": 0.1321,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 9.352478363493312,
|
|
"grad_norm": 0.25795328112436694,
|
|
"learning_rate": 4.393598207685572e-06,
|
|
"loss": 0.134,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 9.355625491738788,
|
|
"grad_norm": 0.22300360535493108,
|
|
"learning_rate": 4.389781476559255e-06,
|
|
"loss": 0.1358,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 9.358772619984265,
|
|
"grad_norm": 0.2361759272681179,
|
|
"learning_rate": 4.385983138608928e-06,
|
|
"loss": 0.1355,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 9.36191974822974,
|
|
"grad_norm": 0.2393199934322128,
|
|
"learning_rate": 4.38220319780181e-06,
|
|
"loss": 0.1323,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 9.365066876475217,
|
|
"grad_norm": 0.2357587424561091,
|
|
"learning_rate": 4.378441658085899e-06,
|
|
"loss": 0.1358,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 9.368214004720693,
|
|
"grad_norm": 0.24506755276712727,
|
|
"learning_rate": 4.374698523389971e-06,
|
|
"loss": 0.1251,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 9.371361132966168,
|
|
"grad_norm": 0.23734868049410257,
|
|
"learning_rate": 4.370973797623585e-06,
|
|
"loss": 0.1355,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 9.374508261211645,
|
|
"grad_norm": 0.23479098741562127,
|
|
"learning_rate": 4.367267484677067e-06,
|
|
"loss": 0.1332,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 9.37765538945712,
|
|
"grad_norm": 0.2301962617124699,
|
|
"learning_rate": 4.363579588421517e-06,
|
|
"loss": 0.1282,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 9.380802517702596,
|
|
"grad_norm": 0.24990595732884477,
|
|
"learning_rate": 4.3599101127087944e-06,
|
|
"loss": 0.1287,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 9.383949645948073,
|
|
"grad_norm": 0.24405837184477827,
|
|
"learning_rate": 4.356259061371524e-06,
|
|
"loss": 0.1322,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 9.387096774193548,
|
|
"grad_norm": 0.23354503269275637,
|
|
"learning_rate": 4.3526264382230806e-06,
|
|
"loss": 0.1301,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 9.390243902439025,
|
|
"grad_norm": 0.22497899824756662,
|
|
"learning_rate": 4.349012247057597e-06,
|
|
"loss": 0.1341,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 9.3933910306845,
|
|
"grad_norm": 0.233509003106069,
|
|
"learning_rate": 4.345416491649954e-06,
|
|
"loss": 0.1291,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 9.396538158929976,
|
|
"grad_norm": 0.23591177828002854,
|
|
"learning_rate": 4.3418391757557745e-06,
|
|
"loss": 0.1311,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 9.399685287175453,
|
|
"grad_norm": 0.22958226547889216,
|
|
"learning_rate": 4.338280303111426e-06,
|
|
"loss": 0.1321,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 9.402832415420928,
|
|
"grad_norm": 0.24045468326756445,
|
|
"learning_rate": 4.334739877434006e-06,
|
|
"loss": 0.1326,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 9.405979543666405,
|
|
"grad_norm": 0.24332656727987065,
|
|
"learning_rate": 4.33121790242135e-06,
|
|
"loss": 0.1353,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 9.40912667191188,
|
|
"grad_norm": 0.2318763713593753,
|
|
"learning_rate": 4.327714381752023e-06,
|
|
"loss": 0.1309,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 9.412273800157356,
|
|
"grad_norm": 0.23460111972050288,
|
|
"learning_rate": 4.32422931908531e-06,
|
|
"loss": 0.1306,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 9.415420928402833,
|
|
"grad_norm": 0.23033494597201723,
|
|
"learning_rate": 4.320762718061228e-06,
|
|
"loss": 0.1341,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 9.418568056648308,
|
|
"grad_norm": 0.24542818156662408,
|
|
"learning_rate": 4.317314582300496e-06,
|
|
"loss": 0.1324,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 9.421715184893785,
|
|
"grad_norm": 0.2363958350642206,
|
|
"learning_rate": 4.313884915404562e-06,
|
|
"loss": 0.1346,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 9.424862313139261,
|
|
"grad_norm": 0.24392888891052444,
|
|
"learning_rate": 4.3104737209555735e-06,
|
|
"loss": 0.1293,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 9.428009441384736,
|
|
"grad_norm": 0.2270478248172354,
|
|
"learning_rate": 4.30708100251639e-06,
|
|
"loss": 0.1342,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 9.431156569630213,
|
|
"grad_norm": 0.2419755010247926,
|
|
"learning_rate": 4.3037067636305695e-06,
|
|
"loss": 0.1361,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 9.434303697875688,
|
|
"grad_norm": 0.24302820086496213,
|
|
"learning_rate": 4.3003510078223735e-06,
|
|
"loss": 0.1357,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 9.437450826121164,
|
|
"grad_norm": 0.22889930071492856,
|
|
"learning_rate": 4.297013738596754e-06,
|
|
"loss": 0.1326,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 9.440597954366641,
|
|
"grad_norm": 0.2470105830931397,
|
|
"learning_rate": 4.293694959439357e-06,
|
|
"loss": 0.1307,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 9.443745082612116,
|
|
"grad_norm": 0.24509387802011043,
|
|
"learning_rate": 4.290394673816518e-06,
|
|
"loss": 0.1351,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 9.446892210857593,
|
|
"grad_norm": 0.24182787680185575,
|
|
"learning_rate": 4.287112885175252e-06,
|
|
"loss": 0.1392,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 9.450039339103068,
|
|
"grad_norm": 0.2467508126477943,
|
|
"learning_rate": 4.283849596943258e-06,
|
|
"loss": 0.1263,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 9.453186467348544,
|
|
"grad_norm": 0.2316135678434177,
|
|
"learning_rate": 4.280604812528912e-06,
|
|
"loss": 0.1324,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 9.456333595594021,
|
|
"grad_norm": 0.2268465070914208,
|
|
"learning_rate": 4.277378535321262e-06,
|
|
"loss": 0.1328,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 9.459480723839496,
|
|
"grad_norm": 0.2332807858816893,
|
|
"learning_rate": 4.274170768690028e-06,
|
|
"loss": 0.1373,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 9.462627852084973,
|
|
"grad_norm": 0.22900858658201778,
|
|
"learning_rate": 4.270981515985594e-06,
|
|
"loss": 0.1329,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 9.465774980330448,
|
|
"grad_norm": 0.2369332302133477,
|
|
"learning_rate": 4.26781078053901e-06,
|
|
"loss": 0.1314,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 9.468922108575924,
|
|
"grad_norm": 0.24455011847922023,
|
|
"learning_rate": 4.264658565661981e-06,
|
|
"loss": 0.1285,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 9.472069236821401,
|
|
"grad_norm": 0.239435503067824,
|
|
"learning_rate": 4.261524874646873e-06,
|
|
"loss": 0.1332,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 9.475216365066876,
|
|
"grad_norm": 0.2492226908546605,
|
|
"learning_rate": 4.258409710766699e-06,
|
|
"loss": 0.1278,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 9.478363493312353,
|
|
"grad_norm": 0.23444315425803552,
|
|
"learning_rate": 4.255313077275127e-06,
|
|
"loss": 0.1376,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 9.48151062155783,
|
|
"grad_norm": 0.23211757532136212,
|
|
"learning_rate": 4.252234977406469e-06,
|
|
"loss": 0.1327,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 9.484657749803304,
|
|
"grad_norm": 0.2313035945554957,
|
|
"learning_rate": 4.249175414375676e-06,
|
|
"loss": 0.1335,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 9.487804878048781,
|
|
"grad_norm": 0.24305332604029584,
|
|
"learning_rate": 4.246134391378343e-06,
|
|
"loss": 0.1288,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 9.490952006294256,
|
|
"grad_norm": 0.2416576523834277,
|
|
"learning_rate": 4.243111911590694e-06,
|
|
"loss": 0.1346,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 9.494099134539733,
|
|
"grad_norm": 0.23223887323478282,
|
|
"learning_rate": 4.240107978169594e-06,
|
|
"loss": 0.1357,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 9.49724626278521,
|
|
"grad_norm": 0.23335311574377063,
|
|
"learning_rate": 4.23712259425253e-06,
|
|
"loss": 0.1336,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 9.500393391030684,
|
|
"grad_norm": 0.23028601506740967,
|
|
"learning_rate": 4.234155762957619e-06,
|
|
"loss": 0.1367,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 9.50354051927616,
|
|
"grad_norm": 0.2446292329288432,
|
|
"learning_rate": 4.231207487383596e-06,
|
|
"loss": 0.1363,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 9.506687647521636,
|
|
"grad_norm": 0.24503357102458362,
|
|
"learning_rate": 4.228277770609821e-06,
|
|
"loss": 0.1386,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 9.509834775767112,
|
|
"grad_norm": 0.2445600531818062,
|
|
"learning_rate": 4.225366615696263e-06,
|
|
"loss": 0.1369,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 9.51298190401259,
|
|
"grad_norm": 0.23791913882537596,
|
|
"learning_rate": 4.222474025683514e-06,
|
|
"loss": 0.1346,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 9.516129032258064,
|
|
"grad_norm": 0.2429591101645711,
|
|
"learning_rate": 4.219600003592767e-06,
|
|
"loss": 0.1307,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 9.51927616050354,
|
|
"grad_norm": 0.3968467791341316,
|
|
"learning_rate": 4.2167445524258226e-06,
|
|
"loss": 0.1379,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 9.522423288749017,
|
|
"grad_norm": 0.23354286684086903,
|
|
"learning_rate": 4.213907675165086e-06,
|
|
"loss": 0.1312,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 9.525570416994492,
|
|
"grad_norm": 0.23749531424478923,
|
|
"learning_rate": 4.2110893747735655e-06,
|
|
"loss": 0.1308,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 9.528717545239969,
|
|
"grad_norm": 0.24147254873584723,
|
|
"learning_rate": 4.2082896541948675e-06,
|
|
"loss": 0.1374,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 9.531864673485444,
|
|
"grad_norm": 0.252926435908387,
|
|
"learning_rate": 4.205508516353183e-06,
|
|
"loss": 0.139,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 9.53501180173092,
|
|
"grad_norm": 0.24123310444093413,
|
|
"learning_rate": 4.202745964153305e-06,
|
|
"loss": 0.1296,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 9.538158929976397,
|
|
"grad_norm": 0.22888213642637095,
|
|
"learning_rate": 4.200002000480605e-06,
|
|
"loss": 0.1333,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 9.541306058221872,
|
|
"grad_norm": 0.23412443013585427,
|
|
"learning_rate": 4.197276628201048e-06,
|
|
"loss": 0.1357,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 9.544453186467349,
|
|
"grad_norm": 0.2506322070152408,
|
|
"learning_rate": 4.194569850161179e-06,
|
|
"loss": 0.1351,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 9.547600314712824,
|
|
"grad_norm": 0.2314307114246427,
|
|
"learning_rate": 4.191881669188117e-06,
|
|
"loss": 0.1377,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 9.5507474429583,
|
|
"grad_norm": 0.2383274553063989,
|
|
"learning_rate": 4.1892120880895605e-06,
|
|
"loss": 0.1333,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 9.553894571203777,
|
|
"grad_norm": 0.23157419286047795,
|
|
"learning_rate": 4.186561109653784e-06,
|
|
"loss": 0.1401,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 9.557041699449252,
|
|
"grad_norm": 0.2454714122459766,
|
|
"learning_rate": 4.1839287366496285e-06,
|
|
"loss": 0.1351,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 9.560188827694729,
|
|
"grad_norm": 0.23213441228577894,
|
|
"learning_rate": 4.181314971826502e-06,
|
|
"loss": 0.1349,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 9.563335955940204,
|
|
"grad_norm": 0.2383713805883692,
|
|
"learning_rate": 4.178719817914378e-06,
|
|
"loss": 0.1322,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 9.56648308418568,
|
|
"grad_norm": 0.23809296540852945,
|
|
"learning_rate": 4.176143277623796e-06,
|
|
"loss": 0.1236,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 9.569630212431157,
|
|
"grad_norm": 0.2452859142596891,
|
|
"learning_rate": 4.1735853536458455e-06,
|
|
"loss": 0.1334,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 9.572777340676632,
|
|
"grad_norm": 0.2528048789801201,
|
|
"learning_rate": 4.1710460486521795e-06,
|
|
"loss": 0.1345,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 9.575924468922109,
|
|
"grad_norm": 0.24602308858149582,
|
|
"learning_rate": 4.168525365295002e-06,
|
|
"loss": 0.1352,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 9.579071597167584,
|
|
"grad_norm": 0.24143788400432517,
|
|
"learning_rate": 4.166023306207066e-06,
|
|
"loss": 0.1344,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 9.58221872541306,
|
|
"grad_norm": 0.24049001092299743,
|
|
"learning_rate": 4.163539874001671e-06,
|
|
"loss": 0.1361,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 9.585365853658537,
|
|
"grad_norm": 0.23227717157473632,
|
|
"learning_rate": 4.161075071272668e-06,
|
|
"loss": 0.1305,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 9.588512981904012,
|
|
"grad_norm": 0.2350438398401355,
|
|
"learning_rate": 4.158628900594442e-06,
|
|
"loss": 0.1313,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 9.591660110149489,
|
|
"grad_norm": 0.2374230007231774,
|
|
"learning_rate": 4.156201364521924e-06,
|
|
"loss": 0.1356,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 9.594807238394965,
|
|
"grad_norm": 0.23945340129922146,
|
|
"learning_rate": 4.1537924655905785e-06,
|
|
"loss": 0.137,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 9.59795436664044,
|
|
"grad_norm": 0.23799731806867103,
|
|
"learning_rate": 4.151402206316405e-06,
|
|
"loss": 0.1294,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 9.601101494885917,
|
|
"grad_norm": 0.2361952391608318,
|
|
"learning_rate": 4.1490305891959334e-06,
|
|
"loss": 0.1312,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 9.604248623131392,
|
|
"grad_norm": 0.24879673775767014,
|
|
"learning_rate": 4.146677616706226e-06,
|
|
"loss": 0.1305,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 9.607395751376869,
|
|
"grad_norm": 0.23579330579462654,
|
|
"learning_rate": 4.144343291304867e-06,
|
|
"loss": 0.1342,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 9.610542879622345,
|
|
"grad_norm": 0.2293828810426797,
|
|
"learning_rate": 4.14202761542997e-06,
|
|
"loss": 0.1397,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 9.61369000786782,
|
|
"grad_norm": 0.22998664945318115,
|
|
"learning_rate": 4.139730591500165e-06,
|
|
"loss": 0.1343,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 9.616837136113297,
|
|
"grad_norm": 0.22872577120241833,
|
|
"learning_rate": 4.137452221914602e-06,
|
|
"loss": 0.1315,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 9.619984264358772,
|
|
"grad_norm": 0.25033210170902204,
|
|
"learning_rate": 4.135192509052947e-06,
|
|
"loss": 0.1324,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 9.623131392604249,
|
|
"grad_norm": 0.23939961130535323,
|
|
"learning_rate": 4.132951455275385e-06,
|
|
"loss": 0.1347,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 9.626278520849725,
|
|
"grad_norm": 0.24037659462353853,
|
|
"learning_rate": 4.130729062922602e-06,
|
|
"loss": 0.1323,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 9.6294256490952,
|
|
"grad_norm": 0.22674798888307743,
|
|
"learning_rate": 4.1285253343158045e-06,
|
|
"loss": 0.1418,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 9.632572777340677,
|
|
"grad_norm": 0.24399535601791178,
|
|
"learning_rate": 4.126340271756696e-06,
|
|
"loss": 0.1345,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 9.635719905586154,
|
|
"grad_norm": 0.2380467339574685,
|
|
"learning_rate": 4.1241738775274875e-06,
|
|
"loss": 0.1322,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 9.638867033831628,
|
|
"grad_norm": 0.2291006675130497,
|
|
"learning_rate": 4.122026153890896e-06,
|
|
"loss": 0.1365,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 9.642014162077105,
|
|
"grad_norm": 0.23764446572668885,
|
|
"learning_rate": 4.119897103090129e-06,
|
|
"loss": 0.1362,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 9.64516129032258,
|
|
"grad_norm": 0.24054188658200384,
|
|
"learning_rate": 4.117786727348898e-06,
|
|
"loss": 0.135,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 9.648308418568057,
|
|
"grad_norm": 0.22923630142098805,
|
|
"learning_rate": 4.1156950288714084e-06,
|
|
"loss": 0.1376,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 9.651455546813533,
|
|
"grad_norm": 0.23674042582246743,
|
|
"learning_rate": 4.113622009842354e-06,
|
|
"loss": 0.138,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 9.654602675059008,
|
|
"grad_norm": 0.23241455559511415,
|
|
"learning_rate": 4.111567672426922e-06,
|
|
"loss": 0.1394,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 9.657749803304485,
|
|
"grad_norm": 0.22726828049867015,
|
|
"learning_rate": 4.109532018770787e-06,
|
|
"loss": 0.1299,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 9.66089693154996,
|
|
"grad_norm": 0.2320797444398632,
|
|
"learning_rate": 4.107515051000108e-06,
|
|
"loss": 0.1364,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 9.664044059795437,
|
|
"grad_norm": 0.237128823718978,
|
|
"learning_rate": 4.105516771221528e-06,
|
|
"loss": 0.1312,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 9.667191188040913,
|
|
"grad_norm": 0.24882016641887955,
|
|
"learning_rate": 4.10353718152217e-06,
|
|
"loss": 0.1342,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 9.670338316286388,
|
|
"grad_norm": 0.23873796840310055,
|
|
"learning_rate": 4.1015762839696396e-06,
|
|
"loss": 0.1345,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 9.673485444531865,
|
|
"grad_norm": 0.2391341419187129,
|
|
"learning_rate": 4.099634080612016e-06,
|
|
"loss": 0.1324,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 9.67663257277734,
|
|
"grad_norm": 0.23289972746538012,
|
|
"learning_rate": 4.097710573477852e-06,
|
|
"loss": 0.1389,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 9.679779701022817,
|
|
"grad_norm": 0.23306239327347605,
|
|
"learning_rate": 4.095805764576177e-06,
|
|
"loss": 0.1362,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 9.682926829268293,
|
|
"grad_norm": 0.24480810822083465,
|
|
"learning_rate": 4.093919655896484e-06,
|
|
"loss": 0.1278,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 9.686073957513768,
|
|
"grad_norm": 0.23785192923431273,
|
|
"learning_rate": 4.092052249408746e-06,
|
|
"loss": 0.1325,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 9.689221085759245,
|
|
"grad_norm": 0.23076825052848043,
|
|
"learning_rate": 4.090203547063389e-06,
|
|
"loss": 0.136,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 9.69236821400472,
|
|
"grad_norm": 0.2339128976885369,
|
|
"learning_rate": 4.0883735507913105e-06,
|
|
"loss": 0.1324,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 9.695515342250197,
|
|
"grad_norm": 0.24873730826363002,
|
|
"learning_rate": 4.0865622625038725e-06,
|
|
"loss": 0.1305,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 9.698662470495673,
|
|
"grad_norm": 0.23691902367088646,
|
|
"learning_rate": 4.08476968409289e-06,
|
|
"loss": 0.1324,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 9.701809598741148,
|
|
"grad_norm": 0.24004736754782502,
|
|
"learning_rate": 4.0829958174306435e-06,
|
|
"loss": 0.1395,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 9.704956726986625,
|
|
"grad_norm": 0.23649363473883206,
|
|
"learning_rate": 4.081240664369862e-06,
|
|
"loss": 0.1297,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 9.708103855232102,
|
|
"grad_norm": 0.2419505558569642,
|
|
"learning_rate": 4.079504226743739e-06,
|
|
"loss": 0.136,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 9.711250983477576,
|
|
"grad_norm": 0.23502797156800156,
|
|
"learning_rate": 4.077786506365911e-06,
|
|
"loss": 0.1334,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 9.714398111723053,
|
|
"grad_norm": 0.21960473813286804,
|
|
"learning_rate": 4.076087505030471e-06,
|
|
"loss": 0.1356,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 9.717545239968528,
|
|
"grad_norm": 0.23767379913931116,
|
|
"learning_rate": 4.074407224511955e-06,
|
|
"loss": 0.1325,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 9.720692368214005,
|
|
"grad_norm": 0.2296867126057468,
|
|
"learning_rate": 4.072745666565352e-06,
|
|
"loss": 0.1345,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 9.723839496459481,
|
|
"grad_norm": 0.23047397670360414,
|
|
"learning_rate": 4.071102832926097e-06,
|
|
"loss": 0.1302,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 9.726986624704956,
|
|
"grad_norm": 0.23998182867200016,
|
|
"learning_rate": 4.0694787253100585e-06,
|
|
"loss": 0.1338,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 9.730133752950433,
|
|
"grad_norm": 0.2323852688580098,
|
|
"learning_rate": 4.067873345413555e-06,
|
|
"loss": 0.1315,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 9.733280881195908,
|
|
"grad_norm": 0.24171313095121516,
|
|
"learning_rate": 4.066286694913345e-06,
|
|
"loss": 0.1341,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 9.736428009441385,
|
|
"grad_norm": 0.22679821098821545,
|
|
"learning_rate": 4.064718775466618e-06,
|
|
"loss": 0.1269,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 9.739575137686861,
|
|
"grad_norm": 0.23502911107275046,
|
|
"learning_rate": 4.063169588711004e-06,
|
|
"loss": 0.1345,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 9.742722265932336,
|
|
"grad_norm": 0.23836190822770764,
|
|
"learning_rate": 4.0616391362645715e-06,
|
|
"loss": 0.1346,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 9.745869394177813,
|
|
"grad_norm": 0.2278799148478252,
|
|
"learning_rate": 4.060127419725812e-06,
|
|
"loss": 0.1367,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 9.74901652242329,
|
|
"grad_norm": 0.2456917732347001,
|
|
"learning_rate": 4.058634440673658e-06,
|
|
"loss": 0.1326,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 9.752163650668765,
|
|
"grad_norm": 0.23836719259607256,
|
|
"learning_rate": 4.057160200667464e-06,
|
|
"loss": 0.1308,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 9.755310778914241,
|
|
"grad_norm": 0.2429286720699004,
|
|
"learning_rate": 4.055704701247018e-06,
|
|
"loss": 0.1327,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 9.758457907159716,
|
|
"grad_norm": 0.23875447802288385,
|
|
"learning_rate": 4.05426794393253e-06,
|
|
"loss": 0.1314,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 9.761605035405193,
|
|
"grad_norm": 0.2278467270000926,
|
|
"learning_rate": 4.052849930224636e-06,
|
|
"loss": 0.1356,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 9.76475216365067,
|
|
"grad_norm": 0.23233849614758936,
|
|
"learning_rate": 4.051450661604395e-06,
|
|
"loss": 0.1311,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 9.767899291896144,
|
|
"grad_norm": 0.23866830169952308,
|
|
"learning_rate": 4.0500701395332875e-06,
|
|
"loss": 0.1297,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 9.771046420141621,
|
|
"grad_norm": 0.23851746320196832,
|
|
"learning_rate": 4.0487083654532165e-06,
|
|
"loss": 0.1357,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 9.774193548387096,
|
|
"grad_norm": 0.2327507724699466,
|
|
"learning_rate": 4.047365340786496e-06,
|
|
"loss": 0.1338,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 9.777340676632573,
|
|
"grad_norm": 0.2293150922887092,
|
|
"learning_rate": 4.046041066935868e-06,
|
|
"loss": 0.1295,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 9.78048780487805,
|
|
"grad_norm": 0.2391233027321985,
|
|
"learning_rate": 4.044735545284482e-06,
|
|
"loss": 0.1389,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 9.783634933123524,
|
|
"grad_norm": 0.23945974117378163,
|
|
"learning_rate": 4.043448777195901e-06,
|
|
"loss": 0.1381,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 9.786782061369001,
|
|
"grad_norm": 0.22786053750451218,
|
|
"learning_rate": 4.042180764014107e-06,
|
|
"loss": 0.1385,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 9.789929189614476,
|
|
"grad_norm": 0.23783743029557908,
|
|
"learning_rate": 4.040931507063487e-06,
|
|
"loss": 0.1366,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 9.793076317859953,
|
|
"grad_norm": 0.24131425400897927,
|
|
"learning_rate": 4.039701007648843e-06,
|
|
"loss": 0.1336,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 9.79622344610543,
|
|
"grad_norm": 0.24372846124327077,
|
|
"learning_rate": 4.0384892670553795e-06,
|
|
"loss": 0.1383,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 9.799370574350904,
|
|
"grad_norm": 0.23973719599414378,
|
|
"learning_rate": 4.0372962865487145e-06,
|
|
"loss": 0.1326,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 9.802517702596381,
|
|
"grad_norm": 0.24009549823324744,
|
|
"learning_rate": 4.036122067374869e-06,
|
|
"loss": 0.1338,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 9.805664830841856,
|
|
"grad_norm": 0.23976142453197466,
|
|
"learning_rate": 4.034966610760265e-06,
|
|
"loss": 0.1401,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 9.808811959087333,
|
|
"grad_norm": 0.22620166570113598,
|
|
"learning_rate": 4.033829917911736e-06,
|
|
"loss": 0.138,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 9.81195908733281,
|
|
"grad_norm": 0.23910385806970655,
|
|
"learning_rate": 4.032711990016509e-06,
|
|
"loss": 0.1319,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 9.815106215578284,
|
|
"grad_norm": 0.22753533073166138,
|
|
"learning_rate": 4.031612828242216e-06,
|
|
"loss": 0.1338,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 9.818253343823761,
|
|
"grad_norm": 0.2421208770020626,
|
|
"learning_rate": 4.030532433736889e-06,
|
|
"loss": 0.1365,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 9.821400472069238,
|
|
"grad_norm": 0.23782192539849972,
|
|
"learning_rate": 4.029470807628956e-06,
|
|
"loss": 0.1356,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 9.824547600314713,
|
|
"grad_norm": 0.23934965568846245,
|
|
"learning_rate": 4.028427951027245e-06,
|
|
"loss": 0.1358,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 9.82769472856019,
|
|
"grad_norm": 0.23281488681008186,
|
|
"learning_rate": 4.027403865020977e-06,
|
|
"loss": 0.1308,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 9.830841856805664,
|
|
"grad_norm": 0.2387332651986393,
|
|
"learning_rate": 4.026398550679772e-06,
|
|
"loss": 0.1317,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 9.83398898505114,
|
|
"grad_norm": 0.24160259965898973,
|
|
"learning_rate": 4.025412009053636e-06,
|
|
"loss": 0.1364,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 9.837136113296618,
|
|
"grad_norm": 0.23512080872741115,
|
|
"learning_rate": 4.0244442411729775e-06,
|
|
"loss": 0.135,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 9.840283241542092,
|
|
"grad_norm": 0.23515000349024553,
|
|
"learning_rate": 4.02349524804859e-06,
|
|
"loss": 0.1424,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 9.84343036978757,
|
|
"grad_norm": 0.24040363807543386,
|
|
"learning_rate": 4.02256503067166e-06,
|
|
"loss": 0.1326,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 9.846577498033044,
|
|
"grad_norm": 0.23796510922943842,
|
|
"learning_rate": 4.021653590013759e-06,
|
|
"loss": 0.1402,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 9.84972462627852,
|
|
"grad_norm": 0.2405813863455342,
|
|
"learning_rate": 4.020760927026856e-06,
|
|
"loss": 0.1382,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 9.852871754523997,
|
|
"grad_norm": 0.2476312470454177,
|
|
"learning_rate": 4.019887042643299e-06,
|
|
"loss": 0.1308,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 9.856018882769472,
|
|
"grad_norm": 0.23636778802712008,
|
|
"learning_rate": 4.019031937775827e-06,
|
|
"loss": 0.1351,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 9.859166011014949,
|
|
"grad_norm": 0.23409257155120913,
|
|
"learning_rate": 4.01819561331756e-06,
|
|
"loss": 0.1376,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 9.862313139260426,
|
|
"grad_norm": 0.234688041297315,
|
|
"learning_rate": 4.017378070142011e-06,
|
|
"loss": 0.131,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 9.8654602675059,
|
|
"grad_norm": 0.23250184458501116,
|
|
"learning_rate": 4.016579309103068e-06,
|
|
"loss": 0.1312,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 9.868607395751377,
|
|
"grad_norm": 0.2334349376577723,
|
|
"learning_rate": 4.015799331035007e-06,
|
|
"loss": 0.1323,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 9.871754523996852,
|
|
"grad_norm": 0.23703297054055375,
|
|
"learning_rate": 4.015038136752481e-06,
|
|
"loss": 0.1343,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 9.874901652242329,
|
|
"grad_norm": 0.24196689257467524,
|
|
"learning_rate": 4.01429572705053e-06,
|
|
"loss": 0.1355,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 9.878048780487806,
|
|
"grad_norm": 0.24673350426408439,
|
|
"learning_rate": 4.013572102704572e-06,
|
|
"loss": 0.1323,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 9.88119590873328,
|
|
"grad_norm": 0.23280087204706737,
|
|
"learning_rate": 4.012867264470404e-06,
|
|
"loss": 0.1336,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 9.884343036978757,
|
|
"grad_norm": 0.24120088645534854,
|
|
"learning_rate": 4.0121812130842e-06,
|
|
"loss": 0.14,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 9.887490165224232,
|
|
"grad_norm": 0.2337805163325715,
|
|
"learning_rate": 4.0115139492625134e-06,
|
|
"loss": 0.1361,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 9.890637293469709,
|
|
"grad_norm": 0.23532202894035434,
|
|
"learning_rate": 4.0108654737022755e-06,
|
|
"loss": 0.1335,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 9.893784421715186,
|
|
"grad_norm": 0.24125197048086924,
|
|
"learning_rate": 4.010235787080794e-06,
|
|
"loss": 0.1378,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 9.89693154996066,
|
|
"grad_norm": 0.24669623748502031,
|
|
"learning_rate": 4.00962489005575e-06,
|
|
"loss": 0.1326,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 9.900078678206137,
|
|
"grad_norm": 0.24343818316183338,
|
|
"learning_rate": 4.009032783265204e-06,
|
|
"loss": 0.1348,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 9.903225806451612,
|
|
"grad_norm": 0.22541573373892404,
|
|
"learning_rate": 4.008459467327586e-06,
|
|
"loss": 0.1334,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 9.906372934697089,
|
|
"grad_norm": 0.24314992873545332,
|
|
"learning_rate": 4.007904942841702e-06,
|
|
"loss": 0.1333,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 9.909520062942565,
|
|
"grad_norm": 0.24444507569036877,
|
|
"learning_rate": 4.007369210386732e-06,
|
|
"loss": 0.1355,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 9.91266719118804,
|
|
"grad_norm": 0.23456707600284918,
|
|
"learning_rate": 4.006852270522226e-06,
|
|
"loss": 0.1373,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 9.915814319433517,
|
|
"grad_norm": 0.22873767694094543,
|
|
"learning_rate": 4.006354123788107e-06,
|
|
"loss": 0.1382,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 9.918961447678992,
|
|
"grad_norm": 0.23735618369859499,
|
|
"learning_rate": 4.00587477070467e-06,
|
|
"loss": 0.1379,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 9.922108575924469,
|
|
"grad_norm": 0.23956072263659126,
|
|
"learning_rate": 4.005414211772583e-06,
|
|
"loss": 0.1371,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 9.925255704169945,
|
|
"grad_norm": 0.23043205951102974,
|
|
"learning_rate": 4.004972447472878e-06,
|
|
"loss": 0.1327,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 9.92840283241542,
|
|
"grad_norm": 0.2344662864402095,
|
|
"learning_rate": 4.00454947826696e-06,
|
|
"loss": 0.1319,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 9.931549960660897,
|
|
"grad_norm": 0.24245229721032507,
|
|
"learning_rate": 4.0041453045966055e-06,
|
|
"loss": 0.1383,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 9.934697088906374,
|
|
"grad_norm": 0.24522019444100665,
|
|
"learning_rate": 4.003759926883958e-06,
|
|
"loss": 0.1346,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 9.937844217151849,
|
|
"grad_norm": 0.23782263325071246,
|
|
"learning_rate": 4.003393345531529e-06,
|
|
"loss": 0.145,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 9.940991345397325,
|
|
"grad_norm": 0.24011271527037592,
|
|
"learning_rate": 4.0030455609221975e-06,
|
|
"loss": 0.1341,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 9.9441384736428,
|
|
"grad_norm": 0.23685295626777536,
|
|
"learning_rate": 4.0027165734192115e-06,
|
|
"loss": 0.1343,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 9.947285601888277,
|
|
"grad_norm": 0.24415877369974173,
|
|
"learning_rate": 4.002406383366186e-06,
|
|
"loss": 0.1343,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 9.950432730133754,
|
|
"grad_norm": 0.23530638296528372,
|
|
"learning_rate": 4.0021149910871e-06,
|
|
"loss": 0.1344,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 9.953579858379229,
|
|
"grad_norm": 0.24560555060327768,
|
|
"learning_rate": 4.001842396886302e-06,
|
|
"loss": 0.1364,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 9.956726986624705,
|
|
"grad_norm": 0.23476225232747105,
|
|
"learning_rate": 4.001588601048508e-06,
|
|
"loss": 0.1339,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 9.95987411487018,
|
|
"grad_norm": 0.2326742437614356,
|
|
"learning_rate": 4.0013536038387946e-06,
|
|
"loss": 0.138,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 9.963021243115657,
|
|
"grad_norm": 0.23396824661309967,
|
|
"learning_rate": 4.00113740550261e-06,
|
|
"loss": 0.133,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 9.966168371361134,
|
|
"grad_norm": 0.23890877834056157,
|
|
"learning_rate": 4.000940006265763e-06,
|
|
"loss": 0.1362,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 9.969315499606608,
|
|
"grad_norm": 0.232863057932829,
|
|
"learning_rate": 4.000761406334429e-06,
|
|
"loss": 0.1298,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 9.972462627852085,
|
|
"grad_norm": 0.24073916528859923,
|
|
"learning_rate": 4.000601605895147e-06,
|
|
"loss": 0.1449,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 9.975609756097562,
|
|
"grad_norm": 0.23431234155479022,
|
|
"learning_rate": 4.000460605114827e-06,
|
|
"loss": 0.1384,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 9.978756884343037,
|
|
"grad_norm": 0.2290325302172204,
|
|
"learning_rate": 4.000338404140736e-06,
|
|
"loss": 0.1353,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 9.981904012588513,
|
|
"grad_norm": 0.2480177908337849,
|
|
"learning_rate": 4.00023500310051e-06,
|
|
"loss": 0.1325,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 9.985051140833988,
|
|
"grad_norm": 0.23707047681989588,
|
|
"learning_rate": 4.000150402102143e-06,
|
|
"loss": 0.1358,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 9.988198269079465,
|
|
"grad_norm": 0.25048005095022025,
|
|
"learning_rate": 4.000084601234001e-06,
|
|
"loss": 0.1356,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 9.991345397324942,
|
|
"grad_norm": 0.24387714398171834,
|
|
"learning_rate": 4.000037600564808e-06,
|
|
"loss": 0.1329,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 9.994492525570417,
|
|
"grad_norm": 0.24315440115509737,
|
|
"learning_rate": 4.000009400143658e-06,
|
|
"loss": 0.137,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 9.997639653815893,
|
|
"grad_norm": 0.22876527267837543,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.1403,
|
|
"step": 3170
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 3170,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 10,
|
|
"save_steps": 634,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.0340729464569725e+19,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|