5187 lines
125 KiB
JSON
5187 lines
125 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 2.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 736,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.002717391304347826,
|
||
|
|
"grad_norm": 1.1728428602218628,
|
||
|
|
"learning_rate": 1.0958904109589041e-07,
|
||
|
|
"loss": 1.5302,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005434782608695652,
|
||
|
|
"grad_norm": 1.2432262897491455,
|
||
|
|
"learning_rate": 2.1917808219178082e-07,
|
||
|
|
"loss": 1.5229,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008152173913043478,
|
||
|
|
"grad_norm": 1.276131510734558,
|
||
|
|
"learning_rate": 3.2876712328767123e-07,
|
||
|
|
"loss": 1.465,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.010869565217391304,
|
||
|
|
"grad_norm": 1.3332633972167969,
|
||
|
|
"learning_rate": 4.3835616438356164e-07,
|
||
|
|
"loss": 1.4591,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01358695652173913,
|
||
|
|
"grad_norm": 1.3067371845245361,
|
||
|
|
"learning_rate": 5.47945205479452e-07,
|
||
|
|
"loss": 1.4768,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.016304347826086956,
|
||
|
|
"grad_norm": 1.343665361404419,
|
||
|
|
"learning_rate": 6.575342465753425e-07,
|
||
|
|
"loss": 1.3851,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.019021739130434784,
|
||
|
|
"grad_norm": 1.3181569576263428,
|
||
|
|
"learning_rate": 7.671232876712329e-07,
|
||
|
|
"loss": 1.465,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.021739130434782608,
|
||
|
|
"grad_norm": 1.3282394409179688,
|
||
|
|
"learning_rate": 8.767123287671233e-07,
|
||
|
|
"loss": 1.4626,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024456521739130436,
|
||
|
|
"grad_norm": 1.3123646974563599,
|
||
|
|
"learning_rate": 9.863013698630137e-07,
|
||
|
|
"loss": 1.372,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02717391304347826,
|
||
|
|
"grad_norm": 1.1737868785858154,
|
||
|
|
"learning_rate": 1.095890410958904e-06,
|
||
|
|
"loss": 1.4761,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.029891304347826088,
|
||
|
|
"grad_norm": 1.1780813932418823,
|
||
|
|
"learning_rate": 1.2054794520547945e-06,
|
||
|
|
"loss": 1.4729,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03260869565217391,
|
||
|
|
"grad_norm": 1.2191578149795532,
|
||
|
|
"learning_rate": 1.315068493150685e-06,
|
||
|
|
"loss": 1.5081,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.035326086956521736,
|
||
|
|
"grad_norm": 1.082284927368164,
|
||
|
|
"learning_rate": 1.4246575342465753e-06,
|
||
|
|
"loss": 1.4792,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03804347826086957,
|
||
|
|
"grad_norm": 1.013970136642456,
|
||
|
|
"learning_rate": 1.5342465753424657e-06,
|
||
|
|
"loss": 1.4913,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04076086956521739,
|
||
|
|
"grad_norm": 1.0283228158950806,
|
||
|
|
"learning_rate": 1.643835616438356e-06,
|
||
|
|
"loss": 1.4546,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.043478260869565216,
|
||
|
|
"grad_norm": 1.0099676847457886,
|
||
|
|
"learning_rate": 1.7534246575342465e-06,
|
||
|
|
"loss": 1.43,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04619565217391304,
|
||
|
|
"grad_norm": 0.8908332586288452,
|
||
|
|
"learning_rate": 1.863013698630137e-06,
|
||
|
|
"loss": 1.606,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04891304347826087,
|
||
|
|
"grad_norm": 0.8198248744010925,
|
||
|
|
"learning_rate": 1.9726027397260274e-06,
|
||
|
|
"loss": 1.4755,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.051630434782608696,
|
||
|
|
"grad_norm": 0.8422051072120667,
|
||
|
|
"learning_rate": 2.0821917808219176e-06,
|
||
|
|
"loss": 1.4863,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05434782608695652,
|
||
|
|
"grad_norm": 0.7524480819702148,
|
||
|
|
"learning_rate": 2.191780821917808e-06,
|
||
|
|
"loss": 1.4982,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.057065217391304345,
|
||
|
|
"grad_norm": 0.7762174010276794,
|
||
|
|
"learning_rate": 2.3013698630136984e-06,
|
||
|
|
"loss": 1.4734,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.059782608695652176,
|
||
|
|
"grad_norm": 0.7196646332740784,
|
||
|
|
"learning_rate": 2.410958904109589e-06,
|
||
|
|
"loss": 1.5099,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0625,
|
||
|
|
"grad_norm": 0.7181140780448914,
|
||
|
|
"learning_rate": 2.5205479452054796e-06,
|
||
|
|
"loss": 1.4242,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06521739130434782,
|
||
|
|
"grad_norm": 0.7445970177650452,
|
||
|
|
"learning_rate": 2.63013698630137e-06,
|
||
|
|
"loss": 1.4157,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06793478260869565,
|
||
|
|
"grad_norm": 0.6932166814804077,
|
||
|
|
"learning_rate": 2.73972602739726e-06,
|
||
|
|
"loss": 1.4898,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07065217391304347,
|
||
|
|
"grad_norm": 0.6732593178749084,
|
||
|
|
"learning_rate": 2.8493150684931506e-06,
|
||
|
|
"loss": 1.4643,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07336956521739131,
|
||
|
|
"grad_norm": 0.6919940114021301,
|
||
|
|
"learning_rate": 2.958904109589041e-06,
|
||
|
|
"loss": 1.4602,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07608695652173914,
|
||
|
|
"grad_norm": 0.6646421551704407,
|
||
|
|
"learning_rate": 3.0684931506849314e-06,
|
||
|
|
"loss": 1.5683,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07880434782608696,
|
||
|
|
"grad_norm": 0.6307073831558228,
|
||
|
|
"learning_rate": 3.178082191780822e-06,
|
||
|
|
"loss": 1.4543,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08152173913043478,
|
||
|
|
"grad_norm": 0.6445148587226868,
|
||
|
|
"learning_rate": 3.287671232876712e-06,
|
||
|
|
"loss": 1.4359,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08423913043478261,
|
||
|
|
"grad_norm": 0.6217952370643616,
|
||
|
|
"learning_rate": 3.397260273972603e-06,
|
||
|
|
"loss": 1.4731,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08695652173913043,
|
||
|
|
"grad_norm": 0.5980370044708252,
|
||
|
|
"learning_rate": 3.506849315068493e-06,
|
||
|
|
"loss": 1.4586,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08967391304347826,
|
||
|
|
"grad_norm": 0.5764021873474121,
|
||
|
|
"learning_rate": 3.6164383561643833e-06,
|
||
|
|
"loss": 1.5436,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09239130434782608,
|
||
|
|
"grad_norm": 0.5938739776611328,
|
||
|
|
"learning_rate": 3.726027397260274e-06,
|
||
|
|
"loss": 1.439,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09510869565217392,
|
||
|
|
"grad_norm": 0.5414128303527832,
|
||
|
|
"learning_rate": 3.835616438356164e-06,
|
||
|
|
"loss": 1.4686,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09782608695652174,
|
||
|
|
"grad_norm": 0.5185249447822571,
|
||
|
|
"learning_rate": 3.945205479452055e-06,
|
||
|
|
"loss": 1.5181,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10054347826086957,
|
||
|
|
"grad_norm": 0.4969967007637024,
|
||
|
|
"learning_rate": 4.054794520547945e-06,
|
||
|
|
"loss": 1.3971,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10326086956521739,
|
||
|
|
"grad_norm": 0.4812517464160919,
|
||
|
|
"learning_rate": 4.164383561643835e-06,
|
||
|
|
"loss": 1.4484,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10597826086956522,
|
||
|
|
"grad_norm": 0.45815640687942505,
|
||
|
|
"learning_rate": 4.273972602739726e-06,
|
||
|
|
"loss": 1.3937,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10869565217391304,
|
||
|
|
"grad_norm": 0.46528932452201843,
|
||
|
|
"learning_rate": 4.383561643835616e-06,
|
||
|
|
"loss": 1.4014,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11141304347826086,
|
||
|
|
"grad_norm": 0.47149142622947693,
|
||
|
|
"learning_rate": 4.4931506849315066e-06,
|
||
|
|
"loss": 1.379,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11413043478260869,
|
||
|
|
"grad_norm": 0.452342689037323,
|
||
|
|
"learning_rate": 4.602739726027397e-06,
|
||
|
|
"loss": 1.525,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11684782608695653,
|
||
|
|
"grad_norm": 0.43710047006607056,
|
||
|
|
"learning_rate": 4.712328767123287e-06,
|
||
|
|
"loss": 1.3985,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11956521739130435,
|
||
|
|
"grad_norm": 0.45290184020996094,
|
||
|
|
"learning_rate": 4.821917808219178e-06,
|
||
|
|
"loss": 1.3659,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12228260869565218,
|
||
|
|
"grad_norm": 0.4344335198402405,
|
||
|
|
"learning_rate": 4.931506849315068e-06,
|
||
|
|
"loss": 1.3643,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.125,
|
||
|
|
"grad_norm": 0.4450250566005707,
|
||
|
|
"learning_rate": 5.041095890410959e-06,
|
||
|
|
"loss": 1.4228,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12771739130434784,
|
||
|
|
"grad_norm": 0.42544320225715637,
|
||
|
|
"learning_rate": 5.1506849315068494e-06,
|
||
|
|
"loss": 1.4202,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13043478260869565,
|
||
|
|
"grad_norm": 0.40666356682777405,
|
||
|
|
"learning_rate": 5.26027397260274e-06,
|
||
|
|
"loss": 1.4102,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1331521739130435,
|
||
|
|
"grad_norm": 0.43085426092147827,
|
||
|
|
"learning_rate": 5.36986301369863e-06,
|
||
|
|
"loss": 1.4723,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1358695652173913,
|
||
|
|
"grad_norm": 0.42747458815574646,
|
||
|
|
"learning_rate": 5.47945205479452e-06,
|
||
|
|
"loss": 1.3923,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13858695652173914,
|
||
|
|
"grad_norm": 0.42834821343421936,
|
||
|
|
"learning_rate": 5.589041095890411e-06,
|
||
|
|
"loss": 1.5331,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14130434782608695,
|
||
|
|
"grad_norm": 0.4218186140060425,
|
||
|
|
"learning_rate": 5.698630136986301e-06,
|
||
|
|
"loss": 1.5421,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14402173913043478,
|
||
|
|
"grad_norm": 0.4152364432811737,
|
||
|
|
"learning_rate": 5.8082191780821915e-06,
|
||
|
|
"loss": 1.3454,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14673913043478262,
|
||
|
|
"grad_norm": 0.4136430621147156,
|
||
|
|
"learning_rate": 5.917808219178082e-06,
|
||
|
|
"loss": 1.4521,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14945652173913043,
|
||
|
|
"grad_norm": 0.40648165345191956,
|
||
|
|
"learning_rate": 6.027397260273972e-06,
|
||
|
|
"loss": 1.4088,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15217391304347827,
|
||
|
|
"grad_norm": 0.4164981544017792,
|
||
|
|
"learning_rate": 6.136986301369863e-06,
|
||
|
|
"loss": 1.4305,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15489130434782608,
|
||
|
|
"grad_norm": 0.39643916487693787,
|
||
|
|
"learning_rate": 6.246575342465753e-06,
|
||
|
|
"loss": 1.4827,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15760869565217392,
|
||
|
|
"grad_norm": 0.38055455684661865,
|
||
|
|
"learning_rate": 6.356164383561644e-06,
|
||
|
|
"loss": 1.4725,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16032608695652173,
|
||
|
|
"grad_norm": 0.37801629304885864,
|
||
|
|
"learning_rate": 6.465753424657534e-06,
|
||
|
|
"loss": 1.3829,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16304347826086957,
|
||
|
|
"grad_norm": 0.3744637370109558,
|
||
|
|
"learning_rate": 6.575342465753424e-06,
|
||
|
|
"loss": 1.3753,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16576086956521738,
|
||
|
|
"grad_norm": 0.3783905506134033,
|
||
|
|
"learning_rate": 6.684931506849315e-06,
|
||
|
|
"loss": 1.393,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16847826086956522,
|
||
|
|
"grad_norm": 0.40007972717285156,
|
||
|
|
"learning_rate": 6.794520547945206e-06,
|
||
|
|
"loss": 1.4476,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17119565217391305,
|
||
|
|
"grad_norm": 0.39294755458831787,
|
||
|
|
"learning_rate": 6.904109589041096e-06,
|
||
|
|
"loss": 1.3876,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17391304347826086,
|
||
|
|
"grad_norm": 0.377951979637146,
|
||
|
|
"learning_rate": 7.013698630136986e-06,
|
||
|
|
"loss": 1.4476,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1766304347826087,
|
||
|
|
"grad_norm": 0.3997436463832855,
|
||
|
|
"learning_rate": 7.123287671232876e-06,
|
||
|
|
"loss": 1.3766,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1793478260869565,
|
||
|
|
"grad_norm": 0.38416406512260437,
|
||
|
|
"learning_rate": 7.2328767123287666e-06,
|
||
|
|
"loss": 1.3872,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18206521739130435,
|
||
|
|
"grad_norm": 0.3918316662311554,
|
||
|
|
"learning_rate": 7.342465753424658e-06,
|
||
|
|
"loss": 1.3798,
|
||
|
|
"step": 67
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18478260869565216,
|
||
|
|
"grad_norm": 0.38442927598953247,
|
||
|
|
"learning_rate": 7.452054794520548e-06,
|
||
|
|
"loss": 1.4045,
|
||
|
|
"step": 68
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1875,
|
||
|
|
"grad_norm": 0.41492289304733276,
|
||
|
|
"learning_rate": 7.561643835616438e-06,
|
||
|
|
"loss": 1.4799,
|
||
|
|
"step": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19021739130434784,
|
||
|
|
"grad_norm": 0.368286669254303,
|
||
|
|
"learning_rate": 7.671232876712327e-06,
|
||
|
|
"loss": 1.4169,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19293478260869565,
|
||
|
|
"grad_norm": 0.3741121292114258,
|
||
|
|
"learning_rate": 7.780821917808218e-06,
|
||
|
|
"loss": 1.3663,
|
||
|
|
"step": 71
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1956521739130435,
|
||
|
|
"grad_norm": 0.4842956066131592,
|
||
|
|
"learning_rate": 7.89041095890411e-06,
|
||
|
|
"loss": 1.4287,
|
||
|
|
"step": 72
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1983695652173913,
|
||
|
|
"grad_norm": 0.3899683654308319,
|
||
|
|
"learning_rate": 8e-06,
|
||
|
|
"loss": 1.347,
|
||
|
|
"step": 73
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20108695652173914,
|
||
|
|
"grad_norm": 0.4036129117012024,
|
||
|
|
"learning_rate": 7.998791723304636e-06,
|
||
|
|
"loss": 1.5064,
|
||
|
|
"step": 74
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20380434782608695,
|
||
|
|
"grad_norm": 0.35146766901016235,
|
||
|
|
"learning_rate": 7.997580157289776e-06,
|
||
|
|
"loss": 1.3794,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20652173913043478,
|
||
|
|
"grad_norm": 0.3625771403312683,
|
||
|
|
"learning_rate": 7.996365288505225e-06,
|
||
|
|
"loss": 1.3338,
|
||
|
|
"step": 76
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20923913043478262,
|
||
|
|
"grad_norm": 0.3601396679878235,
|
||
|
|
"learning_rate": 7.995147103427359e-06,
|
||
|
|
"loss": 1.5026,
|
||
|
|
"step": 77
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21195652173913043,
|
||
|
|
"grad_norm": 0.37662559747695923,
|
||
|
|
"learning_rate": 7.993925588458618e-06,
|
||
|
|
"loss": 1.3867,
|
||
|
|
"step": 78
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21467391304347827,
|
||
|
|
"grad_norm": 0.37634652853012085,
|
||
|
|
"learning_rate": 7.992700729927007e-06,
|
||
|
|
"loss": 1.4557,
|
||
|
|
"step": 79
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21739130434782608,
|
||
|
|
"grad_norm": 0.3699498772621155,
|
||
|
|
"learning_rate": 7.99147251408558e-06,
|
||
|
|
"loss": 1.4237,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22010869565217392,
|
||
|
|
"grad_norm": 0.3638997972011566,
|
||
|
|
"learning_rate": 7.990240927111924e-06,
|
||
|
|
"loss": 1.3822,
|
||
|
|
"step": 81
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22282608695652173,
|
||
|
|
"grad_norm": 0.3786831498146057,
|
||
|
|
"learning_rate": 7.989005955107651e-06,
|
||
|
|
"loss": 1.3545,
|
||
|
|
"step": 82
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22554347826086957,
|
||
|
|
"grad_norm": 0.3499252200126648,
|
||
|
|
"learning_rate": 7.987767584097859e-06,
|
||
|
|
"loss": 1.4278,
|
||
|
|
"step": 83
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22826086956521738,
|
||
|
|
"grad_norm": 0.3761653006076813,
|
||
|
|
"learning_rate": 7.986525800030624e-06,
|
||
|
|
"loss": 1.4175,
|
||
|
|
"step": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23097826086956522,
|
||
|
|
"grad_norm": 0.3493477702140808,
|
||
|
|
"learning_rate": 7.985280588776449e-06,
|
||
|
|
"loss": 1.3315,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23369565217391305,
|
||
|
|
"grad_norm": 0.3659382462501526,
|
||
|
|
"learning_rate": 7.984031936127745e-06,
|
||
|
|
"loss": 1.3883,
|
||
|
|
"step": 86
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23641304347826086,
|
||
|
|
"grad_norm": 0.381062388420105,
|
||
|
|
"learning_rate": 7.982779827798278e-06,
|
||
|
|
"loss": 1.3715,
|
||
|
|
"step": 87
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2391304347826087,
|
||
|
|
"grad_norm": 0.3763432800769806,
|
||
|
|
"learning_rate": 7.981524249422633e-06,
|
||
|
|
"loss": 1.4077,
|
||
|
|
"step": 88
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2418478260869565,
|
||
|
|
"grad_norm": 0.3684273064136505,
|
||
|
|
"learning_rate": 7.980265186555657e-06,
|
||
|
|
"loss": 1.4948,
|
||
|
|
"step": 89
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24456521739130435,
|
||
|
|
"grad_norm": 0.36673790216445923,
|
||
|
|
"learning_rate": 7.979002624671916e-06,
|
||
|
|
"loss": 1.3961,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24728260869565216,
|
||
|
|
"grad_norm": 0.35955995321273804,
|
||
|
|
"learning_rate": 7.97773654916512e-06,
|
||
|
|
"loss": 1.3453,
|
||
|
|
"step": 91
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25,
|
||
|
|
"grad_norm": 0.3854355812072754,
|
||
|
|
"learning_rate": 7.976466945347576e-06,
|
||
|
|
"loss": 1.4365,
|
||
|
|
"step": 92
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25271739130434784,
|
||
|
|
"grad_norm": 0.3639521896839142,
|
||
|
|
"learning_rate": 7.975193798449611e-06,
|
||
|
|
"loss": 1.4144,
|
||
|
|
"step": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2554347826086957,
|
||
|
|
"grad_norm": 0.3835349678993225,
|
||
|
|
"learning_rate": 7.973917093619002e-06,
|
||
|
|
"loss": 1.4329,
|
||
|
|
"step": 94
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25815217391304346,
|
||
|
|
"grad_norm": 0.3855659067630768,
|
||
|
|
"learning_rate": 7.972636815920398e-06,
|
||
|
|
"loss": 1.4355,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2608695652173913,
|
||
|
|
"grad_norm": 0.3564344644546509,
|
||
|
|
"learning_rate": 7.971352950334734e-06,
|
||
|
|
"loss": 1.3849,
|
||
|
|
"step": 96
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26358695652173914,
|
||
|
|
"grad_norm": 0.3524990975856781,
|
||
|
|
"learning_rate": 7.970065481758653e-06,
|
||
|
|
"loss": 1.3864,
|
||
|
|
"step": 97
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.266304347826087,
|
||
|
|
"grad_norm": 0.3606851398944855,
|
||
|
|
"learning_rate": 7.968774395003903e-06,
|
||
|
|
"loss": 1.4575,
|
||
|
|
"step": 98
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26902173913043476,
|
||
|
|
"grad_norm": 0.35041898488998413,
|
||
|
|
"learning_rate": 7.967479674796748e-06,
|
||
|
|
"loss": 1.4361,
|
||
|
|
"step": 99
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2717391304347826,
|
||
|
|
"grad_norm": 0.35816073417663574,
|
||
|
|
"learning_rate": 7.96618130577736e-06,
|
||
|
|
"loss": 1.3062,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27445652173913043,
|
||
|
|
"grad_norm": 0.3600623309612274,
|
||
|
|
"learning_rate": 7.964879272499215e-06,
|
||
|
|
"loss": 1.2906,
|
||
|
|
"step": 101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27717391304347827,
|
||
|
|
"grad_norm": 0.36027124524116516,
|
||
|
|
"learning_rate": 7.96357355942848e-06,
|
||
|
|
"loss": 1.3665,
|
||
|
|
"step": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2798913043478261,
|
||
|
|
"grad_norm": 0.3527257442474365,
|
||
|
|
"learning_rate": 7.962264150943397e-06,
|
||
|
|
"loss": 1.374,
|
||
|
|
"step": 103
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2826086956521739,
|
||
|
|
"grad_norm": 0.352342814207077,
|
||
|
|
"learning_rate": 7.960951031333648e-06,
|
||
|
|
"loss": 1.379,
|
||
|
|
"step": 104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28532608695652173,
|
||
|
|
"grad_norm": 0.365664541721344,
|
||
|
|
"learning_rate": 7.959634184799749e-06,
|
||
|
|
"loss": 1.4299,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28804347826086957,
|
||
|
|
"grad_norm": 0.34988468885421753,
|
||
|
|
"learning_rate": 7.958313595452392e-06,
|
||
|
|
"loss": 1.4161,
|
||
|
|
"step": 106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2907608695652174,
|
||
|
|
"grad_norm": 0.3694222569465637,
|
||
|
|
"learning_rate": 7.956989247311828e-06,
|
||
|
|
"loss": 1.371,
|
||
|
|
"step": 107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29347826086956524,
|
||
|
|
"grad_norm": 0.3477892577648163,
|
||
|
|
"learning_rate": 7.955661124307205e-06,
|
||
|
|
"loss": 1.4617,
|
||
|
|
"step": 108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.296195652173913,
|
||
|
|
"grad_norm": 0.34717634320259094,
|
||
|
|
"learning_rate": 7.954329210275928e-06,
|
||
|
|
"loss": 1.3667,
|
||
|
|
"step": 109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29891304347826086,
|
||
|
|
"grad_norm": 0.3612186014652252,
|
||
|
|
"learning_rate": 7.952993488962999e-06,
|
||
|
|
"loss": 1.4065,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3016304347826087,
|
||
|
|
"grad_norm": 0.3764769434928894,
|
||
|
|
"learning_rate": 7.951653944020356e-06,
|
||
|
|
"loss": 1.3538,
|
||
|
|
"step": 111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30434782608695654,
|
||
|
|
"grad_norm": 0.34798580408096313,
|
||
|
|
"learning_rate": 7.95031055900621e-06,
|
||
|
|
"loss": 1.3902,
|
||
|
|
"step": 112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3070652173913043,
|
||
|
|
"grad_norm": 0.3572092652320862,
|
||
|
|
"learning_rate": 7.94896331738437e-06,
|
||
|
|
"loss": 1.3614,
|
||
|
|
"step": 113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30978260869565216,
|
||
|
|
"grad_norm": 0.34781020879745483,
|
||
|
|
"learning_rate": 7.94761220252356e-06,
|
||
|
|
"loss": 1.406,
|
||
|
|
"step": 114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3125,
|
||
|
|
"grad_norm": 0.37208840250968933,
|
||
|
|
"learning_rate": 7.946257197696737e-06,
|
||
|
|
"loss": 1.4388,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31521739130434784,
|
||
|
|
"grad_norm": 0.3797365427017212,
|
||
|
|
"learning_rate": 7.94489828608041e-06,
|
||
|
|
"loss": 1.3632,
|
||
|
|
"step": 116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3179347826086957,
|
||
|
|
"grad_norm": 0.34749776124954224,
|
||
|
|
"learning_rate": 7.94353545075393e-06,
|
||
|
|
"loss": 1.3529,
|
||
|
|
"step": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32065217391304346,
|
||
|
|
"grad_norm": 0.3999796211719513,
|
||
|
|
"learning_rate": 7.942168674698794e-06,
|
||
|
|
"loss": 1.3701,
|
||
|
|
"step": 118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3233695652173913,
|
||
|
|
"grad_norm": 0.38252121210098267,
|
||
|
|
"learning_rate": 7.94079794079794e-06,
|
||
|
|
"loss": 1.4129,
|
||
|
|
"step": 119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32608695652173914,
|
||
|
|
"grad_norm": 0.3657996356487274,
|
||
|
|
"learning_rate": 7.939423231835025e-06,
|
||
|
|
"loss": 1.3735,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.328804347826087,
|
||
|
|
"grad_norm": 0.39039069414138794,
|
||
|
|
"learning_rate": 7.938044530493707e-06,
|
||
|
|
"loss": 1.4762,
|
||
|
|
"step": 121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33152173913043476,
|
||
|
|
"grad_norm": 0.35975077748298645,
|
||
|
|
"learning_rate": 7.936661819356923e-06,
|
||
|
|
"loss": 1.4388,
|
||
|
|
"step": 122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3342391304347826,
|
||
|
|
"grad_norm": 0.36750245094299316,
|
||
|
|
"learning_rate": 7.93527508090615e-06,
|
||
|
|
"loss": 1.3754,
|
||
|
|
"step": 123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33695652173913043,
|
||
|
|
"grad_norm": 0.38908377289772034,
|
||
|
|
"learning_rate": 7.933884297520661e-06,
|
||
|
|
"loss": 1.2874,
|
||
|
|
"step": 124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33967391304347827,
|
||
|
|
"grad_norm": 0.3685767352581024,
|
||
|
|
"learning_rate": 7.932489451476793e-06,
|
||
|
|
"loss": 1.4003,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3423913043478261,
|
||
|
|
"grad_norm": 0.3810523450374603,
|
||
|
|
"learning_rate": 7.93109052494718e-06,
|
||
|
|
"loss": 1.4163,
|
||
|
|
"step": 126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3451086956521739,
|
||
|
|
"grad_norm": 0.3630084693431854,
|
||
|
|
"learning_rate": 7.9296875e-06,
|
||
|
|
"loss": 1.3586,
|
||
|
|
"step": 127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34782608695652173,
|
||
|
|
"grad_norm": 0.35243499279022217,
|
||
|
|
"learning_rate": 7.928280358598207e-06,
|
||
|
|
"loss": 1.4657,
|
||
|
|
"step": 128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35054347826086957,
|
||
|
|
"grad_norm": 0.3778194785118103,
|
||
|
|
"learning_rate": 7.926869082598759e-06,
|
||
|
|
"loss": 1.3592,
|
||
|
|
"step": 129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3532608695652174,
|
||
|
|
"grad_norm": 0.3549621105194092,
|
||
|
|
"learning_rate": 7.925453653751838e-06,
|
||
|
|
"loss": 1.3663,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35597826086956524,
|
||
|
|
"grad_norm": 0.37039250135421753,
|
||
|
|
"learning_rate": 7.924034053700065e-06,
|
||
|
|
"loss": 1.4375,
|
||
|
|
"step": 131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.358695652173913,
|
||
|
|
"grad_norm": 0.3636303246021271,
|
||
|
|
"learning_rate": 7.922610263977701e-06,
|
||
|
|
"loss": 1.4134,
|
||
|
|
"step": 132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36141304347826086,
|
||
|
|
"grad_norm": 0.3509652614593506,
|
||
|
|
"learning_rate": 7.921182266009852e-06,
|
||
|
|
"loss": 1.4279,
|
||
|
|
"step": 133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3641304347826087,
|
||
|
|
"grad_norm": 0.3587414622306824,
|
||
|
|
"learning_rate": 7.919750041111659e-06,
|
||
|
|
"loss": 1.4191,
|
||
|
|
"step": 134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36684782608695654,
|
||
|
|
"grad_norm": 0.3569912910461426,
|
||
|
|
"learning_rate": 7.918313570487484e-06,
|
||
|
|
"loss": 1.4302,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3695652173913043,
|
||
|
|
"grad_norm": 0.3657996654510498,
|
||
|
|
"learning_rate": 7.916872835230084e-06,
|
||
|
|
"loss": 1.3599,
|
||
|
|
"step": 136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37228260869565216,
|
||
|
|
"grad_norm": 0.3624045252799988,
|
||
|
|
"learning_rate": 7.915427816319788e-06,
|
||
|
|
"loss": 1.309,
|
||
|
|
"step": 137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.375,
|
||
|
|
"grad_norm": 0.34657520055770874,
|
||
|
|
"learning_rate": 7.913978494623655e-06,
|
||
|
|
"loss": 1.3104,
|
||
|
|
"step": 138
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37771739130434784,
|
||
|
|
"grad_norm": 0.3397389352321625,
|
||
|
|
"learning_rate": 7.912524850894631e-06,
|
||
|
|
"loss": 1.4129,
|
||
|
|
"step": 139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3804347826086957,
|
||
|
|
"grad_norm": 0.356838583946228,
|
||
|
|
"learning_rate": 7.911066865770698e-06,
|
||
|
|
"loss": 1.4378,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38315217391304346,
|
||
|
|
"grad_norm": 0.37165331840515137,
|
||
|
|
"learning_rate": 7.909604519774012e-06,
|
||
|
|
"loss": 1.4103,
|
||
|
|
"step": 141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3858695652173913,
|
||
|
|
"grad_norm": 0.3432267904281616,
|
||
|
|
"learning_rate": 7.908137793310034e-06,
|
||
|
|
"loss": 1.3907,
|
||
|
|
"step": 142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38858695652173914,
|
||
|
|
"grad_norm": 0.36105579137802124,
|
||
|
|
"learning_rate": 7.906666666666667e-06,
|
||
|
|
"loss": 1.3705,
|
||
|
|
"step": 143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.391304347826087,
|
||
|
|
"grad_norm": 0.37910157442092896,
|
||
|
|
"learning_rate": 7.905191120013353e-06,
|
||
|
|
"loss": 1.3521,
|
||
|
|
"step": 144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39402173913043476,
|
||
|
|
"grad_norm": 0.35084208846092224,
|
||
|
|
"learning_rate": 7.9037111334002e-06,
|
||
|
|
"loss": 1.4527,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3967391304347826,
|
||
|
|
"grad_norm": 0.3619990050792694,
|
||
|
|
"learning_rate": 7.902226686757073e-06,
|
||
|
|
"loss": 1.4008,
|
||
|
|
"step": 146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39945652173913043,
|
||
|
|
"grad_norm": 0.3557009994983673,
|
||
|
|
"learning_rate": 7.90073775989269e-06,
|
||
|
|
"loss": 1.4537,
|
||
|
|
"step": 147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40217391304347827,
|
||
|
|
"grad_norm": 0.36351799964904785,
|
||
|
|
"learning_rate": 7.899244332493702e-06,
|
||
|
|
"loss": 1.3187,
|
||
|
|
"step": 148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4048913043478261,
|
||
|
|
"grad_norm": 0.3715021312236786,
|
||
|
|
"learning_rate": 7.897746384123781e-06,
|
||
|
|
"loss": 1.4033,
|
||
|
|
"step": 149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4076086956521739,
|
||
|
|
"grad_norm": 0.353608638048172,
|
||
|
|
"learning_rate": 7.896243894222672e-06,
|
||
|
|
"loss": 1.4045,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41032608695652173,
|
||
|
|
"grad_norm": 0.36378586292266846,
|
||
|
|
"learning_rate": 7.894736842105263e-06,
|
||
|
|
"loss": 1.3938,
|
||
|
|
"step": 151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41304347826086957,
|
||
|
|
"grad_norm": 0.34492647647857666,
|
||
|
|
"learning_rate": 7.893225206960635e-06,
|
||
|
|
"loss": 1.3993,
|
||
|
|
"step": 152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4157608695652174,
|
||
|
|
"grad_norm": 0.3632250726222992,
|
||
|
|
"learning_rate": 7.891708967851099e-06,
|
||
|
|
"loss": 1.3533,
|
||
|
|
"step": 153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41847826086956524,
|
||
|
|
"grad_norm": 0.349479079246521,
|
||
|
|
"learning_rate": 7.890188103711236e-06,
|
||
|
|
"loss": 1.3298,
|
||
|
|
"step": 154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.421195652173913,
|
||
|
|
"grad_norm": 0.3889572322368622,
|
||
|
|
"learning_rate": 7.88866259334691e-06,
|
||
|
|
"loss": 1.3372,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42391304347826086,
|
||
|
|
"grad_norm": 0.38098421692848206,
|
||
|
|
"learning_rate": 7.887132415434302e-06,
|
||
|
|
"loss": 1.3923,
|
||
|
|
"step": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4266304347826087,
|
||
|
|
"grad_norm": 0.36011362075805664,
|
||
|
|
"learning_rate": 7.885597548518897e-06,
|
||
|
|
"loss": 1.4739,
|
||
|
|
"step": 157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42934782608695654,
|
||
|
|
"grad_norm": 0.3780601918697357,
|
||
|
|
"learning_rate": 7.884057971014493e-06,
|
||
|
|
"loss": 1.366,
|
||
|
|
"step": 158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4320652173913043,
|
||
|
|
"grad_norm": 0.3612130582332611,
|
||
|
|
"learning_rate": 7.882513661202186e-06,
|
||
|
|
"loss": 1.3845,
|
||
|
|
"step": 159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43478260869565216,
|
||
|
|
"grad_norm": 0.36176979541778564,
|
||
|
|
"learning_rate": 7.88096459722935e-06,
|
||
|
|
"loss": 1.4583,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4375,
|
||
|
|
"grad_norm": 0.3663961589336395,
|
||
|
|
"learning_rate": 7.8794107571086e-06,
|
||
|
|
"loss": 1.3918,
|
||
|
|
"step": 161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44021739130434784,
|
||
|
|
"grad_norm": 0.3562765121459961,
|
||
|
|
"learning_rate": 7.877852118716762e-06,
|
||
|
|
"loss": 1.389,
|
||
|
|
"step": 162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4429347826086957,
|
||
|
|
"grad_norm": 0.35205936431884766,
|
||
|
|
"learning_rate": 7.876288659793814e-06,
|
||
|
|
"loss": 1.3838,
|
||
|
|
"step": 163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44565217391304346,
|
||
|
|
"grad_norm": 0.36153075098991394,
|
||
|
|
"learning_rate": 7.874720357941835e-06,
|
||
|
|
"loss": 1.3615,
|
||
|
|
"step": 164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4483695652173913,
|
||
|
|
"grad_norm": 0.39255401492118835,
|
||
|
|
"learning_rate": 7.873147190623922e-06,
|
||
|
|
"loss": 1.3994,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45108695652173914,
|
||
|
|
"grad_norm": 0.3576897084712982,
|
||
|
|
"learning_rate": 7.871569135163129e-06,
|
||
|
|
"loss": 1.3612,
|
||
|
|
"step": 166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.453804347826087,
|
||
|
|
"grad_norm": 0.3810504674911499,
|
||
|
|
"learning_rate": 7.869986168741356e-06,
|
||
|
|
"loss": 1.3979,
|
||
|
|
"step": 167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45652173913043476,
|
||
|
|
"grad_norm": 0.3665294349193573,
|
||
|
|
"learning_rate": 7.868398268398268e-06,
|
||
|
|
"loss": 1.416,
|
||
|
|
"step": 168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4592391304347826,
|
||
|
|
"grad_norm": 0.37046587467193604,
|
||
|
|
"learning_rate": 7.866805411030177e-06,
|
||
|
|
"loss": 1.3696,
|
||
|
|
"step": 169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46195652173913043,
|
||
|
|
"grad_norm": 0.3550441265106201,
|
||
|
|
"learning_rate": 7.865207573388918e-06,
|
||
|
|
"loss": 1.4137,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46467391304347827,
|
||
|
|
"grad_norm": 0.35164910554885864,
|
||
|
|
"learning_rate": 7.863604732080725e-06,
|
||
|
|
"loss": 1.3666,
|
||
|
|
"step": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4673913043478261,
|
||
|
|
"grad_norm": 0.334942489862442,
|
||
|
|
"learning_rate": 7.861996863565082e-06,
|
||
|
|
"loss": 1.3725,
|
||
|
|
"step": 172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4701086956521739,
|
||
|
|
"grad_norm": 0.36114388704299927,
|
||
|
|
"learning_rate": 7.860383944153578e-06,
|
||
|
|
"loss": 1.361,
|
||
|
|
"step": 173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47282608695652173,
|
||
|
|
"grad_norm": 0.3654029965400696,
|
||
|
|
"learning_rate": 7.85876595000874e-06,
|
||
|
|
"loss": 1.3703,
|
||
|
|
"step": 174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47554347826086957,
|
||
|
|
"grad_norm": 0.3569169044494629,
|
||
|
|
"learning_rate": 7.857142857142858e-06,
|
||
|
|
"loss": 1.384,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4782608695652174,
|
||
|
|
"grad_norm": 0.38499578833580017,
|
||
|
|
"learning_rate": 7.855514641416798e-06,
|
||
|
|
"loss": 1.3818,
|
||
|
|
"step": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48097826086956524,
|
||
|
|
"grad_norm": 0.353002667427063,
|
||
|
|
"learning_rate": 7.853881278538813e-06,
|
||
|
|
"loss": 1.4214,
|
||
|
|
"step": 177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.483695652173913,
|
||
|
|
"grad_norm": 0.34458428621292114,
|
||
|
|
"learning_rate": 7.852242744063325e-06,
|
||
|
|
"loss": 1.4166,
|
||
|
|
"step": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48641304347826086,
|
||
|
|
"grad_norm": 0.3483969271183014,
|
||
|
|
"learning_rate": 7.850599013389712e-06,
|
||
|
|
"loss": 1.3618,
|
||
|
|
"step": 179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4891304347826087,
|
||
|
|
"grad_norm": 0.35649237036705017,
|
||
|
|
"learning_rate": 7.848950061761072e-06,
|
||
|
|
"loss": 1.4088,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49184782608695654,
|
||
|
|
"grad_norm": 0.37080127000808716,
|
||
|
|
"learning_rate": 7.84729586426299e-06,
|
||
|
|
"loss": 1.3909,
|
||
|
|
"step": 181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4945652173913043,
|
||
|
|
"grad_norm": 0.35322239995002747,
|
||
|
|
"learning_rate": 7.845636395822269e-06,
|
||
|
|
"loss": 1.2835,
|
||
|
|
"step": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49728260869565216,
|
||
|
|
"grad_norm": 0.34811776876449585,
|
||
|
|
"learning_rate": 7.843971631205673e-06,
|
||
|
|
"loss": 1.3693,
|
||
|
|
"step": 183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"grad_norm": 0.3507843315601349,
|
||
|
|
"learning_rate": 7.842301545018646e-06,
|
||
|
|
"loss": 1.4295,
|
||
|
|
"step": 184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5027173913043478,
|
||
|
|
"grad_norm": 0.36517709493637085,
|
||
|
|
"learning_rate": 7.84062611170402e-06,
|
||
|
|
"loss": 1.3763,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5054347826086957,
|
||
|
|
"grad_norm": 0.3709813058376312,
|
||
|
|
"learning_rate": 7.83894530554071e-06,
|
||
|
|
"loss": 1.3681,
|
||
|
|
"step": 186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5081521739130435,
|
||
|
|
"grad_norm": 0.3524121046066284,
|
||
|
|
"learning_rate": 7.837259100642398e-06,
|
||
|
|
"loss": 1.3644,
|
||
|
|
"step": 187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5108695652173914,
|
||
|
|
"grad_norm": 0.3615017533302307,
|
||
|
|
"learning_rate": 7.83556747095621e-06,
|
||
|
|
"loss": 1.4233,
|
||
|
|
"step": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5135869565217391,
|
||
|
|
"grad_norm": 0.3449147939682007,
|
||
|
|
"learning_rate": 7.833870390261366e-06,
|
||
|
|
"loss": 1.4012,
|
||
|
|
"step": 189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5163043478260869,
|
||
|
|
"grad_norm": 0.3625212013721466,
|
||
|
|
"learning_rate": 7.832167832167831e-06,
|
||
|
|
"loss": 1.4591,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5190217391304348,
|
||
|
|
"grad_norm": 0.390602707862854,
|
||
|
|
"learning_rate": 7.830459770114941e-06,
|
||
|
|
"loss": 1.4077,
|
||
|
|
"step": 191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5217391304347826,
|
||
|
|
"grad_norm": 0.3582949936389923,
|
||
|
|
"learning_rate": 7.82874617737003e-06,
|
||
|
|
"loss": 1.3576,
|
||
|
|
"step": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5244565217391305,
|
||
|
|
"grad_norm": 0.3695563077926636,
|
||
|
|
"learning_rate": 7.827027027027026e-06,
|
||
|
|
"loss": 1.3497,
|
||
|
|
"step": 193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5271739130434783,
|
||
|
|
"grad_norm": 0.3619958162307739,
|
||
|
|
"learning_rate": 7.825302292005052e-06,
|
||
|
|
"loss": 1.319,
|
||
|
|
"step": 194
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.529891304347826,
|
||
|
|
"grad_norm": 0.364831805229187,
|
||
|
|
"learning_rate": 7.823571945046999e-06,
|
||
|
|
"loss": 1.3109,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.532608695652174,
|
||
|
|
"grad_norm": 0.3568253219127655,
|
||
|
|
"learning_rate": 7.821835958718086e-06,
|
||
|
|
"loss": 1.3776,
|
||
|
|
"step": 196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5353260869565217,
|
||
|
|
"grad_norm": 0.37014317512512207,
|
||
|
|
"learning_rate": 7.820094305404424e-06,
|
||
|
|
"loss": 1.4481,
|
||
|
|
"step": 197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5380434782608695,
|
||
|
|
"grad_norm": 0.37578845024108887,
|
||
|
|
"learning_rate": 7.818346957311535e-06,
|
||
|
|
"loss": 1.3715,
|
||
|
|
"step": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5407608695652174,
|
||
|
|
"grad_norm": 0.347926527261734,
|
||
|
|
"learning_rate": 7.816593886462881e-06,
|
||
|
|
"loss": 1.3542,
|
||
|
|
"step": 199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5434782608695652,
|
||
|
|
"grad_norm": 0.36149612069129944,
|
||
|
|
"learning_rate": 7.814835064698378e-06,
|
||
|
|
"loss": 1.4431,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5461956521739131,
|
||
|
|
"grad_norm": 0.35200005769729614,
|
||
|
|
"learning_rate": 7.813070463672874e-06,
|
||
|
|
"loss": 1.3958,
|
||
|
|
"step": 201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5489130434782609,
|
||
|
|
"grad_norm": 0.34820863604545593,
|
||
|
|
"learning_rate": 7.811300054854634e-06,
|
||
|
|
"loss": 1.2673,
|
||
|
|
"step": 202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5516304347826086,
|
||
|
|
"grad_norm": 0.36692818999290466,
|
||
|
|
"learning_rate": 7.80952380952381e-06,
|
||
|
|
"loss": 1.3734,
|
||
|
|
"step": 203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5543478260869565,
|
||
|
|
"grad_norm": 0.3500845432281494,
|
||
|
|
"learning_rate": 7.807741698770867e-06,
|
||
|
|
"loss": 1.3535,
|
||
|
|
"step": 204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5570652173913043,
|
||
|
|
"grad_norm": 0.3685120642185211,
|
||
|
|
"learning_rate": 7.805953693495038e-06,
|
||
|
|
"loss": 1.3531,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5597826086956522,
|
||
|
|
"grad_norm": 0.3602903485298157,
|
||
|
|
"learning_rate": 7.804159764402723e-06,
|
||
|
|
"loss": 1.3251,
|
||
|
|
"step": 206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5625,
|
||
|
|
"grad_norm": 0.359762966632843,
|
||
|
|
"learning_rate": 7.802359882005899e-06,
|
||
|
|
"loss": 1.3668,
|
||
|
|
"step": 207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5652173913043478,
|
||
|
|
"grad_norm": 0.3595513701438904,
|
||
|
|
"learning_rate": 7.800554016620498e-06,
|
||
|
|
"loss": 1.5346,
|
||
|
|
"step": 208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5679347826086957,
|
||
|
|
"grad_norm": 0.372349351644516,
|
||
|
|
"learning_rate": 7.79874213836478e-06,
|
||
|
|
"loss": 1.431,
|
||
|
|
"step": 209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5706521739130435,
|
||
|
|
"grad_norm": 0.3687814474105835,
|
||
|
|
"learning_rate": 7.79692421715768e-06,
|
||
|
|
"loss": 1.3849,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5733695652173914,
|
||
|
|
"grad_norm": 0.3541337251663208,
|
||
|
|
"learning_rate": 7.795100222717148e-06,
|
||
|
|
"loss": 1.3575,
|
||
|
|
"step": 211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5760869565217391,
|
||
|
|
"grad_norm": 0.3784167170524597,
|
||
|
|
"learning_rate": 7.793270124558468e-06,
|
||
|
|
"loss": 1.3768,
|
||
|
|
"step": 212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5788043478260869,
|
||
|
|
"grad_norm": 0.3548452854156494,
|
||
|
|
"learning_rate": 7.79143389199255e-06,
|
||
|
|
"loss": 1.416,
|
||
|
|
"step": 213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5815217391304348,
|
||
|
|
"grad_norm": 0.3744758367538452,
|
||
|
|
"learning_rate": 7.78959149412423e-06,
|
||
|
|
"loss": 1.4005,
|
||
|
|
"step": 214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5842391304347826,
|
||
|
|
"grad_norm": 0.34252363443374634,
|
||
|
|
"learning_rate": 7.787742899850522e-06,
|
||
|
|
"loss": 1.3614,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5869565217391305,
|
||
|
|
"grad_norm": 0.34789708256721497,
|
||
|
|
"learning_rate": 7.78588807785888e-06,
|
||
|
|
"loss": 1.3754,
|
||
|
|
"step": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5896739130434783,
|
||
|
|
"grad_norm": 0.366502046585083,
|
||
|
|
"learning_rate": 7.784026996625422e-06,
|
||
|
|
"loss": 1.3839,
|
||
|
|
"step": 217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.592391304347826,
|
||
|
|
"grad_norm": 0.39890167117118835,
|
||
|
|
"learning_rate": 7.782159624413145e-06,
|
||
|
|
"loss": 1.4088,
|
||
|
|
"step": 218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.595108695652174,
|
||
|
|
"grad_norm": 0.35646381974220276,
|
||
|
|
"learning_rate": 7.780285929270127e-06,
|
||
|
|
"loss": 1.3362,
|
||
|
|
"step": 219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5978260869565217,
|
||
|
|
"grad_norm": 0.36038488149642944,
|
||
|
|
"learning_rate": 7.778405879027698e-06,
|
||
|
|
"loss": 1.349,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6005434782608695,
|
||
|
|
"grad_norm": 0.35663580894470215,
|
||
|
|
"learning_rate": 7.776519441298602e-06,
|
||
|
|
"loss": 1.3727,
|
||
|
|
"step": 221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6032608695652174,
|
||
|
|
"grad_norm": 0.36461469531059265,
|
||
|
|
"learning_rate": 7.774626583475137e-06,
|
||
|
|
"loss": 1.4629,
|
||
|
|
"step": 222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6059782608695652,
|
||
|
|
"grad_norm": 0.35047125816345215,
|
||
|
|
"learning_rate": 7.772727272727272e-06,
|
||
|
|
"loss": 1.2795,
|
||
|
|
"step": 223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6086956521739131,
|
||
|
|
"grad_norm": 0.36245492100715637,
|
||
|
|
"learning_rate": 7.770821476000759e-06,
|
||
|
|
"loss": 1.3618,
|
||
|
|
"step": 224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6114130434782609,
|
||
|
|
"grad_norm": 0.3742018938064575,
|
||
|
|
"learning_rate": 7.768909160015202e-06,
|
||
|
|
"loss": 1.459,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6141304347826086,
|
||
|
|
"grad_norm": 0.35593125224113464,
|
||
|
|
"learning_rate": 7.766990291262136e-06,
|
||
|
|
"loss": 1.4521,
|
||
|
|
"step": 226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6168478260869565,
|
||
|
|
"grad_norm": 0.3579864203929901,
|
||
|
|
"learning_rate": 7.76506483600305e-06,
|
||
|
|
"loss": 1.4157,
|
||
|
|
"step": 227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6195652173913043,
|
||
|
|
"grad_norm": 0.352098286151886,
|
||
|
|
"learning_rate": 7.76313276026743e-06,
|
||
|
|
"loss": 1.4013,
|
||
|
|
"step": 228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6222826086956522,
|
||
|
|
"grad_norm": 0.36963212490081787,
|
||
|
|
"learning_rate": 7.761194029850745e-06,
|
||
|
|
"loss": 1.4101,
|
||
|
|
"step": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.625,
|
||
|
|
"grad_norm": 0.36608660221099854,
|
||
|
|
"learning_rate": 7.75924861031244e-06,
|
||
|
|
"loss": 1.3273,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6277173913043478,
|
||
|
|
"grad_norm": 0.37208378314971924,
|
||
|
|
"learning_rate": 7.757296466973885e-06,
|
||
|
|
"loss": 1.388,
|
||
|
|
"step": 231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6304347826086957,
|
||
|
|
"grad_norm": 0.34387239813804626,
|
||
|
|
"learning_rate": 7.75533756491633e-06,
|
||
|
|
"loss": 1.3463,
|
||
|
|
"step": 232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6331521739130435,
|
||
|
|
"grad_norm": 0.338460773229599,
|
||
|
|
"learning_rate": 7.753371868978805e-06,
|
||
|
|
"loss": 1.3765,
|
||
|
|
"step": 233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6358695652173914,
|
||
|
|
"grad_norm": 0.3468306064605713,
|
||
|
|
"learning_rate": 7.751399343756032e-06,
|
||
|
|
"loss": 1.311,
|
||
|
|
"step": 234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6385869565217391,
|
||
|
|
"grad_norm": 0.3542969524860382,
|
||
|
|
"learning_rate": 7.749419953596288e-06,
|
||
|
|
"loss": 1.3258,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6413043478260869,
|
||
|
|
"grad_norm": 0.3702642023563385,
|
||
|
|
"learning_rate": 7.747433662599263e-06,
|
||
|
|
"loss": 1.4132,
|
||
|
|
"step": 236
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6440217391304348,
|
||
|
|
"grad_norm": 0.42715317010879517,
|
||
|
|
"learning_rate": 7.745440434613891e-06,
|
||
|
|
"loss": 1.3031,
|
||
|
|
"step": 237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6467391304347826,
|
||
|
|
"grad_norm": 0.35499247908592224,
|
||
|
|
"learning_rate": 7.74344023323615e-06,
|
||
|
|
"loss": 1.4496,
|
||
|
|
"step": 238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6494565217391305,
|
||
|
|
"grad_norm": 0.37534981966018677,
|
||
|
|
"learning_rate": 7.741433021806853e-06,
|
||
|
|
"loss": 1.4033,
|
||
|
|
"step": 239
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6521739130434783,
|
||
|
|
"grad_norm": 0.35908043384552,
|
||
|
|
"learning_rate": 7.739418763409401e-06,
|
||
|
|
"loss": 1.4686,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.654891304347826,
|
||
|
|
"grad_norm": 0.365974485874176,
|
||
|
|
"learning_rate": 7.737397420867525e-06,
|
||
|
|
"loss": 1.3485,
|
||
|
|
"step": 241
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.657608695652174,
|
||
|
|
"grad_norm": 0.3661860227584839,
|
||
|
|
"learning_rate": 7.735368956743002e-06,
|
||
|
|
"loss": 1.3576,
|
||
|
|
"step": 242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6603260869565217,
|
||
|
|
"grad_norm": 0.352400004863739,
|
||
|
|
"learning_rate": 7.733333333333333e-06,
|
||
|
|
"loss": 1.4192,
|
||
|
|
"step": 243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6630434782608695,
|
||
|
|
"grad_norm": 0.3496619760990143,
|
||
|
|
"learning_rate": 7.731290512669416e-06,
|
||
|
|
"loss": 1.3675,
|
||
|
|
"step": 244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6657608695652174,
|
||
|
|
"grad_norm": 0.34792232513427734,
|
||
|
|
"learning_rate": 7.729240456513184e-06,
|
||
|
|
"loss": 1.3972,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6684782608695652,
|
||
|
|
"grad_norm": 0.346755713224411,
|
||
|
|
"learning_rate": 7.727183126355213e-06,
|
||
|
|
"loss": 1.4235,
|
||
|
|
"step": 246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6711956521739131,
|
||
|
|
"grad_norm": 0.35014012455940247,
|
||
|
|
"learning_rate": 7.725118483412322e-06,
|
||
|
|
"loss": 1.3392,
|
||
|
|
"step": 247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6739130434782609,
|
||
|
|
"grad_norm": 0.3645906150341034,
|
||
|
|
"learning_rate": 7.723046488625123e-06,
|
||
|
|
"loss": 1.3828,
|
||
|
|
"step": 248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6766304347826086,
|
||
|
|
"grad_norm": 0.3508262634277344,
|
||
|
|
"learning_rate": 7.720967102655568e-06,
|
||
|
|
"loss": 1.3201,
|
||
|
|
"step": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6793478260869565,
|
||
|
|
"grad_norm": 0.3524917960166931,
|
||
|
|
"learning_rate": 7.718880285884455e-06,
|
||
|
|
"loss": 1.3746,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6820652173913043,
|
||
|
|
"grad_norm": 0.35207194089889526,
|
||
|
|
"learning_rate": 7.71678599840891e-06,
|
||
|
|
"loss": 1.3566,
|
||
|
|
"step": 251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6847826086956522,
|
||
|
|
"grad_norm": 0.34460002183914185,
|
||
|
|
"learning_rate": 7.714684200039848e-06,
|
||
|
|
"loss": 1.3067,
|
||
|
|
"step": 252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6875,
|
||
|
|
"grad_norm": 0.3507847785949707,
|
||
|
|
"learning_rate": 7.7125748502994e-06,
|
||
|
|
"loss": 1.3619,
|
||
|
|
"step": 253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6902173913043478,
|
||
|
|
"grad_norm": 0.36836346983909607,
|
||
|
|
"learning_rate": 7.710457908418317e-06,
|
||
|
|
"loss": 1.3668,
|
||
|
|
"step": 254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6929347826086957,
|
||
|
|
"grad_norm": 0.35324111580848694,
|
||
|
|
"learning_rate": 7.708333333333332e-06,
|
||
|
|
"loss": 1.384,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6956521739130435,
|
||
|
|
"grad_norm": 0.3476124405860901,
|
||
|
|
"learning_rate": 7.706201083684526e-06,
|
||
|
|
"loss": 1.3887,
|
||
|
|
"step": 256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6983695652173914,
|
||
|
|
"grad_norm": 0.3560345768928528,
|
||
|
|
"learning_rate": 7.704061117812625e-06,
|
||
|
|
"loss": 1.3833,
|
||
|
|
"step": 257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7010869565217391,
|
||
|
|
"grad_norm": 0.35968106985092163,
|
||
|
|
"learning_rate": 7.701913393756293e-06,
|
||
|
|
"loss": 1.3493,
|
||
|
|
"step": 258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7038043478260869,
|
||
|
|
"grad_norm": 0.3483898639678955,
|
||
|
|
"learning_rate": 7.699757869249394e-06,
|
||
|
|
"loss": 1.3651,
|
||
|
|
"step": 259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7065217391304348,
|
||
|
|
"grad_norm": 0.37258586287498474,
|
||
|
|
"learning_rate": 7.697594501718214e-06,
|
||
|
|
"loss": 1.4654,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7092391304347826,
|
||
|
|
"grad_norm": 0.37677285075187683,
|
||
|
|
"learning_rate": 7.695423248278656e-06,
|
||
|
|
"loss": 1.331,
|
||
|
|
"step": 261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7119565217391305,
|
||
|
|
"grad_norm": 0.3459789454936981,
|
||
|
|
"learning_rate": 7.693244065733414e-06,
|
||
|
|
"loss": 1.4426,
|
||
|
|
"step": 262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7146739130434783,
|
||
|
|
"grad_norm": 0.35081684589385986,
|
||
|
|
"learning_rate": 7.691056910569105e-06,
|
||
|
|
"loss": 1.3873,
|
||
|
|
"step": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.717391304347826,
|
||
|
|
"grad_norm": 0.377763956785202,
|
||
|
|
"learning_rate": 7.68886173895337e-06,
|
||
|
|
"loss": 1.3201,
|
||
|
|
"step": 264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.720108695652174,
|
||
|
|
"grad_norm": 0.35093316435813904,
|
||
|
|
"learning_rate": 7.686658506731945e-06,
|
||
|
|
"loss": 1.3274,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7228260869565217,
|
||
|
|
"grad_norm": 0.3494810461997986,
|
||
|
|
"learning_rate": 7.68444716942571e-06,
|
||
|
|
"loss": 1.3173,
|
||
|
|
"step": 266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7255434782608695,
|
||
|
|
"grad_norm": 0.3792516887187958,
|
||
|
|
"learning_rate": 7.682227682227683e-06,
|
||
|
|
"loss": 1.4449,
|
||
|
|
"step": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7282608695652174,
|
||
|
|
"grad_norm": 0.3558602035045624,
|
||
|
|
"learning_rate": 7.68e-06,
|
||
|
|
"loss": 1.3737,
|
||
|
|
"step": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7309782608695652,
|
||
|
|
"grad_norm": 0.3495853543281555,
|
||
|
|
"learning_rate": 7.677764077270858e-06,
|
||
|
|
"loss": 1.4514,
|
||
|
|
"step": 269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7336956521739131,
|
||
|
|
"grad_norm": 0.3459034860134125,
|
||
|
|
"learning_rate": 7.675519868231419e-06,
|
||
|
|
"loss": 1.3891,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7364130434782609,
|
||
|
|
"grad_norm": 0.37044718861579895,
|
||
|
|
"learning_rate": 7.673267326732673e-06,
|
||
|
|
"loss": 1.4816,
|
||
|
|
"step": 271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7391304347826086,
|
||
|
|
"grad_norm": 0.33735278248786926,
|
||
|
|
"learning_rate": 7.671006406282289e-06,
|
||
|
|
"loss": 1.3679,
|
||
|
|
"step": 272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7418478260869565,
|
||
|
|
"grad_norm": 0.3841879069805145,
|
||
|
|
"learning_rate": 7.668737060041406e-06,
|
||
|
|
"loss": 1.3936,
|
||
|
|
"step": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7445652173913043,
|
||
|
|
"grad_norm": 0.3517480790615082,
|
||
|
|
"learning_rate": 7.666459240821406e-06,
|
||
|
|
"loss": 1.3055,
|
||
|
|
"step": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7472826086956522,
|
||
|
|
"grad_norm": 0.34539881348609924,
|
||
|
|
"learning_rate": 7.664172901080633e-06,
|
||
|
|
"loss": 1.3261,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.75,
|
||
|
|
"grad_norm": 0.3954020142555237,
|
||
|
|
"learning_rate": 7.66187799292109e-06,
|
||
|
|
"loss": 1.3094,
|
||
|
|
"step": 276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7527173913043478,
|
||
|
|
"grad_norm": 0.3624208867549896,
|
||
|
|
"learning_rate": 7.659574468085107e-06,
|
||
|
|
"loss": 1.3864,
|
||
|
|
"step": 277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7554347826086957,
|
||
|
|
"grad_norm": 0.34993529319763184,
|
||
|
|
"learning_rate": 7.657262277951932e-06,
|
||
|
|
"loss": 1.4306,
|
||
|
|
"step": 278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7581521739130435,
|
||
|
|
"grad_norm": 0.37371665239334106,
|
||
|
|
"learning_rate": 7.654941373534337e-06,
|
||
|
|
"loss": 1.3844,
|
||
|
|
"step": 279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7608695652173914,
|
||
|
|
"grad_norm": 0.36566704511642456,
|
||
|
|
"learning_rate": 7.652611705475141e-06,
|
||
|
|
"loss": 1.4169,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7635869565217391,
|
||
|
|
"grad_norm": 0.3945908546447754,
|
||
|
|
"learning_rate": 7.650273224043716e-06,
|
||
|
|
"loss": 1.4021,
|
||
|
|
"step": 281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7663043478260869,
|
||
|
|
"grad_norm": 0.36350396275520325,
|
||
|
|
"learning_rate": 7.647925879132448e-06,
|
||
|
|
"loss": 1.4002,
|
||
|
|
"step": 282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7690217391304348,
|
||
|
|
"grad_norm": 0.3854394555091858,
|
||
|
|
"learning_rate": 7.645569620253164e-06,
|
||
|
|
"loss": 1.4119,
|
||
|
|
"step": 283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7717391304347826,
|
||
|
|
"grad_norm": 0.3648253083229065,
|
||
|
|
"learning_rate": 7.643204396533503e-06,
|
||
|
|
"loss": 1.3952,
|
||
|
|
"step": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7744565217391305,
|
||
|
|
"grad_norm": 0.3636375665664673,
|
||
|
|
"learning_rate": 7.640830156713257e-06,
|
||
|
|
"loss": 1.4338,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7771739130434783,
|
||
|
|
"grad_norm": 0.3662267327308655,
|
||
|
|
"learning_rate": 7.638446849140676e-06,
|
||
|
|
"loss": 1.3377,
|
||
|
|
"step": 286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.779891304347826,
|
||
|
|
"grad_norm": 0.3441767394542694,
|
||
|
|
"learning_rate": 7.636054421768708e-06,
|
||
|
|
"loss": 1.373,
|
||
|
|
"step": 287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.782608695652174,
|
||
|
|
"grad_norm": 0.3580285310745239,
|
||
|
|
"learning_rate": 7.633652822151224e-06,
|
||
|
|
"loss": 1.4107,
|
||
|
|
"step": 288
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7853260869565217,
|
||
|
|
"grad_norm": 0.3530341386795044,
|
||
|
|
"learning_rate": 7.63124199743918e-06,
|
||
|
|
"loss": 1.3704,
|
||
|
|
"step": 289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7880434782608695,
|
||
|
|
"grad_norm": 0.35250723361968994,
|
||
|
|
"learning_rate": 7.628821894376737e-06,
|
||
|
|
"loss": 1.3558,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7907608695652174,
|
||
|
|
"grad_norm": 0.35989367961883545,
|
||
|
|
"learning_rate": 7.626392459297343e-06,
|
||
|
|
"loss": 1.3583,
|
||
|
|
"step": 291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7934782608695652,
|
||
|
|
"grad_norm": 0.34206631779670715,
|
||
|
|
"learning_rate": 7.623953638119768e-06,
|
||
|
|
"loss": 1.4009,
|
||
|
|
"step": 292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7961956521739131,
|
||
|
|
"grad_norm": 0.3655489683151245,
|
||
|
|
"learning_rate": 7.621505376344085e-06,
|
||
|
|
"loss": 1.3665,
|
||
|
|
"step": 293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7989130434782609,
|
||
|
|
"grad_norm": 0.36722490191459656,
|
||
|
|
"learning_rate": 7.619047619047619e-06,
|
||
|
|
"loss": 1.4071,
|
||
|
|
"step": 294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8016304347826086,
|
||
|
|
"grad_norm": 0.36561647057533264,
|
||
|
|
"learning_rate": 7.616580310880829e-06,
|
||
|
|
"loss": 1.3369,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8043478260869565,
|
||
|
|
"grad_norm": 0.3570895195007324,
|
||
|
|
"learning_rate": 7.614103396063162e-06,
|
||
|
|
"loss": 1.3277,
|
||
|
|
"step": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8070652173913043,
|
||
|
|
"grad_norm": 0.3854903280735016,
|
||
|
|
"learning_rate": 7.611616818378846e-06,
|
||
|
|
"loss": 1.4852,
|
||
|
|
"step": 297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8097826086956522,
|
||
|
|
"grad_norm": 0.3468440771102905,
|
||
|
|
"learning_rate": 7.609120521172638e-06,
|
||
|
|
"loss": 1.3272,
|
||
|
|
"step": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8125,
|
||
|
|
"grad_norm": 0.3752823770046234,
|
||
|
|
"learning_rate": 7.606614447345517e-06,
|
||
|
|
"loss": 1.4438,
|
||
|
|
"step": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8152173913043478,
|
||
|
|
"grad_norm": 0.3850380480289459,
|
||
|
|
"learning_rate": 7.6040985393503375e-06,
|
||
|
|
"loss": 1.3909,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8179347826086957,
|
||
|
|
"grad_norm": 0.34979549050331116,
|
||
|
|
"learning_rate": 7.6015727391874176e-06,
|
||
|
|
"loss": 1.3671,
|
||
|
|
"step": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8206521739130435,
|
||
|
|
"grad_norm": 0.37657612562179565,
|
||
|
|
"learning_rate": 7.599036988400088e-06,
|
||
|
|
"loss": 1.3691,
|
||
|
|
"step": 302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8233695652173914,
|
||
|
|
"grad_norm": 0.36025533080101013,
|
||
|
|
"learning_rate": 7.596491228070175e-06,
|
||
|
|
"loss": 1.4006,
|
||
|
|
"step": 303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8260869565217391,
|
||
|
|
"grad_norm": 0.3619629442691803,
|
||
|
|
"learning_rate": 7.593935398813448e-06,
|
||
|
|
"loss": 1.4321,
|
||
|
|
"step": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8288043478260869,
|
||
|
|
"grad_norm": 0.3546094596385956,
|
||
|
|
"learning_rate": 7.5913694407749885e-06,
|
||
|
|
"loss": 1.2595,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8315217391304348,
|
||
|
|
"grad_norm": 0.3541072607040405,
|
||
|
|
"learning_rate": 7.588793293624531e-06,
|
||
|
|
"loss": 1.3578,
|
||
|
|
"step": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8342391304347826,
|
||
|
|
"grad_norm": 0.3686266541481018,
|
||
|
|
"learning_rate": 7.586206896551724e-06,
|
||
|
|
"loss": 1.4234,
|
||
|
|
"step": 307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8369565217391305,
|
||
|
|
"grad_norm": 0.3526665270328522,
|
||
|
|
"learning_rate": 7.583610188261351e-06,
|
||
|
|
"loss": 1.4652,
|
||
|
|
"step": 308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8396739130434783,
|
||
|
|
"grad_norm": 0.3411577045917511,
|
||
|
|
"learning_rate": 7.5810031069684864e-06,
|
||
|
|
"loss": 1.3768,
|
||
|
|
"step": 309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.842391304347826,
|
||
|
|
"grad_norm": 0.35096436738967896,
|
||
|
|
"learning_rate": 7.578385590393596e-06,
|
||
|
|
"loss": 1.438,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.845108695652174,
|
||
|
|
"grad_norm": 0.36183464527130127,
|
||
|
|
"learning_rate": 7.575757575757576e-06,
|
||
|
|
"loss": 1.3347,
|
||
|
|
"step": 311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8478260869565217,
|
||
|
|
"grad_norm": 0.362402081489563,
|
||
|
|
"learning_rate": 7.5731189997767355e-06,
|
||
|
|
"loss": 1.3988,
|
||
|
|
"step": 312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8505434782608695,
|
||
|
|
"grad_norm": 0.3439631462097168,
|
||
|
|
"learning_rate": 7.570469798657717e-06,
|
||
|
|
"loss": 1.3964,
|
||
|
|
"step": 313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8532608695652174,
|
||
|
|
"grad_norm": 0.34573471546173096,
|
||
|
|
"learning_rate": 7.567809908092356e-06,
|
||
|
|
"loss": 1.4407,
|
||
|
|
"step": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8559782608695652,
|
||
|
|
"grad_norm": 0.37581852078437805,
|
||
|
|
"learning_rate": 7.5651392632524705e-06,
|
||
|
|
"loss": 1.3481,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8586956521739131,
|
||
|
|
"grad_norm": 0.37714245915412903,
|
||
|
|
"learning_rate": 7.5624577987846045e-06,
|
||
|
|
"loss": 1.3223,
|
||
|
|
"step": 316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8614130434782609,
|
||
|
|
"grad_norm": 0.3433416485786438,
|
||
|
|
"learning_rate": 7.559765448804691e-06,
|
||
|
|
"loss": 1.3849,
|
||
|
|
"step": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8641304347826086,
|
||
|
|
"grad_norm": 0.35157305002212524,
|
||
|
|
"learning_rate": 7.5570621468926556e-06,
|
||
|
|
"loss": 1.4306,
|
||
|
|
"step": 318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8668478260869565,
|
||
|
|
"grad_norm": 0.3580811321735382,
|
||
|
|
"learning_rate": 7.554347826086957e-06,
|
||
|
|
"loss": 1.4124,
|
||
|
|
"step": 319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8695652173913043,
|
||
|
|
"grad_norm": 0.35856080055236816,
|
||
|
|
"learning_rate": 7.5516224188790555e-06,
|
||
|
|
"loss": 1.3477,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8722826086956522,
|
||
|
|
"grad_norm": 0.35569003224372864,
|
||
|
|
"learning_rate": 7.548885857207822e-06,
|
||
|
|
"loss": 1.4032,
|
||
|
|
"step": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.875,
|
||
|
|
"grad_norm": 0.34170466661453247,
|
||
|
|
"learning_rate": 7.546138072453862e-06,
|
||
|
|
"loss": 1.3616,
|
||
|
|
"step": 322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8777173913043478,
|
||
|
|
"grad_norm": 0.34092995524406433,
|
||
|
|
"learning_rate": 7.54337899543379e-06,
|
||
|
|
"loss": 1.3248,
|
||
|
|
"step": 323
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8804347826086957,
|
||
|
|
"grad_norm": 0.3783915340900421,
|
||
|
|
"learning_rate": 7.540608556394418e-06,
|
||
|
|
"loss": 1.3918,
|
||
|
|
"step": 324
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8831521739130435,
|
||
|
|
"grad_norm": 0.35835227370262146,
|
||
|
|
"learning_rate": 7.537826685006877e-06,
|
||
|
|
"loss": 1.3379,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8858695652173914,
|
||
|
|
"grad_norm": 0.3583104908466339,
|
||
|
|
"learning_rate": 7.535033310360671e-06,
|
||
|
|
"loss": 1.4401,
|
||
|
|
"step": 326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8885869565217391,
|
||
|
|
"grad_norm": 0.3653166592121124,
|
||
|
|
"learning_rate": 7.532228360957643e-06,
|
||
|
|
"loss": 1.3051,
|
||
|
|
"step": 327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8913043478260869,
|
||
|
|
"grad_norm": 0.3713383376598358,
|
||
|
|
"learning_rate": 7.529411764705882e-06,
|
||
|
|
"loss": 1.3492,
|
||
|
|
"step": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8940217391304348,
|
||
|
|
"grad_norm": 0.3575204908847809,
|
||
|
|
"learning_rate": 7.5265834489135455e-06,
|
||
|
|
"loss": 1.3615,
|
||
|
|
"step": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8967391304347826,
|
||
|
|
"grad_norm": 0.366793692111969,
|
||
|
|
"learning_rate": 7.523743340282604e-06,
|
||
|
|
"loss": 1.3239,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8994565217391305,
|
||
|
|
"grad_norm": 0.3851720988750458,
|
||
|
|
"learning_rate": 7.520891364902507e-06,
|
||
|
|
"loss": 1.4105,
|
||
|
|
"step": 331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9021739130434783,
|
||
|
|
"grad_norm": 0.3682537078857422,
|
||
|
|
"learning_rate": 7.518027448243778e-06,
|
||
|
|
"loss": 1.3926,
|
||
|
|
"step": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.904891304347826,
|
||
|
|
"grad_norm": 0.35508108139038086,
|
||
|
|
"learning_rate": 7.515151515151516e-06,
|
||
|
|
"loss": 1.39,
|
||
|
|
"step": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.907608695652174,
|
||
|
|
"grad_norm": 0.35429316759109497,
|
||
|
|
"learning_rate": 7.512263489838823e-06,
|
||
|
|
"loss": 1.4277,
|
||
|
|
"step": 334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9103260869565217,
|
||
|
|
"grad_norm": 0.35008707642555237,
|
||
|
|
"learning_rate": 7.50936329588015e-06,
|
||
|
|
"loss": 1.3593,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9130434782608695,
|
||
|
|
"grad_norm": 0.35157814621925354,
|
||
|
|
"learning_rate": 7.5064508562045505e-06,
|
||
|
|
"loss": 1.3293,
|
||
|
|
"step": 336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9157608695652174,
|
||
|
|
"grad_norm": 0.3862317204475403,
|
||
|
|
"learning_rate": 7.503526093088858e-06,
|
||
|
|
"loss": 1.3472,
|
||
|
|
"step": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9184782608695652,
|
||
|
|
"grad_norm": 0.36031222343444824,
|
||
|
|
"learning_rate": 7.500588928150765e-06,
|
||
|
|
"loss": 1.3446,
|
||
|
|
"step": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9211956521739131,
|
||
|
|
"grad_norm": 0.3770779073238373,
|
||
|
|
"learning_rate": 7.497639282341832e-06,
|
||
|
|
"loss": 1.3005,
|
||
|
|
"step": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9239130434782609,
|
||
|
|
"grad_norm": 0.3638140857219696,
|
||
|
|
"learning_rate": 7.494677075940384e-06,
|
||
|
|
"loss": 1.3519,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9266304347826086,
|
||
|
|
"grad_norm": 0.3515496253967285,
|
||
|
|
"learning_rate": 7.491702228544334e-06,
|
||
|
|
"loss": 1.4093,
|
||
|
|
"step": 341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9293478260869565,
|
||
|
|
"grad_norm": 0.3889096975326538,
|
||
|
|
"learning_rate": 7.48871465906391e-06,
|
||
|
|
"loss": 1.3816,
|
||
|
|
"step": 342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9320652173913043,
|
||
|
|
"grad_norm": 0.3584050238132477,
|
||
|
|
"learning_rate": 7.485714285714285e-06,
|
||
|
|
"loss": 1.4043,
|
||
|
|
"step": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9347826086956522,
|
||
|
|
"grad_norm": 0.360524982213974,
|
||
|
|
"learning_rate": 7.4827010260081125e-06,
|
||
|
|
"loss": 1.3481,
|
||
|
|
"step": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9375,
|
||
|
|
"grad_norm": 0.3846801817417145,
|
||
|
|
"learning_rate": 7.4796747967479676e-06,
|
||
|
|
"loss": 1.4366,
|
||
|
|
"step": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9402173913043478,
|
||
|
|
"grad_norm": 0.35864099860191345,
|
||
|
|
"learning_rate": 7.476635514018692e-06,
|
||
|
|
"loss": 1.3096,
|
||
|
|
"step": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9429347826086957,
|
||
|
|
"grad_norm": 0.37097325921058655,
|
||
|
|
"learning_rate": 7.473583093179635e-06,
|
||
|
|
"loss": 1.2847,
|
||
|
|
"step": 347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9456521739130435,
|
||
|
|
"grad_norm": 0.396785169839859,
|
||
|
|
"learning_rate": 7.4705174488568e-06,
|
||
|
|
"loss": 1.3337,
|
||
|
|
"step": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9483695652173914,
|
||
|
|
"grad_norm": 0.3547925353050232,
|
||
|
|
"learning_rate": 7.467438494934877e-06,
|
||
|
|
"loss": 1.3112,
|
||
|
|
"step": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9510869565217391,
|
||
|
|
"grad_norm": 0.3848293721675873,
|
||
|
|
"learning_rate": 7.46434614454919e-06,
|
||
|
|
"loss": 1.3002,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9538043478260869,
|
||
|
|
"grad_norm": 0.38206636905670166,
|
||
|
|
"learning_rate": 7.46124031007752e-06,
|
||
|
|
"loss": 1.3621,
|
||
|
|
"step": 351
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9565217391304348,
|
||
|
|
"grad_norm": 0.3636261820793152,
|
||
|
|
"learning_rate": 7.458120903131829e-06,
|
||
|
|
"loss": 1.3595,
|
||
|
|
"step": 352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9592391304347826,
|
||
|
|
"grad_norm": 0.3859856426715851,
|
||
|
|
"learning_rate": 7.454987834549879e-06,
|
||
|
|
"loss": 1.3782,
|
||
|
|
"step": 353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9619565217391305,
|
||
|
|
"grad_norm": 0.3692803382873535,
|
||
|
|
"learning_rate": 7.451841014386735e-06,
|
||
|
|
"loss": 1.3679,
|
||
|
|
"step": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9646739130434783,
|
||
|
|
"grad_norm": 0.3576744794845581,
|
||
|
|
"learning_rate": 7.448680351906158e-06,
|
||
|
|
"loss": 1.3806,
|
||
|
|
"step": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.967391304347826,
|
||
|
|
"grad_norm": 0.4264325499534607,
|
||
|
|
"learning_rate": 7.4455057555718835e-06,
|
||
|
|
"loss": 1.4033,
|
||
|
|
"step": 356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.970108695652174,
|
||
|
|
"grad_norm": 0.3938155770301819,
|
||
|
|
"learning_rate": 7.442317133038782e-06,
|
||
|
|
"loss": 1.3224,
|
||
|
|
"step": 357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9728260869565217,
|
||
|
|
"grad_norm": 0.37210017442703247,
|
||
|
|
"learning_rate": 7.439114391143912e-06,
|
||
|
|
"loss": 1.3148,
|
||
|
|
"step": 358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9755434782608695,
|
||
|
|
"grad_norm": 0.35559719800949097,
|
||
|
|
"learning_rate": 7.435897435897436e-06,
|
||
|
|
"loss": 1.3092,
|
||
|
|
"step": 359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9782608695652174,
|
||
|
|
"grad_norm": 0.4250313937664032,
|
||
|
|
"learning_rate": 7.432666172473437e-06,
|
||
|
|
"loss": 1.3665,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9809782608695652,
|
||
|
|
"grad_norm": 0.34938138723373413,
|
||
|
|
"learning_rate": 7.429420505200594e-06,
|
||
|
|
"loss": 1.3979,
|
||
|
|
"step": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9836956521739131,
|
||
|
|
"grad_norm": 0.3839857876300812,
|
||
|
|
"learning_rate": 7.426160337552743e-06,
|
||
|
|
"loss": 1.3008,
|
||
|
|
"step": 362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9864130434782609,
|
||
|
|
"grad_norm": 0.37689098715782166,
|
||
|
|
"learning_rate": 7.422885572139303e-06,
|
||
|
|
"loss": 1.4584,
|
||
|
|
"step": 363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9891304347826086,
|
||
|
|
"grad_norm": 0.35095831751823425,
|
||
|
|
"learning_rate": 7.4195961106955874e-06,
|
||
|
|
"loss": 1.286,
|
||
|
|
"step": 364
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9918478260869565,
|
||
|
|
"grad_norm": 0.34754878282546997,
|
||
|
|
"learning_rate": 7.416291854072963e-06,
|
||
|
|
"loss": 1.2923,
|
||
|
|
"step": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9945652173913043,
|
||
|
|
"grad_norm": 0.3640921413898468,
|
||
|
|
"learning_rate": 7.412972702228902e-06,
|
||
|
|
"loss": 1.4107,
|
||
|
|
"step": 366
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9972826086956522,
|
||
|
|
"grad_norm": 0.33963489532470703,
|
||
|
|
"learning_rate": 7.409638554216868e-06,
|
||
|
|
"loss": 1.3914,
|
||
|
|
"step": 367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.36554214358329773,
|
||
|
|
"learning_rate": 7.406289308176099e-06,
|
||
|
|
"loss": 1.4259,
|
||
|
|
"step": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.002717391304348,
|
||
|
|
"grad_norm": 0.34235456585884094,
|
||
|
|
"learning_rate": 7.40292486132123e-06,
|
||
|
|
"loss": 1.4269,
|
||
|
|
"step": 369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0054347826086956,
|
||
|
|
"grad_norm": 0.34481701254844666,
|
||
|
|
"learning_rate": 7.399545109931765e-06,
|
||
|
|
"loss": 1.4128,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0081521739130435,
|
||
|
|
"grad_norm": 0.362263023853302,
|
||
|
|
"learning_rate": 7.396149949341437e-06,
|
||
|
|
"loss": 1.3562,
|
||
|
|
"step": 371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0108695652173914,
|
||
|
|
"grad_norm": 0.351425439119339,
|
||
|
|
"learning_rate": 7.392739273927391e-06,
|
||
|
|
"loss": 1.3482,
|
||
|
|
"step": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.013586956521739,
|
||
|
|
"grad_norm": 0.33792799711227417,
|
||
|
|
"learning_rate": 7.3893129770992355e-06,
|
||
|
|
"loss": 1.3668,
|
||
|
|
"step": 373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.016304347826087,
|
||
|
|
"grad_norm": 0.35396307706832886,
|
||
|
|
"learning_rate": 7.385870951287936e-06,
|
||
|
|
"loss": 1.2686,
|
||
|
|
"step": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0190217391304348,
|
||
|
|
"grad_norm": 0.3580890893936157,
|
||
|
|
"learning_rate": 7.38241308793456e-06,
|
||
|
|
"loss": 1.3531,
|
||
|
|
"step": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0217391304347827,
|
||
|
|
"grad_norm": 0.3556496798992157,
|
||
|
|
"learning_rate": 7.3789392774788615e-06,
|
||
|
|
"loss": 1.347,
|
||
|
|
"step": 376
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0244565217391304,
|
||
|
|
"grad_norm": 0.3453238904476166,
|
||
|
|
"learning_rate": 7.3754494093477135e-06,
|
||
|
|
"loss": 1.2562,
|
||
|
|
"step": 377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0271739130434783,
|
||
|
|
"grad_norm": 0.3520653545856476,
|
||
|
|
"learning_rate": 7.371943371943371e-06,
|
||
|
|
"loss": 1.3663,
|
||
|
|
"step": 378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0298913043478262,
|
||
|
|
"grad_norm": 0.35983365774154663,
|
||
|
|
"learning_rate": 7.368421052631578e-06,
|
||
|
|
"loss": 1.3657,
|
||
|
|
"step": 379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0326086956521738,
|
||
|
|
"grad_norm": 0.34599393606185913,
|
||
|
|
"learning_rate": 7.364882337729505e-06,
|
||
|
|
"loss": 1.3984,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0353260869565217,
|
||
|
|
"grad_norm": 0.34382811188697815,
|
||
|
|
"learning_rate": 7.361327112493519e-06,
|
||
|
|
"loss": 1.3781,
|
||
|
|
"step": 381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0380434782608696,
|
||
|
|
"grad_norm": 0.340087890625,
|
||
|
|
"learning_rate": 7.357755261106781e-06,
|
||
|
|
"loss": 1.3895,
|
||
|
|
"step": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0407608695652173,
|
||
|
|
"grad_norm": 0.3491702079772949,
|
||
|
|
"learning_rate": 7.3541666666666656e-06,
|
||
|
|
"loss": 1.3423,
|
||
|
|
"step": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0434782608695652,
|
||
|
|
"grad_norm": 0.3492245674133301,
|
||
|
|
"learning_rate": 7.350561211172017e-06,
|
||
|
|
"loss": 1.3223,
|
||
|
|
"step": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.046195652173913,
|
||
|
|
"grad_norm": 0.35501372814178467,
|
||
|
|
"learning_rate": 7.346938775510203e-06,
|
||
|
|
"loss": 1.5079,
|
||
|
|
"step": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.048913043478261,
|
||
|
|
"grad_norm": 0.3509463965892792,
|
||
|
|
"learning_rate": 7.343299239444006e-06,
|
||
|
|
"loss": 1.3769,
|
||
|
|
"step": 386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0516304347826086,
|
||
|
|
"grad_norm": 0.3643050789833069,
|
||
|
|
"learning_rate": 7.339642481598317e-06,
|
||
|
|
"loss": 1.3845,
|
||
|
|
"step": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0543478260869565,
|
||
|
|
"grad_norm": 0.3647449016571045,
|
||
|
|
"learning_rate": 7.335968379446639e-06,
|
||
|
|
"loss": 1.3997,
|
||
|
|
"step": 388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0570652173913044,
|
||
|
|
"grad_norm": 0.34432703256607056,
|
||
|
|
"learning_rate": 7.33227680929741e-06,
|
||
|
|
"loss": 1.3715,
|
||
|
|
"step": 389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.059782608695652,
|
||
|
|
"grad_norm": 0.3494293689727783,
|
||
|
|
"learning_rate": 7.328567646280116e-06,
|
||
|
|
"loss": 1.4117,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0625,
|
||
|
|
"grad_norm": 0.35947683453559875,
|
||
|
|
"learning_rate": 7.324840764331209e-06,
|
||
|
|
"loss": 1.3205,
|
||
|
|
"step": 391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.065217391304348,
|
||
|
|
"grad_norm": 0.35863590240478516,
|
||
|
|
"learning_rate": 7.321096036179834e-06,
|
||
|
|
"loss": 1.3203,
|
||
|
|
"step": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0679347826086956,
|
||
|
|
"grad_norm": 0.35642603039741516,
|
||
|
|
"learning_rate": 7.317333333333333e-06,
|
||
|
|
"loss": 1.3938,
|
||
|
|
"step": 393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0706521739130435,
|
||
|
|
"grad_norm": 0.35906854271888733,
|
||
|
|
"learning_rate": 7.313552526062549e-06,
|
||
|
|
"loss": 1.3695,
|
||
|
|
"step": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0733695652173914,
|
||
|
|
"grad_norm": 0.3712838590145111,
|
||
|
|
"learning_rate": 7.309753483386923e-06,
|
||
|
|
"loss": 1.3674,
|
||
|
|
"step": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0760869565217392,
|
||
|
|
"grad_norm": 0.38039541244506836,
|
||
|
|
"learning_rate": 7.30593607305936e-06,
|
||
|
|
"loss": 1.4794,
|
||
|
|
"step": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.078804347826087,
|
||
|
|
"grad_norm": 0.3319692015647888,
|
||
|
|
"learning_rate": 7.302100161550887e-06,
|
||
|
|
"loss": 1.3669,
|
||
|
|
"step": 397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0815217391304348,
|
||
|
|
"grad_norm": 0.3548648953437805,
|
||
|
|
"learning_rate": 7.298245614035087e-06,
|
||
|
|
"loss": 1.3456,
|
||
|
|
"step": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0842391304347827,
|
||
|
|
"grad_norm": 0.4208085238933563,
|
||
|
|
"learning_rate": 7.2943722943722935e-06,
|
||
|
|
"loss": 1.3859,
|
||
|
|
"step": 399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0869565217391304,
|
||
|
|
"grad_norm": 0.34764373302459717,
|
||
|
|
"learning_rate": 7.290480065093571e-06,
|
||
|
|
"loss": 1.3748,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0896739130434783,
|
||
|
|
"grad_norm": 0.35038769245147705,
|
||
|
|
"learning_rate": 7.286568787384447e-06,
|
||
|
|
"loss": 1.4612,
|
||
|
|
"step": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0923913043478262,
|
||
|
|
"grad_norm": 0.337618887424469,
|
||
|
|
"learning_rate": 7.2826383210684106e-06,
|
||
|
|
"loss": 1.3535,
|
||
|
|
"step": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0951086956521738,
|
||
|
|
"grad_norm": 0.3615802824497223,
|
||
|
|
"learning_rate": 7.278688524590163e-06,
|
||
|
|
"loss": 1.3879,
|
||
|
|
"step": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0978260869565217,
|
||
|
|
"grad_norm": 0.3457699418067932,
|
||
|
|
"learning_rate": 7.2747192549986296e-06,
|
||
|
|
"loss": 1.4415,
|
||
|
|
"step": 404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1005434782608696,
|
||
|
|
"grad_norm": 0.34804657101631165,
|
||
|
|
"learning_rate": 7.270730367929708e-06,
|
||
|
|
"loss": 1.3189,
|
||
|
|
"step": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1032608695652173,
|
||
|
|
"grad_norm": 0.3465460240840912,
|
||
|
|
"learning_rate": 7.266721717588769e-06,
|
||
|
|
"loss": 1.3687,
|
||
|
|
"step": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1059782608695652,
|
||
|
|
"grad_norm": 0.345615416765213,
|
||
|
|
"learning_rate": 7.262693156732891e-06,
|
||
|
|
"loss": 1.3165,
|
||
|
|
"step": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.108695652173913,
|
||
|
|
"grad_norm": 0.34842392802238464,
|
||
|
|
"learning_rate": 7.258644536652834e-06,
|
||
|
|
"loss": 1.3238,
|
||
|
|
"step": 408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1114130434782608,
|
||
|
|
"grad_norm": 0.3517705798149109,
|
||
|
|
"learning_rate": 7.254575707154742e-06,
|
||
|
|
"loss": 1.3034,
|
||
|
|
"step": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1141304347826086,
|
||
|
|
"grad_norm": 0.3425719141960144,
|
||
|
|
"learning_rate": 7.2504865165415625e-06,
|
||
|
|
"loss": 1.4513,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1168478260869565,
|
||
|
|
"grad_norm": 0.34302225708961487,
|
||
|
|
"learning_rate": 7.2463768115942025e-06,
|
||
|
|
"loss": 1.3241,
|
||
|
|
"step": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1195652173913044,
|
||
|
|
"grad_norm": 0.36630138754844666,
|
||
|
|
"learning_rate": 7.242246437552389e-06,
|
||
|
|
"loss": 1.2878,
|
||
|
|
"step": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.122282608695652,
|
||
|
|
"grad_norm": 0.35825982689857483,
|
||
|
|
"learning_rate": 7.238095238095238e-06,
|
||
|
|
"loss": 1.2899,
|
||
|
|
"step": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.125,
|
||
|
|
"grad_norm": 0.3747299015522003,
|
||
|
|
"learning_rate": 7.2339230553215386e-06,
|
||
|
|
"loss": 1.3485,
|
||
|
|
"step": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.127717391304348,
|
||
|
|
"grad_norm": 0.35318565368652344,
|
||
|
|
"learning_rate": 7.229729729729729e-06,
|
||
|
|
"loss": 1.3475,
|
||
|
|
"step": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1304347826086956,
|
||
|
|
"grad_norm": 0.3485441505908966,
|
||
|
|
"learning_rate": 7.225515100197572e-06,
|
||
|
|
"loss": 1.3389,
|
||
|
|
"step": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1331521739130435,
|
||
|
|
"grad_norm": 0.37150004506111145,
|
||
|
|
"learning_rate": 7.221279003961517e-06,
|
||
|
|
"loss": 1.4033,
|
||
|
|
"step": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1358695652173914,
|
||
|
|
"grad_norm": 0.34884974360466003,
|
||
|
|
"learning_rate": 7.217021276595745e-06,
|
||
|
|
"loss": 1.3228,
|
||
|
|
"step": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1385869565217392,
|
||
|
|
"grad_norm": 0.36258143186569214,
|
||
|
|
"learning_rate": 7.212741751990899e-06,
|
||
|
|
"loss": 1.4687,
|
||
|
|
"step": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.141304347826087,
|
||
|
|
"grad_norm": 0.3562002182006836,
|
||
|
|
"learning_rate": 7.208440262332477e-06,
|
||
|
|
"loss": 1.4778,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1440217391304348,
|
||
|
|
"grad_norm": 0.35977649688720703,
|
||
|
|
"learning_rate": 7.204116638078902e-06,
|
||
|
|
"loss": 1.2789,
|
||
|
|
"step": 421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1467391304347827,
|
||
|
|
"grad_norm": 0.36146384477615356,
|
||
|
|
"learning_rate": 7.1997707079392365e-06,
|
||
|
|
"loss": 1.3834,
|
||
|
|
"step": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1494565217391304,
|
||
|
|
"grad_norm": 0.3516390323638916,
|
||
|
|
"learning_rate": 7.195402298850574e-06,
|
||
|
|
"loss": 1.3415,
|
||
|
|
"step": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1521739130434783,
|
||
|
|
"grad_norm": 0.33636316657066345,
|
||
|
|
"learning_rate": 7.1910112359550555e-06,
|
||
|
|
"loss": 1.37,
|
||
|
|
"step": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1548913043478262,
|
||
|
|
"grad_norm": 0.3520665168762207,
|
||
|
|
"learning_rate": 7.186597342576545e-06,
|
||
|
|
"loss": 1.4197,
|
||
|
|
"step": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1576086956521738,
|
||
|
|
"grad_norm": 0.3456652760505676,
|
||
|
|
"learning_rate": 7.18216044019693e-06,
|
||
|
|
"loss": 1.4093,
|
||
|
|
"step": 426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1603260869565217,
|
||
|
|
"grad_norm": 0.34344473481178284,
|
||
|
|
"learning_rate": 7.177700348432055e-06,
|
||
|
|
"loss": 1.3166,
|
||
|
|
"step": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1630434782608696,
|
||
|
|
"grad_norm": 0.3432557284832001,
|
||
|
|
"learning_rate": 7.173216885007277e-06,
|
||
|
|
"loss": 1.314,
|
||
|
|
"step": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1657608695652173,
|
||
|
|
"grad_norm": 0.3404994308948517,
|
||
|
|
"learning_rate": 7.168709865732632e-06,
|
||
|
|
"loss": 1.3291,
|
||
|
|
"step": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1684782608695652,
|
||
|
|
"grad_norm": 0.34031039476394653,
|
||
|
|
"learning_rate": 7.164179104477611e-06,
|
||
|
|
"loss": 1.3882,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.171195652173913,
|
||
|
|
"grad_norm": 0.34020742774009705,
|
||
|
|
"learning_rate": 7.159624413145539e-06,
|
||
|
|
"loss": 1.3246,
|
||
|
|
"step": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1739130434782608,
|
||
|
|
"grad_norm": 0.34915924072265625,
|
||
|
|
"learning_rate": 7.155045601647543e-06,
|
||
|
|
"loss": 1.3882,
|
||
|
|
"step": 432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1766304347826086,
|
||
|
|
"grad_norm": 0.35569998621940613,
|
||
|
|
"learning_rate": 7.1504424778761065e-06,
|
||
|
|
"loss": 1.3184,
|
||
|
|
"step": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1793478260869565,
|
||
|
|
"grad_norm": 0.34406909346580505,
|
||
|
|
"learning_rate": 7.145814847678202e-06,
|
||
|
|
"loss": 1.3279,
|
||
|
|
"step": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1820652173913044,
|
||
|
|
"grad_norm": 0.3368189334869385,
|
||
|
|
"learning_rate": 7.1411625148279956e-06,
|
||
|
|
"loss": 1.3227,
|
||
|
|
"step": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.184782608695652,
|
||
|
|
"grad_norm": 0.3536527454853058,
|
||
|
|
"learning_rate": 7.136485280999108e-06,
|
||
|
|
"loss": 1.3437,
|
||
|
|
"step": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1875,
|
||
|
|
"grad_norm": 0.34697532653808594,
|
||
|
|
"learning_rate": 7.131782945736434e-06,
|
||
|
|
"loss": 1.4258,
|
||
|
|
"step": 437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.190217391304348,
|
||
|
|
"grad_norm": 0.3366316258907318,
|
||
|
|
"learning_rate": 7.127055306427504e-06,
|
||
|
|
"loss": 1.3611,
|
||
|
|
"step": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1929347826086956,
|
||
|
|
"grad_norm": 0.3574765622615814,
|
||
|
|
"learning_rate": 7.122302158273381e-06,
|
||
|
|
"loss": 1.307,
|
||
|
|
"step": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1956521739130435,
|
||
|
|
"grad_norm": 0.36839932203292847,
|
||
|
|
"learning_rate": 7.117523294259092e-06,
|
||
|
|
"loss": 1.3751,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1983695652173914,
|
||
|
|
"grad_norm": 0.3561205565929413,
|
||
|
|
"learning_rate": 7.112718505123568e-06,
|
||
|
|
"loss": 1.2903,
|
||
|
|
"step": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2010869565217392,
|
||
|
|
"grad_norm": 0.360176682472229,
|
||
|
|
"learning_rate": 7.107887579329101e-06,
|
||
|
|
"loss": 1.4551,
|
||
|
|
"step": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.203804347826087,
|
||
|
|
"grad_norm": 0.36860206723213196,
|
||
|
|
"learning_rate": 7.1030303030303025e-06,
|
||
|
|
"loss": 1.3247,
|
||
|
|
"step": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2065217391304348,
|
||
|
|
"grad_norm": 0.35110506415367126,
|
||
|
|
"learning_rate": 7.09814646004254e-06,
|
||
|
|
"loss": 1.2791,
|
||
|
|
"step": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2092391304347827,
|
||
|
|
"grad_norm": 0.3497629761695862,
|
||
|
|
"learning_rate": 7.093235831809871e-06,
|
||
|
|
"loss": 1.4523,
|
||
|
|
"step": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2119565217391304,
|
||
|
|
"grad_norm": 0.5049267411231995,
|
||
|
|
"learning_rate": 7.088298197372441e-06,
|
||
|
|
"loss": 1.3368,
|
||
|
|
"step": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2146739130434783,
|
||
|
|
"grad_norm": 0.3517695367336273,
|
||
|
|
"learning_rate": 7.083333333333333e-06,
|
||
|
|
"loss": 1.4039,
|
||
|
|
"step": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2173913043478262,
|
||
|
|
"grad_norm": 0.3525075316429138,
|
||
|
|
"learning_rate": 7.078341013824884e-06,
|
||
|
|
"loss": 1.3692,
|
||
|
|
"step": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2201086956521738,
|
||
|
|
"grad_norm": 0.3429877758026123,
|
||
|
|
"learning_rate": 7.07332101047443e-06,
|
||
|
|
"loss": 1.3325,
|
||
|
|
"step": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2228260869565217,
|
||
|
|
"grad_norm": 0.3495529592037201,
|
||
|
|
"learning_rate": 7.068273092369477e-06,
|
||
|
|
"loss": 1.3013,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2255434782608696,
|
||
|
|
"grad_norm": 0.36226147413253784,
|
||
|
|
"learning_rate": 7.0631970260223045e-06,
|
||
|
|
"loss": 1.3787,
|
||
|
|
"step": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2282608695652173,
|
||
|
|
"grad_norm": 0.33679068088531494,
|
||
|
|
"learning_rate": 7.058092575333955e-06,
|
||
|
|
"loss": 1.366,
|
||
|
|
"step": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2309782608695652,
|
||
|
|
"grad_norm": 0.33339637517929077,
|
||
|
|
"learning_rate": 7.052959501557633e-06,
|
||
|
|
"loss": 1.2804,
|
||
|
|
"step": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.233695652173913,
|
||
|
|
"grad_norm": 0.3441121280193329,
|
||
|
|
"learning_rate": 7.047797563261481e-06,
|
||
|
|
"loss": 1.3386,
|
||
|
|
"step": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2364130434782608,
|
||
|
|
"grad_norm": 0.36244091391563416,
|
||
|
|
"learning_rate": 7.042606516290728e-06,
|
||
|
|
"loss": 1.3221,
|
||
|
|
"step": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2391304347826086,
|
||
|
|
"grad_norm": 0.374056875705719,
|
||
|
|
"learning_rate": 7.037386113729187e-06,
|
||
|
|
"loss": 1.3576,
|
||
|
|
"step": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2418478260869565,
|
||
|
|
"grad_norm": 0.339832067489624,
|
||
|
|
"learning_rate": 7.032136105860114e-06,
|
||
|
|
"loss": 1.4454,
|
||
|
|
"step": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2445652173913044,
|
||
|
|
"grad_norm": 0.35934752225875854,
|
||
|
|
"learning_rate": 7.0268562401263825e-06,
|
||
|
|
"loss": 1.3476,
|
||
|
|
"step": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.247282608695652,
|
||
|
|
"grad_norm": 0.35539355874061584,
|
||
|
|
"learning_rate": 7.0215462610899875e-06,
|
||
|
|
"loss": 1.2979,
|
||
|
|
"step": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.25,
|
||
|
|
"grad_norm": 0.3446095287799835,
|
||
|
|
"learning_rate": 7.016205910390849e-06,
|
||
|
|
"loss": 1.3906,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.252717391304348,
|
||
|
|
"grad_norm": 0.35285550355911255,
|
||
|
|
"learning_rate": 7.010834926704908e-06,
|
||
|
|
"loss": 1.3686,
|
||
|
|
"step": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2554347826086958,
|
||
|
|
"grad_norm": 0.3725644052028656,
|
||
|
|
"learning_rate": 7.005433045701502e-06,
|
||
|
|
"loss": 1.3869,
|
||
|
|
"step": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2581521739130435,
|
||
|
|
"grad_norm": 0.3460637032985687,
|
||
|
|
"learning_rate": 7e-06,
|
||
|
|
"loss": 1.3895,
|
||
|
|
"step": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2608695652173914,
|
||
|
|
"grad_norm": 0.3563375771045685,
|
||
|
|
"learning_rate": 6.994535519125682e-06,
|
||
|
|
"loss": 1.3394,
|
||
|
|
"step": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2635869565217392,
|
||
|
|
"grad_norm": 0.3403022289276123,
|
||
|
|
"learning_rate": 6.989039329464861e-06,
|
||
|
|
"loss": 1.3424,
|
||
|
|
"step": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.266304347826087,
|
||
|
|
"grad_norm": 0.3511143922805786,
|
||
|
|
"learning_rate": 6.983511154219204e-06,
|
||
|
|
"loss": 1.415,
|
||
|
|
"step": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2690217391304348,
|
||
|
|
"grad_norm": 0.35280826687812805,
|
||
|
|
"learning_rate": 6.9779507133592734e-06,
|
||
|
|
"loss": 1.3912,
|
||
|
|
"step": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2717391304347827,
|
||
|
|
"grad_norm": 0.34647977352142334,
|
||
|
|
"learning_rate": 6.972357723577236e-06,
|
||
|
|
"loss": 1.2599,
|
||
|
|
"step": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2744565217391304,
|
||
|
|
"grad_norm": 0.3498916029930115,
|
||
|
|
"learning_rate": 6.966731898238747e-06,
|
||
|
|
"loss": 1.2458,
|
||
|
|
"step": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2771739130434783,
|
||
|
|
"grad_norm": 0.35044994950294495,
|
||
|
|
"learning_rate": 6.961072947333987e-06,
|
||
|
|
"loss": 1.3233,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2798913043478262,
|
||
|
|
"grad_norm": 0.34845730662345886,
|
||
|
|
"learning_rate": 6.955380577427821e-06,
|
||
|
|
"loss": 1.3276,
|
||
|
|
"step": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2826086956521738,
|
||
|
|
"grad_norm": 0.33805760741233826,
|
||
|
|
"learning_rate": 6.949654491609081e-06,
|
||
|
|
"loss": 1.3344,
|
||
|
|
"step": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2853260869565217,
|
||
|
|
"grad_norm": 0.3485923409461975,
|
||
|
|
"learning_rate": 6.943894389438942e-06,
|
||
|
|
"loss": 1.388,
|
||
|
|
"step": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2880434782608696,
|
||
|
|
"grad_norm": 0.34043747186660767,
|
||
|
|
"learning_rate": 6.9380999668983765e-06,
|
||
|
|
"loss": 1.375,
|
||
|
|
"step": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2907608695652173,
|
||
|
|
"grad_norm": 0.3383161723613739,
|
||
|
|
"learning_rate": 6.93227091633466e-06,
|
||
|
|
"loss": 1.3298,
|
||
|
|
"step": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2934782608695652,
|
||
|
|
"grad_norm": 0.35131675004959106,
|
||
|
|
"learning_rate": 6.926406926406925e-06,
|
||
|
|
"loss": 1.423,
|
||
|
|
"step": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.296195652173913,
|
||
|
|
"grad_norm": 0.3680573105812073,
|
||
|
|
"learning_rate": 6.920507682030727e-06,
|
||
|
|
"loss": 1.3236,
|
||
|
|
"step": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2989130434782608,
|
||
|
|
"grad_norm": 0.3596939146518707,
|
||
|
|
"learning_rate": 6.914572864321606e-06,
|
||
|
|
"loss": 1.3664,
|
||
|
|
"step": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3016304347826086,
|
||
|
|
"grad_norm": 0.3593509793281555,
|
||
|
|
"learning_rate": 6.908602150537633e-06,
|
||
|
|
"loss": 1.3124,
|
||
|
|
"step": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3043478260869565,
|
||
|
|
"grad_norm": 0.33961907029151917,
|
||
|
|
"learning_rate": 6.902595214020896e-06,
|
||
|
|
"loss": 1.3484,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3070652173913042,
|
||
|
|
"grad_norm": 0.3712293207645416,
|
||
|
|
"learning_rate": 6.89655172413793e-06,
|
||
|
|
"loss": 1.3233,
|
||
|
|
"step": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.309782608695652,
|
||
|
|
"grad_norm": 0.3482085168361664,
|
||
|
|
"learning_rate": 6.890471346219057e-06,
|
||
|
|
"loss": 1.3653,
|
||
|
|
"step": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3125,
|
||
|
|
"grad_norm": 0.3515682816505432,
|
||
|
|
"learning_rate": 6.884353741496598e-06,
|
||
|
|
"loss": 1.3987,
|
||
|
|
"step": 483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.315217391304348,
|
||
|
|
"grad_norm": 0.35586169362068176,
|
||
|
|
"learning_rate": 6.878198567041965e-06,
|
||
|
|
"loss": 1.3217,
|
||
|
|
"step": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3179347826086958,
|
||
|
|
"grad_norm": 0.34130343794822693,
|
||
|
|
"learning_rate": 6.872005475701574e-06,
|
||
|
|
"loss": 1.3132,
|
||
|
|
"step": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3206521739130435,
|
||
|
|
"grad_norm": 0.39066967368125916,
|
||
|
|
"learning_rate": 6.8657741160315826e-06,
|
||
|
|
"loss": 1.3306,
|
||
|
|
"step": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3233695652173914,
|
||
|
|
"grad_norm": 0.35300007462501526,
|
||
|
|
"learning_rate": 6.859504132231405e-06,
|
||
|
|
"loss": 1.3755,
|
||
|
|
"step": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3260869565217392,
|
||
|
|
"grad_norm": 0.3553847372531891,
|
||
|
|
"learning_rate": 6.853195164075993e-06,
|
||
|
|
"loss": 1.3317,
|
||
|
|
"step": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.328804347826087,
|
||
|
|
"grad_norm": 0.3760699927806854,
|
||
|
|
"learning_rate": 6.846846846846847e-06,
|
||
|
|
"loss": 1.4368,
|
||
|
|
"step": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3315217391304348,
|
||
|
|
"grad_norm": 0.3570723533630371,
|
||
|
|
"learning_rate": 6.84045881126173e-06,
|
||
|
|
"loss": 1.4,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3342391304347827,
|
||
|
|
"grad_norm": 0.3696223497390747,
|
||
|
|
"learning_rate": 6.834030683403068e-06,
|
||
|
|
"loss": 1.3349,
|
||
|
|
"step": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3369565217391304,
|
||
|
|
"grad_norm": 0.37452593445777893,
|
||
|
|
"learning_rate": 6.827562084644981e-06,
|
||
|
|
"loss": 1.2458,
|
||
|
|
"step": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3396739130434783,
|
||
|
|
"grad_norm": 0.3435407280921936,
|
||
|
|
"learning_rate": 6.821052631578946e-06,
|
||
|
|
"loss": 1.3624,
|
||
|
|
"step": 493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3423913043478262,
|
||
|
|
"grad_norm": 0.3696284592151642,
|
||
|
|
"learning_rate": 6.814501935938049e-06,
|
||
|
|
"loss": 1.3795,
|
||
|
|
"step": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3451086956521738,
|
||
|
|
"grad_norm": 0.35256364941596985,
|
||
|
|
"learning_rate": 6.807909604519773e-06,
|
||
|
|
"loss": 1.3196,
|
||
|
|
"step": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3478260869565217,
|
||
|
|
"grad_norm": 0.34246110916137695,
|
||
|
|
"learning_rate": 6.801275239107331e-06,
|
||
|
|
"loss": 1.4291,
|
||
|
|
"step": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3505434782608696,
|
||
|
|
"grad_norm": 0.3617890775203705,
|
||
|
|
"learning_rate": 6.79459843638948e-06,
|
||
|
|
"loss": 1.3208,
|
||
|
|
"step": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3532608695652173,
|
||
|
|
"grad_norm": 0.35320863127708435,
|
||
|
|
"learning_rate": 6.787878787878787e-06,
|
||
|
|
"loss": 1.3272,
|
||
|
|
"step": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3559782608695652,
|
||
|
|
"grad_norm": 0.3528405427932739,
|
||
|
|
"learning_rate": 6.781115879828325e-06,
|
||
|
|
"loss": 1.3978,
|
||
|
|
"step": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.358695652173913,
|
||
|
|
"grad_norm": 0.3528098165988922,
|
||
|
|
"learning_rate": 6.774309293146751e-06,
|
||
|
|
"loss": 1.3747,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3614130434782608,
|
||
|
|
"grad_norm": 0.345384806394577,
|
||
|
|
"learning_rate": 6.767458603311734e-06,
|
||
|
|
"loss": 1.3925,
|
||
|
|
"step": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3641304347826086,
|
||
|
|
"grad_norm": 0.34573063254356384,
|
||
|
|
"learning_rate": 6.760563380281689e-06,
|
||
|
|
"loss": 1.383,
|
||
|
|
"step": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3668478260869565,
|
||
|
|
"grad_norm": 0.3485819697380066,
|
||
|
|
"learning_rate": 6.753623188405796e-06,
|
||
|
|
"loss": 1.3945,
|
||
|
|
"step": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3695652173913042,
|
||
|
|
"grad_norm": 0.35015052556991577,
|
||
|
|
"learning_rate": 6.746637586332242e-06,
|
||
|
|
"loss": 1.323,
|
||
|
|
"step": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.372282608695652,
|
||
|
|
"grad_norm": 0.3457247316837311,
|
||
|
|
"learning_rate": 6.739606126914661e-06,
|
||
|
|
"loss": 1.2701,
|
||
|
|
"step": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.375,
|
||
|
|
"grad_norm": 0.3384177088737488,
|
||
|
|
"learning_rate": 6.732528357116721e-06,
|
||
|
|
"loss": 1.2737,
|
||
|
|
"step": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.377717391304348,
|
||
|
|
"grad_norm": 0.3327966034412384,
|
||
|
|
"learning_rate": 6.725403817914831e-06,
|
||
|
|
"loss": 1.3766,
|
||
|
|
"step": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3804347826086958,
|
||
|
|
"grad_norm": 0.3439366817474365,
|
||
|
|
"learning_rate": 6.718232044198895e-06,
|
||
|
|
"loss": 1.4009,
|
||
|
|
"step": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3831521739130435,
|
||
|
|
"grad_norm": 0.35546526312828064,
|
||
|
|
"learning_rate": 6.711012564671101e-06,
|
||
|
|
"loss": 1.374,
|
||
|
|
"step": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3858695652173914,
|
||
|
|
"grad_norm": 0.3421470522880554,
|
||
|
|
"learning_rate": 6.703744901742677e-06,
|
||
|
|
"loss": 1.3559,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3885869565217392,
|
||
|
|
"grad_norm": 0.334219366312027,
|
||
|
|
"learning_rate": 6.696428571428572e-06,
|
||
|
|
"loss": 1.3361,
|
||
|
|
"step": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.391304347826087,
|
||
|
|
"grad_norm": 0.33939287066459656,
|
||
|
|
"learning_rate": 6.6890630832400145e-06,
|
||
|
|
"loss": 1.3162,
|
||
|
|
"step": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3940217391304348,
|
||
|
|
"grad_norm": 0.35583412647247314,
|
||
|
|
"learning_rate": 6.681647940074906e-06,
|
||
|
|
"loss": 1.4194,
|
||
|
|
"step": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3967391304347827,
|
||
|
|
"grad_norm": 0.3360309600830078,
|
||
|
|
"learning_rate": 6.674182638105974e-06,
|
||
|
|
"loss": 1.3667,
|
||
|
|
"step": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3994565217391304,
|
||
|
|
"grad_norm": 0.34478163719177246,
|
||
|
|
"learning_rate": 6.666666666666666e-06,
|
||
|
|
"loss": 1.4185,
|
||
|
|
"step": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4021739130434783,
|
||
|
|
"grad_norm": 0.3429558277130127,
|
||
|
|
"learning_rate": 6.659099508134694e-06,
|
||
|
|
"loss": 1.2838,
|
||
|
|
"step": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4048913043478262,
|
||
|
|
"grad_norm": 0.373536080121994,
|
||
|
|
"learning_rate": 6.6514806378132115e-06,
|
||
|
|
"loss": 1.3696,
|
||
|
|
"step": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4076086956521738,
|
||
|
|
"grad_norm": 0.34450167417526245,
|
||
|
|
"learning_rate": 6.643809523809523e-06,
|
||
|
|
"loss": 1.3697,
|
||
|
|
"step": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4103260869565217,
|
||
|
|
"grad_norm": 0.3542488217353821,
|
||
|
|
"learning_rate": 6.636085626911314e-06,
|
||
|
|
"loss": 1.3584,
|
||
|
|
"step": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4130434782608696,
|
||
|
|
"grad_norm": 0.3472454845905304,
|
||
|
|
"learning_rate": 6.628308400460298e-06,
|
||
|
|
"loss": 1.3639,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4157608695652173,
|
||
|
|
"grad_norm": 0.36916840076446533,
|
||
|
|
"learning_rate": 6.620477290223248e-06,
|
||
|
|
"loss": 1.3159,
|
||
|
|
"step": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4184782608695652,
|
||
|
|
"grad_norm": 0.3357577323913574,
|
||
|
|
"learning_rate": 6.612591734260331e-06,
|
||
|
|
"loss": 1.2943,
|
||
|
|
"step": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.421195652173913,
|
||
|
|
"grad_norm": 0.3517664074897766,
|
||
|
|
"learning_rate": 6.604651162790697e-06,
|
||
|
|
"loss": 1.3014,
|
||
|
|
"step": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4239130434782608,
|
||
|
|
"grad_norm": 0.3861806094646454,
|
||
|
|
"learning_rate": 6.596654998055231e-06,
|
||
|
|
"loss": 1.3575,
|
||
|
|
"step": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4266304347826086,
|
||
|
|
"grad_norm": 0.347937673330307,
|
||
|
|
"learning_rate": 6.588602654176425e-06,
|
||
|
|
"loss": 1.4401,
|
||
|
|
"step": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4293478260869565,
|
||
|
|
"grad_norm": 0.34917065501213074,
|
||
|
|
"learning_rate": 6.580493537015276e-06,
|
||
|
|
"loss": 1.3305,
|
||
|
|
"step": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4320652173913042,
|
||
|
|
"grad_norm": 0.35585877299308777,
|
||
|
|
"learning_rate": 6.572327044025157e-06,
|
||
|
|
"loss": 1.3503,
|
||
|
|
"step": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.434782608695652,
|
||
|
|
"grad_norm": 0.36193299293518066,
|
||
|
|
"learning_rate": 6.564102564102565e-06,
|
||
|
|
"loss": 1.4254,
|
||
|
|
"step": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4375,
|
||
|
|
"grad_norm": 0.3477296233177185,
|
||
|
|
"learning_rate": 6.55581947743468e-06,
|
||
|
|
"loss": 1.3582,
|
||
|
|
"step": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.440217391304348,
|
||
|
|
"grad_norm": 0.348344624042511,
|
||
|
|
"learning_rate": 6.547477155343664e-06,
|
||
|
|
"loss": 1.3552,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4429347826086958,
|
||
|
|
"grad_norm": 0.34499526023864746,
|
||
|
|
"learning_rate": 6.5390749601275915e-06,
|
||
|
|
"loss": 1.3497,
|
||
|
|
"step": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4456521739130435,
|
||
|
|
"grad_norm": 0.3457295000553131,
|
||
|
|
"learning_rate": 6.530612244897959e-06,
|
||
|
|
"loss": 1.3276,
|
||
|
|
"step": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4483695652173914,
|
||
|
|
"grad_norm": 0.37890803813934326,
|
||
|
|
"learning_rate": 6.522088353413655e-06,
|
||
|
|
"loss": 1.3666,
|
||
|
|
"step": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4510869565217392,
|
||
|
|
"grad_norm": 0.36650505661964417,
|
||
|
|
"learning_rate": 6.513502619911325e-06,
|
||
|
|
"loss": 1.3268,
|
||
|
|
"step": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.453804347826087,
|
||
|
|
"grad_norm": 0.3550858497619629,
|
||
|
|
"learning_rate": 6.5048543689320385e-06,
|
||
|
|
"loss": 1.3665,
|
||
|
|
"step": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4565217391304348,
|
||
|
|
"grad_norm": 0.35670801997184753,
|
||
|
|
"learning_rate": 6.496142915144132e-06,
|
||
|
|
"loss": 1.3853,
|
||
|
|
"step": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4592391304347827,
|
||
|
|
"grad_norm": 0.34603556990623474,
|
||
|
|
"learning_rate": 6.487367563162183e-06,
|
||
|
|
"loss": 1.3374,
|
||
|
|
"step": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4619565217391304,
|
||
|
|
"grad_norm": 0.343791663646698,
|
||
|
|
"learning_rate": 6.478527607361963e-06,
|
||
|
|
"loss": 1.3811,
|
||
|
|
"step": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4646739130434783,
|
||
|
|
"grad_norm": 0.3415122926235199,
|
||
|
|
"learning_rate": 6.469622331691296e-06,
|
||
|
|
"loss": 1.3357,
|
||
|
|
"step": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4673913043478262,
|
||
|
|
"grad_norm": 0.3270445168018341,
|
||
|
|
"learning_rate": 6.460651009476719e-06,
|
||
|
|
"loss": 1.3408,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4701086956521738,
|
||
|
|
"grad_norm": 0.3364504277706146,
|
||
|
|
"learning_rate": 6.4516129032258055e-06,
|
||
|
|
"loss": 1.3302,
|
||
|
|
"step": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4728260869565217,
|
||
|
|
"grad_norm": 0.3475323021411896,
|
||
|
|
"learning_rate": 6.4425072644250714e-06,
|
||
|
|
"loss": 1.3357,
|
||
|
|
"step": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4755434782608696,
|
||
|
|
"grad_norm": 0.34420526027679443,
|
||
|
|
"learning_rate": 6.4333333333333324e-06,
|
||
|
|
"loss": 1.3509,
|
||
|
|
"step": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4782608695652173,
|
||
|
|
"grad_norm": 0.3597885072231293,
|
||
|
|
"learning_rate": 6.424090338770388e-06,
|
||
|
|
"loss": 1.3475,
|
||
|
|
"step": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4809782608695652,
|
||
|
|
"grad_norm": 0.3500368893146515,
|
||
|
|
"learning_rate": 6.414777497900924e-06,
|
||
|
|
"loss": 1.3867,
|
||
|
|
"step": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.483695652173913,
|
||
|
|
"grad_norm": 0.3487471640110016,
|
||
|
|
"learning_rate": 6.405394016013486e-06,
|
||
|
|
"loss": 1.3833,
|
||
|
|
"step": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4864130434782608,
|
||
|
|
"grad_norm": 0.3366873562335968,
|
||
|
|
"learning_rate": 6.395939086294416e-06,
|
||
|
|
"loss": 1.3299,
|
||
|
|
"step": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4891304347826086,
|
||
|
|
"grad_norm": 0.355654239654541,
|
||
|
|
"learning_rate": 6.386411889596603e-06,
|
||
|
|
"loss": 1.3747,
|
||
|
|
"step": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4918478260869565,
|
||
|
|
"grad_norm": 0.35797804594039917,
|
||
|
|
"learning_rate": 6.376811594202899e-06,
|
||
|
|
"loss": 1.3601,
|
||
|
|
"step": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4945652173913042,
|
||
|
|
"grad_norm": 0.34977495670318604,
|
||
|
|
"learning_rate": 6.367137355584082e-06,
|
||
|
|
"loss": 1.2506,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.497282608695652,
|
||
|
|
"grad_norm": 0.3429720401763916,
|
||
|
|
"learning_rate": 6.357388316151203e-06,
|
||
|
|
"loss": 1.336,
|
||
|
|
"step": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5,
|
||
|
|
"grad_norm": 0.343697726726532,
|
||
|
|
"learning_rate": 6.347563605002156e-06,
|
||
|
|
"loss": 1.3987,
|
||
|
|
"step": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5027173913043477,
|
||
|
|
"grad_norm": 0.36152300238609314,
|
||
|
|
"learning_rate": 6.337662337662337e-06,
|
||
|
|
"loss": 1.345,
|
||
|
|
"step": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5054347826086958,
|
||
|
|
"grad_norm": 0.35157766938209534,
|
||
|
|
"learning_rate": 6.327683615819208e-06,
|
||
|
|
"loss": 1.3364,
|
||
|
|
"step": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5081521739130435,
|
||
|
|
"grad_norm": 0.3413873314857483,
|
||
|
|
"learning_rate": 6.31762652705061e-06,
|
||
|
|
"loss": 1.3341,
|
||
|
|
"step": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5108695652173914,
|
||
|
|
"grad_norm": 0.35999447107315063,
|
||
|
|
"learning_rate": 6.307490144546649e-06,
|
||
|
|
"loss": 1.3927,
|
||
|
|
"step": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5135869565217392,
|
||
|
|
"grad_norm": 0.35518884658813477,
|
||
|
|
"learning_rate": 6.297273526824978e-06,
|
||
|
|
"loss": 1.3702,
|
||
|
|
"step": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.516304347826087,
|
||
|
|
"grad_norm": 0.3421798348426819,
|
||
|
|
"learning_rate": 6.2869757174392925e-06,
|
||
|
|
"loss": 1.428,
|
||
|
|
"step": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5190217391304348,
|
||
|
|
"grad_norm": 0.36976388096809387,
|
||
|
|
"learning_rate": 6.27659574468085e-06,
|
||
|
|
"loss": 1.3784,
|
||
|
|
"step": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5217391304347827,
|
||
|
|
"grad_norm": 0.35902440547943115,
|
||
|
|
"learning_rate": 6.266132621272808e-06,
|
||
|
|
"loss": 1.327,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5244565217391304,
|
||
|
|
"grad_norm": 0.360893577337265,
|
||
|
|
"learning_rate": 6.2555853440571934e-06,
|
||
|
|
"loss": 1.316,
|
||
|
|
"step": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5271739130434783,
|
||
|
|
"grad_norm": 0.35077449679374695,
|
||
|
|
"learning_rate": 6.244952893674293e-06,
|
||
|
|
"loss": 1.2877,
|
||
|
|
"step": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5298913043478262,
|
||
|
|
"grad_norm": 0.3528415262699127,
|
||
|
|
"learning_rate": 6.234234234234233e-06,
|
||
|
|
"loss": 1.2806,
|
||
|
|
"step": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5326086956521738,
|
||
|
|
"grad_norm": 0.3422030508518219,
|
||
|
|
"learning_rate": 6.223428312980551e-06,
|
||
|
|
"loss": 1.3473,
|
||
|
|
"step": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5353260869565217,
|
||
|
|
"grad_norm": 0.3537536859512329,
|
||
|
|
"learning_rate": 6.212534059945504e-06,
|
||
|
|
"loss": 1.4186,
|
||
|
|
"step": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5380434782608696,
|
||
|
|
"grad_norm": 0.3570748269557953,
|
||
|
|
"learning_rate": 6.201550387596899e-06,
|
||
|
|
"loss": 1.3426,
|
||
|
|
"step": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5407608695652173,
|
||
|
|
"grad_norm": 0.346245139837265,
|
||
|
|
"learning_rate": 6.19047619047619e-06,
|
||
|
|
"loss": 1.3228,
|
||
|
|
"step": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5434782608695652,
|
||
|
|
"grad_norm": 0.35338735580444336,
|
||
|
|
"learning_rate": 6.179310344827586e-06,
|
||
|
|
"loss": 1.4134,
|
||
|
|
"step": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.546195652173913,
|
||
|
|
"grad_norm": 0.3490867614746094,
|
||
|
|
"learning_rate": 6.168051708217913e-06,
|
||
|
|
"loss": 1.3663,
|
||
|
|
"step": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5489130434782608,
|
||
|
|
"grad_norm": 0.349432110786438,
|
||
|
|
"learning_rate": 6.1566991191469625e-06,
|
||
|
|
"loss": 1.2371,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5516304347826086,
|
||
|
|
"grad_norm": 0.3474854528903961,
|
||
|
|
"learning_rate": 6.145251396648044e-06,
|
||
|
|
"loss": 1.3437,
|
||
|
|
"step": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5543478260869565,
|
||
|
|
"grad_norm": 0.3465191125869751,
|
||
|
|
"learning_rate": 6.133707339878447e-06,
|
||
|
|
"loss": 1.3227,
|
||
|
|
"step": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5570652173913042,
|
||
|
|
"grad_norm": 0.3537481725215912,
|
||
|
|
"learning_rate": 6.122065727699531e-06,
|
||
|
|
"loss": 1.3241,
|
||
|
|
"step": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5597826086956523,
|
||
|
|
"grad_norm": 0.3460562825202942,
|
||
|
|
"learning_rate": 6.110325318246111e-06,
|
||
|
|
"loss": 1.2951,
|
||
|
|
"step": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5625,
|
||
|
|
"grad_norm": 0.3643260896205902,
|
||
|
|
"learning_rate": 6.098484848484849e-06,
|
||
|
|
"loss": 1.3374,
|
||
|
|
"step": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5652173913043477,
|
||
|
|
"grad_norm": 0.36155954003334045,
|
||
|
|
"learning_rate": 6.0865430337612935e-06,
|
||
|
|
"loss": 1.5067,
|
||
|
|
"step": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5679347826086958,
|
||
|
|
"grad_norm": 0.37407439947128296,
|
||
|
|
"learning_rate": 6.074498567335243e-06,
|
||
|
|
"loss": 1.4037,
|
||
|
|
"step": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5706521739130435,
|
||
|
|
"grad_norm": 0.3926370143890381,
|
||
|
|
"learning_rate": 6.062350119904077e-06,
|
||
|
|
"loss": 1.356,
|
||
|
|
"step": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5733695652173914,
|
||
|
|
"grad_norm": 0.3557644486427307,
|
||
|
|
"learning_rate": 6.05009633911368e-06,
|
||
|
|
"loss": 1.3288,
|
||
|
|
"step": 579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5760869565217392,
|
||
|
|
"grad_norm": 0.36532676219940186,
|
||
|
|
"learning_rate": 6.037735849056603e-06,
|
||
|
|
"loss": 1.3494,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.578804347826087,
|
||
|
|
"grad_norm": 0.36569759249687195,
|
||
|
|
"learning_rate": 6.025267249757046e-06,
|
||
|
|
"loss": 1.3887,
|
||
|
|
"step": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5815217391304348,
|
||
|
|
"grad_norm": 0.3753635883331299,
|
||
|
|
"learning_rate": 6.012689116642264e-06,
|
||
|
|
"loss": 1.3738,
|
||
|
|
"step": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5842391304347827,
|
||
|
|
"grad_norm": 0.34146779775619507,
|
||
|
|
"learning_rate": 6e-06,
|
||
|
|
"loss": 1.3313,
|
||
|
|
"step": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5869565217391304,
|
||
|
|
"grad_norm": 0.32988572120666504,
|
||
|
|
"learning_rate": 5.987198424421466e-06,
|
||
|
|
"loss": 1.347,
|
||
|
|
"step": 584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5896739130434783,
|
||
|
|
"grad_norm": 0.3355376124382019,
|
||
|
|
"learning_rate": 5.974282888229475e-06,
|
||
|
|
"loss": 1.3562,
|
||
|
|
"step": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5923913043478262,
|
||
|
|
"grad_norm": 0.3790322542190552,
|
||
|
|
"learning_rate": 5.961251862891206e-06,
|
||
|
|
"loss": 1.3789,
|
||
|
|
"step": 586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5951086956521738,
|
||
|
|
"grad_norm": 0.34783735871315,
|
||
|
|
"learning_rate": 5.948103792415168e-06,
|
||
|
|
"loss": 1.307,
|
||
|
|
"step": 587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5978260869565217,
|
||
|
|
"grad_norm": 0.34875112771987915,
|
||
|
|
"learning_rate": 5.934837092731829e-06,
|
||
|
|
"loss": 1.3205,
|
||
|
|
"step": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6005434782608696,
|
||
|
|
"grad_norm": 0.3391176462173462,
|
||
|
|
"learning_rate": 5.921450151057401e-06,
|
||
|
|
"loss": 1.3438,
|
||
|
|
"step": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6032608695652173,
|
||
|
|
"grad_norm": 0.35487234592437744,
|
||
|
|
"learning_rate": 5.907941325240263e-06,
|
||
|
|
"loss": 1.4358,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6059782608695652,
|
||
|
|
"grad_norm": 0.3468010127544403,
|
||
|
|
"learning_rate": 5.894308943089431e-06,
|
||
|
|
"loss": 1.2499,
|
||
|
|
"step": 591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.608695652173913,
|
||
|
|
"grad_norm": 0.34538012742996216,
|
||
|
|
"learning_rate": 5.8805513016845326e-06,
|
||
|
|
"loss": 1.3331,
|
||
|
|
"step": 592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6114130434782608,
|
||
|
|
"grad_norm": 0.34510117769241333,
|
||
|
|
"learning_rate": 5.866666666666667e-06,
|
||
|
|
"loss": 1.4319,
|
||
|
|
"step": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6141304347826086,
|
||
|
|
"grad_norm": 0.3392553925514221,
|
||
|
|
"learning_rate": 5.852653271509531e-06,
|
||
|
|
"loss": 1.4242,
|
||
|
|
"step": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6168478260869565,
|
||
|
|
"grad_norm": 0.344718873500824,
|
||
|
|
"learning_rate": 5.838509316770187e-06,
|
||
|
|
"loss": 1.3883,
|
||
|
|
"step": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6195652173913042,
|
||
|
|
"grad_norm": 0.348323792219162,
|
||
|
|
"learning_rate": 5.824232969318773e-06,
|
||
|
|
"loss": 1.3739,
|
||
|
|
"step": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6222826086956523,
|
||
|
|
"grad_norm": 0.3823015093803406,
|
||
|
|
"learning_rate": 5.8098223615465e-06,
|
||
|
|
"loss": 1.3831,
|
||
|
|
"step": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.625,
|
||
|
|
"grad_norm": 0.34378868341445923,
|
||
|
|
"learning_rate": 5.795275590551181e-06,
|
||
|
|
"loss": 1.2974,
|
||
|
|
"step": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6277173913043477,
|
||
|
|
"grad_norm": 0.35214224457740784,
|
||
|
|
"learning_rate": 5.780590717299578e-06,
|
||
|
|
"loss": 1.3613,
|
||
|
|
"step": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6304347826086958,
|
||
|
|
"grad_norm": 0.3451964557170868,
|
||
|
|
"learning_rate": 5.765765765765765e-06,
|
||
|
|
"loss": 1.319,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6331521739130435,
|
||
|
|
"grad_norm": 0.32084909081459045,
|
||
|
|
"learning_rate": 5.750798722044728e-06,
|
||
|
|
"loss": 1.3498,
|
||
|
|
"step": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6358695652173914,
|
||
|
|
"grad_norm": 0.3332853615283966,
|
||
|
|
"learning_rate": 5.735687533440342e-06,
|
||
|
|
"loss": 1.284,
|
||
|
|
"step": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6385869565217392,
|
||
|
|
"grad_norm": 0.34017214179039,
|
||
|
|
"learning_rate": 5.720430107526882e-06,
|
||
|
|
"loss": 1.2987,
|
||
|
|
"step": 603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.641304347826087,
|
||
|
|
"grad_norm": 0.3513610064983368,
|
||
|
|
"learning_rate": 5.705024311183144e-06,
|
||
|
|
"loss": 1.3854,
|
||
|
|
"step": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6440217391304348,
|
||
|
|
"grad_norm": 0.3369273841381073,
|
||
|
|
"learning_rate": 5.689467969598262e-06,
|
||
|
|
"loss": 1.2769,
|
||
|
|
"step": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6467391304347827,
|
||
|
|
"grad_norm": 0.3546997904777527,
|
||
|
|
"learning_rate": 5.673758865248226e-06,
|
||
|
|
"loss": 1.423,
|
||
|
|
"step": 606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6494565217391304,
|
||
|
|
"grad_norm": 0.3369233310222626,
|
||
|
|
"learning_rate": 5.657894736842105e-06,
|
||
|
|
"loss": 1.3771,
|
||
|
|
"step": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6521739130434783,
|
||
|
|
"grad_norm": 0.35690680146217346,
|
||
|
|
"learning_rate": 5.641873278236913e-06,
|
||
|
|
"loss": 1.4428,
|
||
|
|
"step": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6548913043478262,
|
||
|
|
"grad_norm": 0.35253193974494934,
|
||
|
|
"learning_rate": 5.625692137320044e-06,
|
||
|
|
"loss": 1.3207,
|
||
|
|
"step": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6576086956521738,
|
||
|
|
"grad_norm": 0.3626672923564911,
|
||
|
|
"learning_rate": 5.609348914858095e-06,
|
||
|
|
"loss": 1.3305,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6603260869565217,
|
||
|
|
"grad_norm": 0.3617280423641205,
|
||
|
|
"learning_rate": 5.592841163310961e-06,
|
||
|
|
"loss": 1.3925,
|
||
|
|
"step": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6630434782608696,
|
||
|
|
"grad_norm": 0.3400178551673889,
|
||
|
|
"learning_rate": 5.576166385609892e-06,
|
||
|
|
"loss": 1.3414,
|
||
|
|
"step": 612
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6657608695652173,
|
||
|
|
"grad_norm": 0.3407033085823059,
|
||
|
|
"learning_rate": 5.559322033898304e-06,
|
||
|
|
"loss": 1.3731,
|
||
|
|
"step": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6684782608695652,
|
||
|
|
"grad_norm": 0.34921663999557495,
|
||
|
|
"learning_rate": 5.542305508233957e-06,
|
||
|
|
"loss": 1.3955,
|
||
|
|
"step": 614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.671195652173913,
|
||
|
|
"grad_norm": 0.3465006649494171,
|
||
|
|
"learning_rate": 5.525114155251141e-06,
|
||
|
|
"loss": 1.3132,
|
||
|
|
"step": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6739130434782608,
|
||
|
|
"grad_norm": 0.35166648030281067,
|
||
|
|
"learning_rate": 5.507745266781411e-06,
|
||
|
|
"loss": 1.3554,
|
||
|
|
"step": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6766304347826086,
|
||
|
|
"grad_norm": 0.3840665817260742,
|
||
|
|
"learning_rate": 5.490196078431373e-06,
|
||
|
|
"loss": 1.2921,
|
||
|
|
"step": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6793478260869565,
|
||
|
|
"grad_norm": 0.3391146957874298,
|
||
|
|
"learning_rate": 5.4724637681159414e-06,
|
||
|
|
"loss": 1.3488,
|
||
|
|
"step": 618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6820652173913042,
|
||
|
|
"grad_norm": 0.3500737249851227,
|
||
|
|
"learning_rate": 5.454545454545454e-06,
|
||
|
|
"loss": 1.3308,
|
||
|
|
"step": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6847826086956523,
|
||
|
|
"grad_norm": 0.36856669187545776,
|
||
|
|
"learning_rate": 5.436438195664909e-06,
|
||
|
|
"loss": 1.2802,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6875,
|
||
|
|
"grad_norm": 0.3350989818572998,
|
||
|
|
"learning_rate": 5.418138987043581e-06,
|
||
|
|
"loss": 1.336,
|
||
|
|
"step": 621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6902173913043477,
|
||
|
|
"grad_norm": 0.35188931226730347,
|
||
|
|
"learning_rate": 5.399644760213144e-06,
|
||
|
|
"loss": 1.3416,
|
||
|
|
"step": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6929347826086958,
|
||
|
|
"grad_norm": 0.3366836607456207,
|
||
|
|
"learning_rate": 5.380952380952381e-06,
|
||
|
|
"loss": 1.3587,
|
||
|
|
"step": 623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6956521739130435,
|
||
|
|
"grad_norm": 0.3324086368083954,
|
||
|
|
"learning_rate": 5.3620586475164575e-06,
|
||
|
|
"loss": 1.3623,
|
||
|
|
"step": 624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6983695652173914,
|
||
|
|
"grad_norm": 0.3365149199962616,
|
||
|
|
"learning_rate": 5.3429602888086635e-06,
|
||
|
|
"loss": 1.3576,
|
||
|
|
"step": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7010869565217392,
|
||
|
|
"grad_norm": 0.3542598783969879,
|
||
|
|
"learning_rate": 5.323653962492438e-06,
|
||
|
|
"loss": 1.3239,
|
||
|
|
"step": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.703804347826087,
|
||
|
|
"grad_norm": 0.35152384638786316,
|
||
|
|
"learning_rate": 5.304136253041362e-06,
|
||
|
|
"loss": 1.3395,
|
||
|
|
"step": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7065217391304348,
|
||
|
|
"grad_norm": 0.345515638589859,
|
||
|
|
"learning_rate": 5.284403669724771e-06,
|
||
|
|
"loss": 1.4407,
|
||
|
|
"step": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7092391304347827,
|
||
|
|
"grad_norm": 0.333773672580719,
|
||
|
|
"learning_rate": 5.2644526445264444e-06,
|
||
|
|
"loss": 1.3048,
|
||
|
|
"step": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7119565217391304,
|
||
|
|
"grad_norm": 0.3490050137042999,
|
||
|
|
"learning_rate": 5.244279529993816e-06,
|
||
|
|
"loss": 1.4172,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7146739130434783,
|
||
|
|
"grad_norm": 0.3542667031288147,
|
||
|
|
"learning_rate": 5.223880597014925e-06,
|
||
|
|
"loss": 1.3624,
|
||
|
|
"step": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7173913043478262,
|
||
|
|
"grad_norm": 0.36096838116645813,
|
||
|
|
"learning_rate": 5.203252032520325e-06,
|
||
|
|
"loss": 1.2933,
|
||
|
|
"step": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7201086956521738,
|
||
|
|
"grad_norm": 0.3591996133327484,
|
||
|
|
"learning_rate": 5.1823899371069175e-06,
|
||
|
|
"loss": 1.3022,
|
||
|
|
"step": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7228260869565217,
|
||
|
|
"grad_norm": 0.33392882347106934,
|
||
|
|
"learning_rate": 5.161290322580645e-06,
|
||
|
|
"loss": 1.292,
|
||
|
|
"step": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7255434782608696,
|
||
|
|
"grad_norm": 0.37061673402786255,
|
||
|
|
"learning_rate": 5.139949109414757e-06,
|
||
|
|
"loss": 1.4192,
|
||
|
|
"step": 635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7282608695652173,
|
||
|
|
"grad_norm": 0.3426995277404785,
|
||
|
|
"learning_rate": 5.1183621241202815e-06,
|
||
|
|
"loss": 1.3483,
|
||
|
|
"step": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7309782608695652,
|
||
|
|
"grad_norm": 0.33502069115638733,
|
||
|
|
"learning_rate": 5.096525096525096e-06,
|
||
|
|
"loss": 1.4262,
|
||
|
|
"step": 637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.733695652173913,
|
||
|
|
"grad_norm": 0.34400445222854614,
|
||
|
|
"learning_rate": 5.074433656957928e-06,
|
||
|
|
"loss": 1.3643,
|
||
|
|
"step": 638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7364130434782608,
|
||
|
|
"grad_norm": 0.32812371850013733,
|
||
|
|
"learning_rate": 5.052083333333333e-06,
|
||
|
|
"loss": 1.4587,
|
||
|
|
"step": 639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7391304347826086,
|
||
|
|
"grad_norm": 0.3361847996711731,
|
||
|
|
"learning_rate": 5.029469548133595e-06,
|
||
|
|
"loss": 1.3439,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7418478260869565,
|
||
|
|
"grad_norm": 0.34841278195381165,
|
||
|
|
"learning_rate": 5.0065876152832674e-06,
|
||
|
|
"loss": 1.3689,
|
||
|
|
"step": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7445652173913042,
|
||
|
|
"grad_norm": 0.33427974581718445,
|
||
|
|
"learning_rate": 4.983432736911861e-06,
|
||
|
|
"loss": 1.2804,
|
||
|
|
"step": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7472826086956523,
|
||
|
|
"grad_norm": 0.32697078585624695,
|
||
|
|
"learning_rate": 4.96e-06,
|
||
|
|
"loss": 1.3019,
|
||
|
|
"step": 643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.75,
|
||
|
|
"grad_norm": 0.352584570646286,
|
||
|
|
"learning_rate": 4.93628437290409e-06,
|
||
|
|
"loss": 1.2847,
|
||
|
|
"step": 644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7527173913043477,
|
||
|
|
"grad_norm": 0.3434447944164276,
|
||
|
|
"learning_rate": 4.912280701754385e-06,
|
||
|
|
"loss": 1.3631,
|
||
|
|
"step": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7554347826086958,
|
||
|
|
"grad_norm": 0.3368462920188904,
|
||
|
|
"learning_rate": 4.887983706720978e-06,
|
||
|
|
"loss": 1.4064,
|
||
|
|
"step": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7581521739130435,
|
||
|
|
"grad_norm": 0.33226725459098816,
|
||
|
|
"learning_rate": 4.8633879781420755e-06,
|
||
|
|
"loss": 1.3605,
|
||
|
|
"step": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7608695652173914,
|
||
|
|
"grad_norm": 0.34347623586654663,
|
||
|
|
"learning_rate": 4.83848797250859e-06,
|
||
|
|
"loss": 1.3918,
|
||
|
|
"step": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7635869565217392,
|
||
|
|
"grad_norm": 0.346498966217041,
|
||
|
|
"learning_rate": 4.8132780082987544e-06,
|
||
|
|
"loss": 1.3772,
|
||
|
|
"step": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.766304347826087,
|
||
|
|
"grad_norm": 0.3317979574203491,
|
||
|
|
"learning_rate": 4.787752261656228e-06,
|
||
|
|
"loss": 1.3775,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7690217391304348,
|
||
|
|
"grad_norm": 0.36047762632369995,
|
||
|
|
"learning_rate": 4.761904761904761e-06,
|
||
|
|
"loss": 1.3881,
|
||
|
|
"step": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7717391304347827,
|
||
|
|
"grad_norm": 0.3384213447570801,
|
||
|
|
"learning_rate": 4.735729386892177e-06,
|
||
|
|
"loss": 1.3693,
|
||
|
|
"step": 652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7744565217391304,
|
||
|
|
"grad_norm": 0.33621373772621155,
|
||
|
|
"learning_rate": 4.709219858156028e-06,
|
||
|
|
"loss": 1.4111,
|
||
|
|
"step": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7771739130434783,
|
||
|
|
"grad_norm": 0.33636173605918884,
|
||
|
|
"learning_rate": 4.6823697359029255e-06,
|
||
|
|
"loss": 1.3144,
|
||
|
|
"step": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7798913043478262,
|
||
|
|
"grad_norm": 0.33351048827171326,
|
||
|
|
"learning_rate": 4.655172413793103e-06,
|
||
|
|
"loss": 1.349,
|
||
|
|
"step": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7826086956521738,
|
||
|
|
"grad_norm": 0.32726943492889404,
|
||
|
|
"learning_rate": 4.62762111352133e-06,
|
||
|
|
"loss": 1.3875,
|
||
|
|
"step": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7853260869565217,
|
||
|
|
"grad_norm": 0.3338076174259186,
|
||
|
|
"learning_rate": 4.5997088791848614e-06,
|
||
|
|
"loss": 1.3458,
|
||
|
|
"step": 657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7880434782608696,
|
||
|
|
"grad_norm": 0.3343966603279114,
|
||
|
|
"learning_rate": 4.571428571428571e-06,
|
||
|
|
"loss": 1.3314,
|
||
|
|
"step": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7907608695652173,
|
||
|
|
"grad_norm": 0.33754849433898926,
|
||
|
|
"learning_rate": 4.5427728613569326e-06,
|
||
|
|
"loss": 1.3338,
|
||
|
|
"step": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7934782608695652,
|
||
|
|
"grad_norm": 0.34028756618499756,
|
||
|
|
"learning_rate": 4.51373422420193e-06,
|
||
|
|
"loss": 1.3775,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.796195652173913,
|
||
|
|
"grad_norm": 0.3283347189426422,
|
||
|
|
"learning_rate": 4.4843049327354254e-06,
|
||
|
|
"loss": 1.3437,
|
||
|
|
"step": 661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7989130434782608,
|
||
|
|
"grad_norm": 0.35330507159233093,
|
||
|
|
"learning_rate": 4.454477050413845e-06,
|
||
|
|
"loss": 1.3844,
|
||
|
|
"step": 662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8016304347826086,
|
||
|
|
"grad_norm": 0.33189696073532104,
|
||
|
|
"learning_rate": 4.424242424242424e-06,
|
||
|
|
"loss": 1.3126,
|
||
|
|
"step": 663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8043478260869565,
|
||
|
|
"grad_norm": 0.3308519721031189,
|
||
|
|
"learning_rate": 4.393592677345538e-06,
|
||
|
|
"loss": 1.3042,
|
||
|
|
"step": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8070652173913042,
|
||
|
|
"grad_norm": 0.3336990177631378,
|
||
|
|
"learning_rate": 4.362519201228878e-06,
|
||
|
|
"loss": 1.4633,
|
||
|
|
"step": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8097826086956523,
|
||
|
|
"grad_norm": 0.33930081129074097,
|
||
|
|
"learning_rate": 4.331013147718484e-06,
|
||
|
|
"loss": 1.3027,
|
||
|
|
"step": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8125,
|
||
|
|
"grad_norm": 0.33651721477508545,
|
||
|
|
"learning_rate": 4.299065420560747e-06,
|
||
|
|
"loss": 1.4218,
|
||
|
|
"step": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8152173913043477,
|
||
|
|
"grad_norm": 0.3540966808795929,
|
||
|
|
"learning_rate": 4.266666666666667e-06,
|
||
|
|
"loss": 1.3684,
|
||
|
|
"step": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8179347826086958,
|
||
|
|
"grad_norm": 0.3553326427936554,
|
||
|
|
"learning_rate": 4.2338072669826225e-06,
|
||
|
|
"loss": 1.343,
|
||
|
|
"step": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8206521739130435,
|
||
|
|
"grad_norm": 0.3493621349334717,
|
||
|
|
"learning_rate": 4.2004773269689735e-06,
|
||
|
|
"loss": 1.3468,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8233695652173914,
|
||
|
|
"grad_norm": 0.3427792191505432,
|
||
|
|
"learning_rate": 4.166666666666667e-06,
|
||
|
|
"loss": 1.3775,
|
||
|
|
"step": 671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8260869565217392,
|
||
|
|
"grad_norm": 0.3357887864112854,
|
||
|
|
"learning_rate": 4.1323648103309125e-06,
|
||
|
|
"loss": 1.4103,
|
||
|
|
"step": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.828804347826087,
|
||
|
|
"grad_norm": 0.3339937627315521,
|
||
|
|
"learning_rate": 4.097560975609756e-06,
|
||
|
|
"loss": 1.2352,
|
||
|
|
"step": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8315217391304348,
|
||
|
|
"grad_norm": 0.33390963077545166,
|
||
|
|
"learning_rate": 4.062244062244062e-06,
|
||
|
|
"loss": 1.3348,
|
||
|
|
"step": 674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8342391304347827,
|
||
|
|
"grad_norm": 0.33141598105430603,
|
||
|
|
"learning_rate": 4.0264026402640265e-06,
|
||
|
|
"loss": 1.4008,
|
||
|
|
"step": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8369565217391304,
|
||
|
|
"grad_norm": 0.3481482267379761,
|
||
|
|
"learning_rate": 3.990024937655859e-06,
|
||
|
|
"loss": 1.4426,
|
||
|
|
"step": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8396739130434783,
|
||
|
|
"grad_norm": 0.3348937928676605,
|
||
|
|
"learning_rate": 3.9530988274706864e-06,
|
||
|
|
"loss": 1.3531,
|
||
|
|
"step": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8423913043478262,
|
||
|
|
"grad_norm": 0.34614527225494385,
|
||
|
|
"learning_rate": 3.915611814345991e-06,
|
||
|
|
"loss": 1.4149,
|
||
|
|
"step": 678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8451086956521738,
|
||
|
|
"grad_norm": 0.34718191623687744,
|
||
|
|
"learning_rate": 3.877551020408163e-06,
|
||
|
|
"loss": 1.3121,
|
||
|
|
"step": 679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8478260869565217,
|
||
|
|
"grad_norm": 0.3331008553504944,
|
||
|
|
"learning_rate": 3.8389031705227075e-06,
|
||
|
|
"loss": 1.3773,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8505434782608696,
|
||
|
|
"grad_norm": 0.3339367210865021,
|
||
|
|
"learning_rate": 3.799654576856649e-06,
|
||
|
|
"loss": 1.3741,
|
||
|
|
"step": 681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8532608695652173,
|
||
|
|
"grad_norm": 0.34059810638427734,
|
||
|
|
"learning_rate": 3.7597911227154046e-06,
|
||
|
|
"loss": 1.4186,
|
||
|
|
"step": 682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8559782608695652,
|
||
|
|
"grad_norm": 0.34646710753440857,
|
||
|
|
"learning_rate": 3.7192982456140345e-06,
|
||
|
|
"loss": 1.3247,
|
||
|
|
"step": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.858695652173913,
|
||
|
|
"grad_norm": 0.33644041419029236,
|
||
|
|
"learning_rate": 3.6781609195402296e-06,
|
||
|
|
"loss": 1.2994,
|
||
|
|
"step": 684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8614130434782608,
|
||
|
|
"grad_norm": 0.33584222197532654,
|
||
|
|
"learning_rate": 3.636363636363636e-06,
|
||
|
|
"loss": 1.3625,
|
||
|
|
"step": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8641304347826086,
|
||
|
|
"grad_norm": 0.3542543053627014,
|
||
|
|
"learning_rate": 3.593890386343216e-06,
|
||
|
|
"loss": 1.4085,
|
||
|
|
"step": 686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8668478260869565,
|
||
|
|
"grad_norm": 0.3339426815509796,
|
||
|
|
"learning_rate": 3.550724637681159e-06,
|
||
|
|
"loss": 1.3908,
|
||
|
|
"step": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8695652173913042,
|
||
|
|
"grad_norm": 0.3317144215106964,
|
||
|
|
"learning_rate": 3.5068493150684927e-06,
|
||
|
|
"loss": 1.3254,
|
||
|
|
"step": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8722826086956523,
|
||
|
|
"grad_norm": 0.32547229528427124,
|
||
|
|
"learning_rate": 3.462246777163904e-06,
|
||
|
|
"loss": 1.3813,
|
||
|
|
"step": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.875,
|
||
|
|
"grad_norm": 0.3365728557109833,
|
||
|
|
"learning_rate": 3.416898792943361e-06,
|
||
|
|
"loss": 1.3404,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8777173913043477,
|
||
|
|
"grad_norm": 0.33152568340301514,
|
||
|
|
"learning_rate": 3.370786516853932e-06,
|
||
|
|
"loss": 1.303,
|
||
|
|
"step": 691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8804347826086958,
|
||
|
|
"grad_norm": 0.34790927171707153,
|
||
|
|
"learning_rate": 3.3238904627006605e-06,
|
||
|
|
"loss": 1.3703,
|
||
|
|
"step": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8831521739130435,
|
||
|
|
"grad_norm": 0.3446458876132965,
|
||
|
|
"learning_rate": 3.276190476190476e-06,
|
||
|
|
"loss": 1.316,
|
||
|
|
"step": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8858695652173914,
|
||
|
|
"grad_norm": 0.334216445684433,
|
||
|
|
"learning_rate": 3.2276657060518727e-06,
|
||
|
|
"loss": 1.4189,
|
||
|
|
"step": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8885869565217392,
|
||
|
|
"grad_norm": 0.3326725661754608,
|
||
|
|
"learning_rate": 3.1782945736434107e-06,
|
||
|
|
"loss": 1.2832,
|
||
|
|
"step": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.891304347826087,
|
||
|
|
"grad_norm": 0.34493488073349,
|
||
|
|
"learning_rate": 3.1280547409579666e-06,
|
||
|
|
"loss": 1.3276,
|
||
|
|
"step": 696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8940217391304348,
|
||
|
|
"grad_norm": 0.34864330291748047,
|
||
|
|
"learning_rate": 3.0769230769230766e-06,
|
||
|
|
"loss": 1.3397,
|
||
|
|
"step": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8967391304347827,
|
||
|
|
"grad_norm": 0.3301394581794739,
|
||
|
|
"learning_rate": 3.0248756218905467e-06,
|
||
|
|
"loss": 1.3018,
|
||
|
|
"step": 698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8994565217391304,
|
||
|
|
"grad_norm": 0.33378443121910095,
|
||
|
|
"learning_rate": 2.9718875502008025e-06,
|
||
|
|
"loss": 1.3894,
|
||
|
|
"step": 699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9021739130434783,
|
||
|
|
"grad_norm": 0.33840638399124146,
|
||
|
|
"learning_rate": 2.917933130699088e-06,
|
||
|
|
"loss": 1.371,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9048913043478262,
|
||
|
|
"grad_norm": 0.3386146128177643,
|
||
|
|
"learning_rate": 2.8629856850715747e-06,
|
||
|
|
"loss": 1.3683,
|
||
|
|
"step": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9076086956521738,
|
||
|
|
"grad_norm": 0.33338841795921326,
|
||
|
|
"learning_rate": 2.807017543859649e-06,
|
||
|
|
"loss": 1.4066,
|
||
|
|
"step": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9103260869565217,
|
||
|
|
"grad_norm": 0.32861799001693726,
|
||
|
|
"learning_rate": 2.7499999999999995e-06,
|
||
|
|
"loss": 1.3388,
|
||
|
|
"step": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9130434782608696,
|
||
|
|
"grad_norm": 0.3410465717315674,
|
||
|
|
"learning_rate": 2.691903259726603e-06,
|
||
|
|
"loss": 1.3076,
|
||
|
|
"step": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9157608695652173,
|
||
|
|
"grad_norm": 0.33878254890441895,
|
||
|
|
"learning_rate": 2.6326963906581743e-06,
|
||
|
|
"loss": 1.3261,
|
||
|
|
"step": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9184782608695652,
|
||
|
|
"grad_norm": 0.32100728154182434,
|
||
|
|
"learning_rate": 2.5723472668810287e-06,
|
||
|
|
"loss": 1.324,
|
||
|
|
"step": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.921195652173913,
|
||
|
|
"grad_norm": 0.3330436646938324,
|
||
|
|
"learning_rate": 2.5108225108225104e-06,
|
||
|
|
"loss": 1.278,
|
||
|
|
"step": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9239130434782608,
|
||
|
|
"grad_norm": 0.340689092874527,
|
||
|
|
"learning_rate": 2.448087431693989e-06,
|
||
|
|
"loss": 1.3297,
|
||
|
|
"step": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9266304347826086,
|
||
|
|
"grad_norm": 0.3378203511238098,
|
||
|
|
"learning_rate": 2.3841059602649004e-06,
|
||
|
|
"loss": 1.3884,
|
||
|
|
"step": 709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9293478260869565,
|
||
|
|
"grad_norm": 0.35144487023353577,
|
||
|
|
"learning_rate": 2.3188405797101444e-06,
|
||
|
|
"loss": 1.36,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9320652173913042,
|
||
|
|
"grad_norm": 0.329775333404541,
|
||
|
|
"learning_rate": 2.2522522522522524e-06,
|
||
|
|
"loss": 1.384,
|
||
|
|
"step": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9347826086956523,
|
||
|
|
"grad_norm": 0.33017706871032715,
|
||
|
|
"learning_rate": 2.184300341296928e-06,
|
||
|
|
"loss": 1.3275,
|
||
|
|
"step": 712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9375,
|
||
|
|
"grad_norm": 0.3360842764377594,
|
||
|
|
"learning_rate": 2.114942528735632e-06,
|
||
|
|
"loss": 1.4158,
|
||
|
|
"step": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9402173913043477,
|
||
|
|
"grad_norm": 0.32160329818725586,
|
||
|
|
"learning_rate": 2.044134727061556e-06,
|
||
|
|
"loss": 1.2895,
|
||
|
|
"step": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9429347826086958,
|
||
|
|
"grad_norm": 0.33702775835990906,
|
||
|
|
"learning_rate": 1.9718309859154927e-06,
|
||
|
|
"loss": 1.2626,
|
||
|
|
"step": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9456521739130435,
|
||
|
|
"grad_norm": 0.3408706784248352,
|
||
|
|
"learning_rate": 1.8979833926453144e-06,
|
||
|
|
"loss": 1.3131,
|
||
|
|
"step": 716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9483695652173914,
|
||
|
|
"grad_norm": 0.3265095353126526,
|
||
|
|
"learning_rate": 1.8225419664268583e-06,
|
||
|
|
"loss": 1.2896,
|
||
|
|
"step": 717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9510869565217392,
|
||
|
|
"grad_norm": 0.34003371000289917,
|
||
|
|
"learning_rate": 1.7454545454545452e-06,
|
||
|
|
"loss": 1.2791,
|
||
|
|
"step": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.953804347826087,
|
||
|
|
"grad_norm": 0.33744871616363525,
|
||
|
|
"learning_rate": 1.6666666666666665e-06,
|
||
|
|
"loss": 1.3417,
|
||
|
|
"step": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9565217391304348,
|
||
|
|
"grad_norm": 0.33043915033340454,
|
||
|
|
"learning_rate": 1.5861214374225524e-06,
|
||
|
|
"loss": 1.3383,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9592391304347827,
|
||
|
|
"grad_norm": 0.3270052969455719,
|
||
|
|
"learning_rate": 1.5037593984962404e-06,
|
||
|
|
"loss": 1.358,
|
||
|
|
"step": 721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9619565217391304,
|
||
|
|
"grad_norm": 0.32354652881622314,
|
||
|
|
"learning_rate": 1.4195183776932827e-06,
|
||
|
|
"loss": 1.3476,
|
||
|
|
"step": 722
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9646739130434783,
|
||
|
|
"grad_norm": 0.33073997497558594,
|
||
|
|
"learning_rate": 1.3333333333333332e-06,
|
||
|
|
"loss": 1.3588,
|
||
|
|
"step": 723
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9673913043478262,
|
||
|
|
"grad_norm": 0.33804917335510254,
|
||
|
|
"learning_rate": 1.245136186770428e-06,
|
||
|
|
"loss": 1.3842,
|
||
|
|
"step": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9701086956521738,
|
||
|
|
"grad_norm": 0.3286682367324829,
|
||
|
|
"learning_rate": 1.1548556430446192e-06,
|
||
|
|
"loss": 1.303,
|
||
|
|
"step": 725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9728260869565217,
|
||
|
|
"grad_norm": 0.33602073788642883,
|
||
|
|
"learning_rate": 1.0624169986719786e-06,
|
||
|
|
"loss": 1.2939,
|
||
|
|
"step": 726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9755434782608696,
|
||
|
|
"grad_norm": 0.33070436120033264,
|
||
|
|
"learning_rate": 9.67741935483871e-07,
|
||
|
|
"loss": 1.2889,
|
||
|
|
"step": 727
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9782608695652173,
|
||
|
|
"grad_norm": 0.33206212520599365,
|
||
|
|
"learning_rate": 8.707482993197278e-07,
|
||
|
|
"loss": 1.3471,
|
||
|
|
"step": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9809782608695652,
|
||
|
|
"grad_norm": 0.32087206840515137,
|
||
|
|
"learning_rate": 7.713498622589532e-07,
|
||
|
|
"loss": 1.3782,
|
||
|
|
"step": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.983695652173913,
|
||
|
|
"grad_norm": 0.32111042737960815,
|
||
|
|
"learning_rate": 6.694560669456067e-07,
|
||
|
|
"loss": 1.2801,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9864130434782608,
|
||
|
|
"grad_norm": 0.3236371874809265,
|
||
|
|
"learning_rate": 5.649717514124293e-07,
|
||
|
|
"loss": 1.4388,
|
||
|
|
"step": 731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9891304347826086,
|
||
|
|
"grad_norm": 0.3244304656982422,
|
||
|
|
"learning_rate": 4.5779685264663803e-07,
|
||
|
|
"loss": 1.2649,
|
||
|
|
"step": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9918478260869565,
|
||
|
|
"grad_norm": 0.505721926689148,
|
||
|
|
"learning_rate": 3.478260869565217e-07,
|
||
|
|
"loss": 1.272,
|
||
|
|
"step": 733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9945652173913042,
|
||
|
|
"grad_norm": 0.3223818838596344,
|
||
|
|
"learning_rate": 2.3494860499265783e-07,
|
||
|
|
"loss": 1.3915,
|
||
|
|
"step": 734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9972826086956523,
|
||
|
|
"grad_norm": 0.3199419677257538,
|
||
|
|
"learning_rate": 1.1904761904761904e-07,
|
||
|
|
"loss": 1.3722,
|
||
|
|
"step": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.3224795460700989,
|
||
|
|
"learning_rate": 0,
|
||
|
|
"loss": 1.4072,
|
||
|
|
"step": 736
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 736,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 2,
|
||
|
|
"save_steps": 184,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 4.746705359123513e+19,
|
||
|
|
"train_batch_size": 6,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|