10943 lines
265 KiB
JSON
10943 lines
265 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1557,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0019277108433734939,
|
|
"grad_norm": 2.8518834114074707,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0891,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0038554216867469878,
|
|
"grad_norm": 1.8441249132156372,
|
|
"learning_rate": 2.564102564102564e-07,
|
|
"loss": 0.0539,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.005783132530120482,
|
|
"grad_norm": 2.8263237476348877,
|
|
"learning_rate": 5.128205128205128e-07,
|
|
"loss": 0.099,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0077108433734939755,
|
|
"grad_norm": 2.5051236152648926,
|
|
"learning_rate": 7.692307692307694e-07,
|
|
"loss": 0.0789,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.00963855421686747,
|
|
"grad_norm": 2.6903438568115234,
|
|
"learning_rate": 1.0256410256410257e-06,
|
|
"loss": 0.0881,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.011566265060240964,
|
|
"grad_norm": 2.6205761432647705,
|
|
"learning_rate": 1.282051282051282e-06,
|
|
"loss": 0.0776,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.013493975903614458,
|
|
"grad_norm": 2.6309337615966797,
|
|
"learning_rate": 1.5384615384615387e-06,
|
|
"loss": 0.0827,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.015421686746987951,
|
|
"grad_norm": 1.5427855253219604,
|
|
"learning_rate": 1.794871794871795e-06,
|
|
"loss": 0.0577,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.017349397590361446,
|
|
"grad_norm": 1.0973446369171143,
|
|
"learning_rate": 2.0512820512820513e-06,
|
|
"loss": 0.04,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.01927710843373494,
|
|
"grad_norm": 1.3253350257873535,
|
|
"learning_rate": 2.307692307692308e-06,
|
|
"loss": 0.0506,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.021204819277108433,
|
|
"grad_norm": 1.588739037513733,
|
|
"learning_rate": 2.564102564102564e-06,
|
|
"loss": 0.0874,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.02313253012048193,
|
|
"grad_norm": 1.4987014532089233,
|
|
"learning_rate": 2.8205128205128207e-06,
|
|
"loss": 0.0597,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.02506024096385542,
|
|
"grad_norm": 1.6571592092514038,
|
|
"learning_rate": 3.0769230769230774e-06,
|
|
"loss": 0.0559,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.026987951807228915,
|
|
"grad_norm": 1.8860628604888916,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.0688,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.02891566265060241,
|
|
"grad_norm": 1.3202295303344727,
|
|
"learning_rate": 3.58974358974359e-06,
|
|
"loss": 0.0433,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.030843373493975902,
|
|
"grad_norm": 1.5870612859725952,
|
|
"learning_rate": 3.846153846153847e-06,
|
|
"loss": 0.0695,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.0327710843373494,
|
|
"grad_norm": 0.9192284345626831,
|
|
"learning_rate": 4.102564102564103e-06,
|
|
"loss": 0.0392,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.03469879518072289,
|
|
"grad_norm": 0.7950155735015869,
|
|
"learning_rate": 4.358974358974359e-06,
|
|
"loss": 0.0351,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.03662650602409639,
|
|
"grad_norm": 0.8854314684867859,
|
|
"learning_rate": 4.615384615384616e-06,
|
|
"loss": 0.0356,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.03855421686746988,
|
|
"grad_norm": 0.9546788930892944,
|
|
"learning_rate": 4.871794871794872e-06,
|
|
"loss": 0.0427,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.04048192771084337,
|
|
"grad_norm": 0.6315903663635254,
|
|
"learning_rate": 5.128205128205128e-06,
|
|
"loss": 0.0397,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.042409638554216866,
|
|
"grad_norm": 0.9230924844741821,
|
|
"learning_rate": 5.384615384615385e-06,
|
|
"loss": 0.0481,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.04433734939759036,
|
|
"grad_norm": 0.711546003818512,
|
|
"learning_rate": 5.641025641025641e-06,
|
|
"loss": 0.0479,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.04626506024096386,
|
|
"grad_norm": 0.5288046598434448,
|
|
"learning_rate": 5.897435897435898e-06,
|
|
"loss": 0.0182,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.04819277108433735,
|
|
"grad_norm": 0.9420496225357056,
|
|
"learning_rate": 6.153846153846155e-06,
|
|
"loss": 0.0389,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.05012048192771084,
|
|
"grad_norm": 0.5001983046531677,
|
|
"learning_rate": 6.410256410256412e-06,
|
|
"loss": 0.0268,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.052048192771084335,
|
|
"grad_norm": 0.8084653615951538,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.0367,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.05397590361445783,
|
|
"grad_norm": 0.7195103764533997,
|
|
"learning_rate": 6.923076923076923e-06,
|
|
"loss": 0.0251,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.055903614457831326,
|
|
"grad_norm": 0.529958963394165,
|
|
"learning_rate": 7.17948717948718e-06,
|
|
"loss": 0.0289,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.05783132530120482,
|
|
"grad_norm": 0.795376181602478,
|
|
"learning_rate": 7.435897435897437e-06,
|
|
"loss": 0.043,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.059759036144578316,
|
|
"grad_norm": 0.6366249918937683,
|
|
"learning_rate": 7.692307692307694e-06,
|
|
"loss": 0.029,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.061686746987951804,
|
|
"grad_norm": 0.5414115190505981,
|
|
"learning_rate": 7.948717948717949e-06,
|
|
"loss": 0.0365,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.0636144578313253,
|
|
"grad_norm": 0.9350972175598145,
|
|
"learning_rate": 8.205128205128205e-06,
|
|
"loss": 0.0283,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.0655421686746988,
|
|
"grad_norm": 0.5660741925239563,
|
|
"learning_rate": 8.461538461538462e-06,
|
|
"loss": 0.0234,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.06746987951807229,
|
|
"grad_norm": 0.5623988509178162,
|
|
"learning_rate": 8.717948717948719e-06,
|
|
"loss": 0.0307,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.06939759036144579,
|
|
"grad_norm": 0.5260195732116699,
|
|
"learning_rate": 8.974358974358976e-06,
|
|
"loss": 0.0264,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.07132530120481928,
|
|
"grad_norm": 0.4934785068035126,
|
|
"learning_rate": 9.230769230769232e-06,
|
|
"loss": 0.0224,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.07325301204819278,
|
|
"grad_norm": 0.4797322154045105,
|
|
"learning_rate": 9.487179487179487e-06,
|
|
"loss": 0.0163,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.07518072289156627,
|
|
"grad_norm": 0.4739217460155487,
|
|
"learning_rate": 9.743589743589744e-06,
|
|
"loss": 0.0165,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.07710843373493977,
|
|
"grad_norm": 0.4527677595615387,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.0163,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.07903614457831325,
|
|
"grad_norm": 0.6241316795349121,
|
|
"learning_rate": 1.0256410256410256e-05,
|
|
"loss": 0.0302,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.08096385542168674,
|
|
"grad_norm": 0.639043927192688,
|
|
"learning_rate": 1.0512820512820514e-05,
|
|
"loss": 0.0312,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.08289156626506024,
|
|
"grad_norm": 0.5121409296989441,
|
|
"learning_rate": 1.076923076923077e-05,
|
|
"loss": 0.0256,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.08481927710843373,
|
|
"grad_norm": 0.6340477466583252,
|
|
"learning_rate": 1.1025641025641028e-05,
|
|
"loss": 0.04,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.08674698795180723,
|
|
"grad_norm": 0.5260409712791443,
|
|
"learning_rate": 1.1282051282051283e-05,
|
|
"loss": 0.0282,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.08867469879518072,
|
|
"grad_norm": 0.6390711069107056,
|
|
"learning_rate": 1.1538461538461538e-05,
|
|
"loss": 0.0243,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.09060240963855422,
|
|
"grad_norm": 0.46469295024871826,
|
|
"learning_rate": 1.1794871794871796e-05,
|
|
"loss": 0.0208,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.09253012048192771,
|
|
"grad_norm": 0.8711516857147217,
|
|
"learning_rate": 1.2051282051282051e-05,
|
|
"loss": 0.0291,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.09445783132530121,
|
|
"grad_norm": 0.9164300560951233,
|
|
"learning_rate": 1.230769230769231e-05,
|
|
"loss": 0.0342,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.0963855421686747,
|
|
"grad_norm": 0.5401139259338379,
|
|
"learning_rate": 1.2564102564102565e-05,
|
|
"loss": 0.0185,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.0983132530120482,
|
|
"grad_norm": 0.44393008947372437,
|
|
"learning_rate": 1.2820512820512823e-05,
|
|
"loss": 0.0228,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.10024096385542168,
|
|
"grad_norm": 0.3855767846107483,
|
|
"learning_rate": 1.3076923076923078e-05,
|
|
"loss": 0.0176,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.10216867469879518,
|
|
"grad_norm": 0.8561235070228577,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.0433,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.10409638554216867,
|
|
"grad_norm": 0.768002450466156,
|
|
"learning_rate": 1.3589743589743592e-05,
|
|
"loss": 0.0245,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.10602409638554217,
|
|
"grad_norm": 0.4559759497642517,
|
|
"learning_rate": 1.3846153846153847e-05,
|
|
"loss": 0.0224,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.10795180722891566,
|
|
"grad_norm": 0.6203847527503967,
|
|
"learning_rate": 1.4102564102564105e-05,
|
|
"loss": 0.0296,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.10987951807228916,
|
|
"grad_norm": 0.6651368141174316,
|
|
"learning_rate": 1.435897435897436e-05,
|
|
"loss": 0.0336,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.11180722891566265,
|
|
"grad_norm": 0.377734512090683,
|
|
"learning_rate": 1.4615384615384615e-05,
|
|
"loss": 0.0196,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.11373493975903615,
|
|
"grad_norm": 0.687568724155426,
|
|
"learning_rate": 1.4871794871794874e-05,
|
|
"loss": 0.0207,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.11566265060240964,
|
|
"grad_norm": 0.7905604243278503,
|
|
"learning_rate": 1.5128205128205129e-05,
|
|
"loss": 0.047,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.11759036144578314,
|
|
"grad_norm": 0.7938196063041687,
|
|
"learning_rate": 1.5384615384615387e-05,
|
|
"loss": 0.0198,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.11951807228915663,
|
|
"grad_norm": 0.41340553760528564,
|
|
"learning_rate": 1.5641025641025644e-05,
|
|
"loss": 0.0161,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.12144578313253013,
|
|
"grad_norm": 0.5668172240257263,
|
|
"learning_rate": 1.5897435897435897e-05,
|
|
"loss": 0.0275,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.12337349397590361,
|
|
"grad_norm": 0.48333367705345154,
|
|
"learning_rate": 1.6153846153846154e-05,
|
|
"loss": 0.0137,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.12530120481927712,
|
|
"grad_norm": 0.6843933463096619,
|
|
"learning_rate": 1.641025641025641e-05,
|
|
"loss": 0.0294,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.1272289156626506,
|
|
"grad_norm": 0.7789272665977478,
|
|
"learning_rate": 1.6666666666666667e-05,
|
|
"loss": 0.0401,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.1291566265060241,
|
|
"grad_norm": 0.6203492879867554,
|
|
"learning_rate": 1.6923076923076924e-05,
|
|
"loss": 0.0292,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.1310843373493976,
|
|
"grad_norm": 0.5940662622451782,
|
|
"learning_rate": 1.717948717948718e-05,
|
|
"loss": 0.0178,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.13301204819277107,
|
|
"grad_norm": 0.35504868626594543,
|
|
"learning_rate": 1.7435897435897438e-05,
|
|
"loss": 0.0129,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.13493975903614458,
|
|
"grad_norm": 0.8796699643135071,
|
|
"learning_rate": 1.7692307692307694e-05,
|
|
"loss": 0.034,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.13686746987951806,
|
|
"grad_norm": 0.967444896697998,
|
|
"learning_rate": 1.794871794871795e-05,
|
|
"loss": 0.0266,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.13879518072289157,
|
|
"grad_norm": 0.4428526759147644,
|
|
"learning_rate": 1.8205128205128208e-05,
|
|
"loss": 0.0223,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.14072289156626505,
|
|
"grad_norm": 0.42897751927375793,
|
|
"learning_rate": 1.8461538461538465e-05,
|
|
"loss": 0.0187,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.14265060240963856,
|
|
"grad_norm": 0.5100914835929871,
|
|
"learning_rate": 1.8717948717948718e-05,
|
|
"loss": 0.0164,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.14457831325301204,
|
|
"grad_norm": 0.6028861999511719,
|
|
"learning_rate": 1.8974358974358975e-05,
|
|
"loss": 0.0164,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.14650602409638555,
|
|
"grad_norm": 0.6187024116516113,
|
|
"learning_rate": 1.923076923076923e-05,
|
|
"loss": 0.0296,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.14843373493975903,
|
|
"grad_norm": 0.4822489619255066,
|
|
"learning_rate": 1.9487179487179488e-05,
|
|
"loss": 0.0148,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.15036144578313254,
|
|
"grad_norm": 0.7231149673461914,
|
|
"learning_rate": 1.9743589743589745e-05,
|
|
"loss": 0.0395,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.15228915662650602,
|
|
"grad_norm": 0.8409642577171326,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.0446,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.15421686746987953,
|
|
"grad_norm": 0.4883500039577484,
|
|
"learning_rate": 2.025641025641026e-05,
|
|
"loss": 0.0206,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.156144578313253,
|
|
"grad_norm": 0.6287479400634766,
|
|
"learning_rate": 2.0512820512820512e-05,
|
|
"loss": 0.0333,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.1580722891566265,
|
|
"grad_norm": 0.5041632652282715,
|
|
"learning_rate": 2.0769230769230772e-05,
|
|
"loss": 0.0414,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.5103405117988586,
|
|
"learning_rate": 2.102564102564103e-05,
|
|
"loss": 0.045,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.16192771084337348,
|
|
"grad_norm": 0.493161678314209,
|
|
"learning_rate": 2.1282051282051285e-05,
|
|
"loss": 0.021,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.163855421686747,
|
|
"grad_norm": 0.908843994140625,
|
|
"learning_rate": 2.153846153846154e-05,
|
|
"loss": 0.0389,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.16578313253012048,
|
|
"grad_norm": 0.5067003965377808,
|
|
"learning_rate": 2.1794871794871795e-05,
|
|
"loss": 0.0272,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.16771084337349398,
|
|
"grad_norm": 0.5791381597518921,
|
|
"learning_rate": 2.2051282051282056e-05,
|
|
"loss": 0.0368,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.16963855421686747,
|
|
"grad_norm": 0.7056036591529846,
|
|
"learning_rate": 2.230769230769231e-05,
|
|
"loss": 0.0284,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.17156626506024097,
|
|
"grad_norm": 0.6563822031021118,
|
|
"learning_rate": 2.2564102564102566e-05,
|
|
"loss": 0.0646,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.17349397590361446,
|
|
"grad_norm": 0.9483286142349243,
|
|
"learning_rate": 2.2820512820512822e-05,
|
|
"loss": 0.0439,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.17542168674698796,
|
|
"grad_norm": 0.370664119720459,
|
|
"learning_rate": 2.3076923076923076e-05,
|
|
"loss": 0.0109,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.17734939759036145,
|
|
"grad_norm": 0.9776477813720703,
|
|
"learning_rate": 2.3333333333333336e-05,
|
|
"loss": 0.0458,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.17927710843373493,
|
|
"grad_norm": 0.45710092782974243,
|
|
"learning_rate": 2.3589743589743593e-05,
|
|
"loss": 0.0212,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.18120481927710844,
|
|
"grad_norm": 0.8623896837234497,
|
|
"learning_rate": 2.384615384615385e-05,
|
|
"loss": 0.0215,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.18313253012048192,
|
|
"grad_norm": 0.55814528465271,
|
|
"learning_rate": 2.4102564102564103e-05,
|
|
"loss": 0.0218,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.18506024096385543,
|
|
"grad_norm": 0.49882641434669495,
|
|
"learning_rate": 2.435897435897436e-05,
|
|
"loss": 0.0268,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.1869879518072289,
|
|
"grad_norm": 0.3508654534816742,
|
|
"learning_rate": 2.461538461538462e-05,
|
|
"loss": 0.0172,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.18891566265060242,
|
|
"grad_norm": 0.601170003414154,
|
|
"learning_rate": 2.4871794871794873e-05,
|
|
"loss": 0.0208,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.1908433734939759,
|
|
"grad_norm": 1.1748133897781372,
|
|
"learning_rate": 2.512820512820513e-05,
|
|
"loss": 0.0259,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.1927710843373494,
|
|
"grad_norm": 0.46370384097099304,
|
|
"learning_rate": 2.5384615384615386e-05,
|
|
"loss": 0.0242,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.1946987951807229,
|
|
"grad_norm": 0.525010883808136,
|
|
"learning_rate": 2.5641025641025646e-05,
|
|
"loss": 0.0188,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.1966265060240964,
|
|
"grad_norm": 0.766501784324646,
|
|
"learning_rate": 2.58974358974359e-05,
|
|
"loss": 0.0584,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.19855421686746988,
|
|
"grad_norm": 0.3572964370250702,
|
|
"learning_rate": 2.6153846153846157e-05,
|
|
"loss": 0.0131,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.20048192771084336,
|
|
"grad_norm": 0.6467130780220032,
|
|
"learning_rate": 2.6410256410256413e-05,
|
|
"loss": 0.0231,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.20240963855421687,
|
|
"grad_norm": 1.1852102279663086,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.027,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.20433734939759035,
|
|
"grad_norm": 2.3659932613372803,
|
|
"learning_rate": 2.6923076923076927e-05,
|
|
"loss": 0.0224,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.20626506024096386,
|
|
"grad_norm": 0.5343687534332275,
|
|
"learning_rate": 2.7179487179487183e-05,
|
|
"loss": 0.0198,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.20819277108433734,
|
|
"grad_norm": 1.852160096168518,
|
|
"learning_rate": 2.7435897435897437e-05,
|
|
"loss": 0.032,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.21012048192771085,
|
|
"grad_norm": 0.47291702032089233,
|
|
"learning_rate": 2.7692307692307694e-05,
|
|
"loss": 0.0117,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.21204819277108433,
|
|
"grad_norm": 0.7623187899589539,
|
|
"learning_rate": 2.794871794871795e-05,
|
|
"loss": 0.0337,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.21397590361445784,
|
|
"grad_norm": 0.5272570848464966,
|
|
"learning_rate": 2.820512820512821e-05,
|
|
"loss": 0.0131,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.21590361445783132,
|
|
"grad_norm": 0.5568500757217407,
|
|
"learning_rate": 2.8461538461538464e-05,
|
|
"loss": 0.0233,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.21783132530120483,
|
|
"grad_norm": 0.4008469879627228,
|
|
"learning_rate": 2.871794871794872e-05,
|
|
"loss": 0.0204,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.2197590361445783,
|
|
"grad_norm": 0.4888612926006317,
|
|
"learning_rate": 2.8974358974358977e-05,
|
|
"loss": 0.016,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.2216867469879518,
|
|
"grad_norm": 0.44903355836868286,
|
|
"learning_rate": 2.923076923076923e-05,
|
|
"loss": 0.0135,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.2236144578313253,
|
|
"grad_norm": 0.9266762733459473,
|
|
"learning_rate": 2.948717948717949e-05,
|
|
"loss": 0.0233,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.22554216867469878,
|
|
"grad_norm": 0.5352638959884644,
|
|
"learning_rate": 2.9743589743589747e-05,
|
|
"loss": 0.0198,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.2274698795180723,
|
|
"grad_norm": 0.6051343679428101,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.0246,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.22939759036144577,
|
|
"grad_norm": 0.9971133470535278,
|
|
"learning_rate": 3.0256410256410257e-05,
|
|
"loss": 0.025,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.23132530120481928,
|
|
"grad_norm": 0.704236626625061,
|
|
"learning_rate": 3.0512820512820514e-05,
|
|
"loss": 0.031,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.23325301204819276,
|
|
"grad_norm": 0.6137097477912903,
|
|
"learning_rate": 3.0769230769230774e-05,
|
|
"loss": 0.0519,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.23518072289156627,
|
|
"grad_norm": 0.7396159768104553,
|
|
"learning_rate": 3.102564102564103e-05,
|
|
"loss": 0.0325,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.23710843373493976,
|
|
"grad_norm": 1.3282053470611572,
|
|
"learning_rate": 3.128205128205129e-05,
|
|
"loss": 0.0252,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.23903614457831326,
|
|
"grad_norm": 0.5220731496810913,
|
|
"learning_rate": 3.153846153846154e-05,
|
|
"loss": 0.0262,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.24096385542168675,
|
|
"grad_norm": 0.5357242822647095,
|
|
"learning_rate": 3.1794871794871795e-05,
|
|
"loss": 0.0243,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.24289156626506025,
|
|
"grad_norm": 0.48207753896713257,
|
|
"learning_rate": 3.205128205128206e-05,
|
|
"loss": 0.0178,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.24481927710843374,
|
|
"grad_norm": 0.552988588809967,
|
|
"learning_rate": 3.230769230769231e-05,
|
|
"loss": 0.023,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.24674698795180722,
|
|
"grad_norm": 1.7962840795516968,
|
|
"learning_rate": 3.2564102564102565e-05,
|
|
"loss": 0.032,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.24867469879518073,
|
|
"grad_norm": 1.6404600143432617,
|
|
"learning_rate": 3.282051282051282e-05,
|
|
"loss": 0.0231,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.25060240963855424,
|
|
"grad_norm": 0.39142486453056335,
|
|
"learning_rate": 3.307692307692308e-05,
|
|
"loss": 0.0147,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2525301204819277,
|
|
"grad_norm": 1.3272887468338013,
|
|
"learning_rate": 3.3333333333333335e-05,
|
|
"loss": 0.0439,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.2544578313253012,
|
|
"grad_norm": 1.5122811794281006,
|
|
"learning_rate": 3.358974358974359e-05,
|
|
"loss": 0.0282,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.2563855421686747,
|
|
"grad_norm": 1.8542430400848389,
|
|
"learning_rate": 3.384615384615385e-05,
|
|
"loss": 0.0515,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.2583132530120482,
|
|
"grad_norm": 4.059277534484863,
|
|
"learning_rate": 3.4102564102564105e-05,
|
|
"loss": 0.0781,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.26024096385542167,
|
|
"grad_norm": 0.6206214427947998,
|
|
"learning_rate": 3.435897435897436e-05,
|
|
"loss": 0.0306,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.2621686746987952,
|
|
"grad_norm": 0.4575510323047638,
|
|
"learning_rate": 3.461538461538462e-05,
|
|
"loss": 0.0154,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2640963855421687,
|
|
"grad_norm": 1.1556978225708008,
|
|
"learning_rate": 3.4871794871794875e-05,
|
|
"loss": 0.0235,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.26602409638554214,
|
|
"grad_norm": 0.6975051760673523,
|
|
"learning_rate": 3.512820512820513e-05,
|
|
"loss": 0.0453,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.26795180722891565,
|
|
"grad_norm": 0.8686623573303223,
|
|
"learning_rate": 3.538461538461539e-05,
|
|
"loss": 0.0427,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.26987951807228916,
|
|
"grad_norm": 2.0681848526000977,
|
|
"learning_rate": 3.5641025641025646e-05,
|
|
"loss": 0.04,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.27180722891566267,
|
|
"grad_norm": 0.4397984445095062,
|
|
"learning_rate": 3.58974358974359e-05,
|
|
"loss": 0.0188,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2737349397590361,
|
|
"grad_norm": 0.5871334075927734,
|
|
"learning_rate": 3.615384615384616e-05,
|
|
"loss": 0.0253,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.27566265060240963,
|
|
"grad_norm": 1.1078568696975708,
|
|
"learning_rate": 3.6410256410256416e-05,
|
|
"loss": 0.0316,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.27759036144578314,
|
|
"grad_norm": 0.5691841840744019,
|
|
"learning_rate": 3.6666666666666666e-05,
|
|
"loss": 0.0266,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.27951807228915665,
|
|
"grad_norm": 0.7896255254745483,
|
|
"learning_rate": 3.692307692307693e-05,
|
|
"loss": 0.0281,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.2814457831325301,
|
|
"grad_norm": 0.9988337159156799,
|
|
"learning_rate": 3.7179487179487186e-05,
|
|
"loss": 0.0295,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2833734939759036,
|
|
"grad_norm": 0.9811834692955017,
|
|
"learning_rate": 3.7435897435897436e-05,
|
|
"loss": 0.0322,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2853012048192771,
|
|
"grad_norm": 0.6503105759620667,
|
|
"learning_rate": 3.769230769230769e-05,
|
|
"loss": 0.0266,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.28722891566265063,
|
|
"grad_norm": 1.9164355993270874,
|
|
"learning_rate": 3.794871794871795e-05,
|
|
"loss": 0.0677,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.2891566265060241,
|
|
"grad_norm": 1.1724557876586914,
|
|
"learning_rate": 3.820512820512821e-05,
|
|
"loss": 0.0324,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.2910843373493976,
|
|
"grad_norm": 0.8482469916343689,
|
|
"learning_rate": 3.846153846153846e-05,
|
|
"loss": 0.0259,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.2930120481927711,
|
|
"grad_norm": 0.8572830557823181,
|
|
"learning_rate": 3.871794871794872e-05,
|
|
"loss": 0.0358,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.29493975903614456,
|
|
"grad_norm": 0.6630825400352478,
|
|
"learning_rate": 3.8974358974358976e-05,
|
|
"loss": 0.0447,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.29686746987951806,
|
|
"grad_norm": 0.9197093844413757,
|
|
"learning_rate": 3.923076923076923e-05,
|
|
"loss": 0.0409,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.2987951807228916,
|
|
"grad_norm": 0.6976819634437561,
|
|
"learning_rate": 3.948717948717949e-05,
|
|
"loss": 0.0317,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3007228915662651,
|
|
"grad_norm": 0.7353514432907104,
|
|
"learning_rate": 3.9743589743589747e-05,
|
|
"loss": 0.0306,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.30265060240963854,
|
|
"grad_norm": 0.5730232000350952,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.0324,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.30457831325301205,
|
|
"grad_norm": 0.7852078676223755,
|
|
"learning_rate": 3.999994971675547e-05,
|
|
"loss": 0.0354,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.30650602409638555,
|
|
"grad_norm": 0.5924715399742126,
|
|
"learning_rate": 3.999979886727471e-05,
|
|
"loss": 0.0366,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.30843373493975906,
|
|
"grad_norm": 0.7359845638275146,
|
|
"learning_rate": 3.999954745231624e-05,
|
|
"loss": 0.0437,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.3103614457831325,
|
|
"grad_norm": 0.7866976857185364,
|
|
"learning_rate": 3.999919547314426e-05,
|
|
"loss": 0.0363,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.312289156626506,
|
|
"grad_norm": 0.7425745129585266,
|
|
"learning_rate": 3.999874293152863e-05,
|
|
"loss": 0.0259,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.31421686746987953,
|
|
"grad_norm": 1.8922245502471924,
|
|
"learning_rate": 3.9998189829744885e-05,
|
|
"loss": 0.0341,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.316144578313253,
|
|
"grad_norm": 0.7908634543418884,
|
|
"learning_rate": 3.99975361705742e-05,
|
|
"loss": 0.0424,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.3180722891566265,
|
|
"grad_norm": 2.047368049621582,
|
|
"learning_rate": 3.999678195730337e-05,
|
|
"loss": 0.0535,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.5702639222145081,
|
|
"learning_rate": 3.999592719372484e-05,
|
|
"loss": 0.0284,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.3219277108433735,
|
|
"grad_norm": 0.45015648007392883,
|
|
"learning_rate": 3.9994971884136636e-05,
|
|
"loss": 0.0313,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.32385542168674697,
|
|
"grad_norm": 4.094679355621338,
|
|
"learning_rate": 3.9993916033342355e-05,
|
|
"loss": 0.0524,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.3257831325301205,
|
|
"grad_norm": 0.800846517086029,
|
|
"learning_rate": 3.999275964665117e-05,
|
|
"loss": 0.0282,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.327710843373494,
|
|
"grad_norm": 0.47881078720092773,
|
|
"learning_rate": 3.999150272987776e-05,
|
|
"loss": 0.0293,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3296385542168675,
|
|
"grad_norm": 0.5716657638549805,
|
|
"learning_rate": 3.999014528934232e-05,
|
|
"loss": 0.0221,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.33156626506024095,
|
|
"grad_norm": 0.6333311200141907,
|
|
"learning_rate": 3.998868733187048e-05,
|
|
"loss": 0.0302,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.33349397590361446,
|
|
"grad_norm": 6.642521858215332,
|
|
"learning_rate": 3.998712886479335e-05,
|
|
"loss": 0.0364,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.33542168674698797,
|
|
"grad_norm": 0.7515506148338318,
|
|
"learning_rate": 3.998546989594739e-05,
|
|
"loss": 0.0296,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.3373493975903614,
|
|
"grad_norm": 1.0728015899658203,
|
|
"learning_rate": 3.998371043367445e-05,
|
|
"loss": 0.0549,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.33927710843373493,
|
|
"grad_norm": 1.3025579452514648,
|
|
"learning_rate": 3.998185048682166e-05,
|
|
"loss": 0.0577,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.34120481927710844,
|
|
"grad_norm": 1.0962958335876465,
|
|
"learning_rate": 3.997989006474144e-05,
|
|
"loss": 0.0313,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.34313253012048195,
|
|
"grad_norm": 0.7064313292503357,
|
|
"learning_rate": 3.997782917729143e-05,
|
|
"loss": 0.0309,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.3450602409638554,
|
|
"grad_norm": 0.43374207615852356,
|
|
"learning_rate": 3.997566783483445e-05,
|
|
"loss": 0.0166,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.3469879518072289,
|
|
"grad_norm": 0.7236390113830566,
|
|
"learning_rate": 3.9973406048238413e-05,
|
|
"loss": 0.0254,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.3489156626506024,
|
|
"grad_norm": 0.5041500926017761,
|
|
"learning_rate": 3.9971043828876334e-05,
|
|
"loss": 0.0239,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.35084337349397593,
|
|
"grad_norm": 1.2744532823562622,
|
|
"learning_rate": 3.9968581188626204e-05,
|
|
"loss": 0.0404,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.3527710843373494,
|
|
"grad_norm": 0.45845362544059753,
|
|
"learning_rate": 3.996601813987098e-05,
|
|
"loss": 0.0127,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.3546987951807229,
|
|
"grad_norm": 0.4426881968975067,
|
|
"learning_rate": 3.996335469549852e-05,
|
|
"loss": 0.0176,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.3566265060240964,
|
|
"grad_norm": 1.0030732154846191,
|
|
"learning_rate": 3.9960590868901465e-05,
|
|
"loss": 0.0457,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.35855421686746985,
|
|
"grad_norm": 0.6428582668304443,
|
|
"learning_rate": 3.995772667397725e-05,
|
|
"loss": 0.0271,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.36048192771084336,
|
|
"grad_norm": 0.5335744619369507,
|
|
"learning_rate": 3.995476212512795e-05,
|
|
"loss": 0.0297,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.3624096385542169,
|
|
"grad_norm": 0.6995761394500732,
|
|
"learning_rate": 3.99516972372603e-05,
|
|
"loss": 0.0322,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.3643373493975904,
|
|
"grad_norm": 0.765511155128479,
|
|
"learning_rate": 3.9948532025785546e-05,
|
|
"loss": 0.0253,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.36626506024096384,
|
|
"grad_norm": 0.6165828108787537,
|
|
"learning_rate": 3.9945266506619403e-05,
|
|
"loss": 0.0355,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.36819277108433734,
|
|
"grad_norm": 0.851970911026001,
|
|
"learning_rate": 3.994190069618195e-05,
|
|
"loss": 0.056,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.37012048192771085,
|
|
"grad_norm": 0.9850023984909058,
|
|
"learning_rate": 3.993843461139757e-05,
|
|
"loss": 0.0415,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.37204819277108436,
|
|
"grad_norm": 0.7455295324325562,
|
|
"learning_rate": 3.9934868269694886e-05,
|
|
"loss": 0.0379,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.3739759036144578,
|
|
"grad_norm": 1.159469723701477,
|
|
"learning_rate": 3.9931201689006595e-05,
|
|
"loss": 0.0237,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.3759036144578313,
|
|
"grad_norm": 0.5490080118179321,
|
|
"learning_rate": 3.992743488776947e-05,
|
|
"loss": 0.024,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.37783132530120483,
|
|
"grad_norm": 1.279831886291504,
|
|
"learning_rate": 3.992356788492421e-05,
|
|
"loss": 0.0273,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.3797590361445783,
|
|
"grad_norm": 0.859104335308075,
|
|
"learning_rate": 3.9919600699915355e-05,
|
|
"loss": 0.0411,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.3816867469879518,
|
|
"grad_norm": 1.2525300979614258,
|
|
"learning_rate": 3.991553335269119e-05,
|
|
"loss": 0.0857,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.3836144578313253,
|
|
"grad_norm": 0.4924193024635315,
|
|
"learning_rate": 3.991136586370367e-05,
|
|
"loss": 0.0294,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.3855421686746988,
|
|
"grad_norm": 1.417190670967102,
|
|
"learning_rate": 3.990709825390828e-05,
|
|
"loss": 0.0395,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.38746987951807227,
|
|
"grad_norm": 0.6172056198120117,
|
|
"learning_rate": 3.9902730544763936e-05,
|
|
"loss": 0.0194,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.3893975903614458,
|
|
"grad_norm": 0.7292149662971497,
|
|
"learning_rate": 3.989826275823291e-05,
|
|
"loss": 0.0381,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.3913253012048193,
|
|
"grad_norm": 0.5949816107749939,
|
|
"learning_rate": 3.989369491678067e-05,
|
|
"loss": 0.0254,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.3932530120481928,
|
|
"grad_norm": 0.6012582182884216,
|
|
"learning_rate": 3.988902704337582e-05,
|
|
"loss": 0.048,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.39518072289156625,
|
|
"grad_norm": 0.6273590922355652,
|
|
"learning_rate": 3.9884259161489936e-05,
|
|
"loss": 0.0268,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.39710843373493976,
|
|
"grad_norm": 0.9615244269371033,
|
|
"learning_rate": 3.987939129509746e-05,
|
|
"loss": 0.0192,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.39903614457831327,
|
|
"grad_norm": 0.6009241342544556,
|
|
"learning_rate": 3.9874423468675624e-05,
|
|
"loss": 0.0362,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.4009638554216867,
|
|
"grad_norm": 0.411335289478302,
|
|
"learning_rate": 3.9869355707204266e-05,
|
|
"loss": 0.017,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.40289156626506023,
|
|
"grad_norm": 0.6151527166366577,
|
|
"learning_rate": 3.986418803616573e-05,
|
|
"loss": 0.0283,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.40481927710843374,
|
|
"grad_norm": 0.33808204531669617,
|
|
"learning_rate": 3.985892048154474e-05,
|
|
"loss": 0.0158,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.40674698795180725,
|
|
"grad_norm": 0.5464187860488892,
|
|
"learning_rate": 3.9853553069828284e-05,
|
|
"loss": 0.0292,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.4086746987951807,
|
|
"grad_norm": 0.6658390760421753,
|
|
"learning_rate": 3.984808582800543e-05,
|
|
"loss": 0.0281,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.4106024096385542,
|
|
"grad_norm": 0.4253764748573303,
|
|
"learning_rate": 3.984251878356726e-05,
|
|
"loss": 0.031,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.4125301204819277,
|
|
"grad_norm": 0.32309481501579285,
|
|
"learning_rate": 3.983685196450667e-05,
|
|
"loss": 0.0166,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.41445783132530123,
|
|
"grad_norm": 0.43756410479545593,
|
|
"learning_rate": 3.9831085399318265e-05,
|
|
"loss": 0.0326,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.4163855421686747,
|
|
"grad_norm": 0.264046847820282,
|
|
"learning_rate": 3.982521911699822e-05,
|
|
"loss": 0.0118,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.4183132530120482,
|
|
"grad_norm": 0.8630897402763367,
|
|
"learning_rate": 3.9819253147044084e-05,
|
|
"loss": 0.0246,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.4202409638554217,
|
|
"grad_norm": 0.6923379898071289,
|
|
"learning_rate": 3.98131875194547e-05,
|
|
"loss": 0.036,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.42216867469879515,
|
|
"grad_norm": 0.5874778628349304,
|
|
"learning_rate": 3.9807022264730024e-05,
|
|
"loss": 0.0255,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.42409638554216866,
|
|
"grad_norm": 0.394336074590683,
|
|
"learning_rate": 3.980075741387094e-05,
|
|
"loss": 0.0187,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4260240963855422,
|
|
"grad_norm": 0.6300327777862549,
|
|
"learning_rate": 3.979439299837915e-05,
|
|
"loss": 0.0214,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.4279518072289157,
|
|
"grad_norm": 0.5200467109680176,
|
|
"learning_rate": 3.978792905025702e-05,
|
|
"loss": 0.0628,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.42987951807228914,
|
|
"grad_norm": 0.5713880062103271,
|
|
"learning_rate": 3.978136560200735e-05,
|
|
"loss": 0.0302,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.43180722891566264,
|
|
"grad_norm": 0.5345383286476135,
|
|
"learning_rate": 3.977470268663331e-05,
|
|
"loss": 0.0125,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.43373493975903615,
|
|
"grad_norm": 0.5378350019454956,
|
|
"learning_rate": 3.976794033763819e-05,
|
|
"loss": 0.0246,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.43566265060240966,
|
|
"grad_norm": 0.5554935336112976,
|
|
"learning_rate": 3.9761078589025276e-05,
|
|
"loss": 0.0212,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.4375903614457831,
|
|
"grad_norm": 0.2832634747028351,
|
|
"learning_rate": 3.9754117475297664e-05,
|
|
"loss": 0.0125,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.4395180722891566,
|
|
"grad_norm": 1.2910150289535522,
|
|
"learning_rate": 3.97470570314581e-05,
|
|
"loss": 0.0364,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.44144578313253013,
|
|
"grad_norm": 0.3731018602848053,
|
|
"learning_rate": 3.973989729300878e-05,
|
|
"loss": 0.0128,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.4433734939759036,
|
|
"grad_norm": 0.9433871507644653,
|
|
"learning_rate": 3.9732638295951195e-05,
|
|
"loss": 0.0367,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4453012048192771,
|
|
"grad_norm": 1.0779197216033936,
|
|
"learning_rate": 3.972528007678594e-05,
|
|
"loss": 0.0667,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.4472289156626506,
|
|
"grad_norm": 1.7009105682373047,
|
|
"learning_rate": 3.9717822672512516e-05,
|
|
"loss": 0.0655,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.4491566265060241,
|
|
"grad_norm": 0.5646032094955444,
|
|
"learning_rate": 3.971026612062919e-05,
|
|
"loss": 0.064,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.45108433734939757,
|
|
"grad_norm": 0.44474121928215027,
|
|
"learning_rate": 3.970261045913274e-05,
|
|
"loss": 0.0206,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.4530120481927711,
|
|
"grad_norm": 1.3969277143478394,
|
|
"learning_rate": 3.969485572651833e-05,
|
|
"loss": 0.0486,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.4549397590361446,
|
|
"grad_norm": 0.6401994228363037,
|
|
"learning_rate": 3.968700196177925e-05,
|
|
"loss": 0.0262,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.4568674698795181,
|
|
"grad_norm": 0.7091913223266602,
|
|
"learning_rate": 3.96790492044068e-05,
|
|
"loss": 0.014,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.45879518072289155,
|
|
"grad_norm": 0.6561547517776489,
|
|
"learning_rate": 3.967099749439002e-05,
|
|
"loss": 0.0482,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.46072289156626506,
|
|
"grad_norm": 0.6924155354499817,
|
|
"learning_rate": 3.966284687221551e-05,
|
|
"loss": 0.0289,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.46265060240963857,
|
|
"grad_norm": 0.5868663787841797,
|
|
"learning_rate": 3.9654597378867256e-05,
|
|
"loss": 0.0331,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.464578313253012,
|
|
"grad_norm": 0.7930939793586731,
|
|
"learning_rate": 3.964624905582637e-05,
|
|
"loss": 0.0925,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.46650602409638553,
|
|
"grad_norm": 0.4888836145401001,
|
|
"learning_rate": 3.9637801945070944e-05,
|
|
"loss": 0.015,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.46843373493975904,
|
|
"grad_norm": 0.7820287346839905,
|
|
"learning_rate": 3.962925608907579e-05,
|
|
"loss": 0.0382,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.47036144578313255,
|
|
"grad_norm": 0.4914316236972809,
|
|
"learning_rate": 3.962061153081224e-05,
|
|
"loss": 0.0257,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.472289156626506,
|
|
"grad_norm": 0.5681505799293518,
|
|
"learning_rate": 3.961186831374793e-05,
|
|
"loss": 0.0551,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.4742168674698795,
|
|
"grad_norm": 0.5049723386764526,
|
|
"learning_rate": 3.9603026481846616e-05,
|
|
"loss": 0.0186,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.476144578313253,
|
|
"grad_norm": 0.5034119486808777,
|
|
"learning_rate": 3.959408607956787e-05,
|
|
"loss": 0.024,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.47807228915662653,
|
|
"grad_norm": 0.4543336033821106,
|
|
"learning_rate": 3.958504715186695e-05,
|
|
"loss": 0.0256,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.5595743656158447,
|
|
"learning_rate": 3.957590974419452e-05,
|
|
"loss": 0.0222,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.4819277108433735,
|
|
"grad_norm": 0.5701581239700317,
|
|
"learning_rate": 3.956667390249642e-05,
|
|
"loss": 0.0334,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.483855421686747,
|
|
"grad_norm": 0.53755784034729,
|
|
"learning_rate": 3.9557339673213474e-05,
|
|
"loss": 0.0345,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.4857831325301205,
|
|
"grad_norm": 0.4368877112865448,
|
|
"learning_rate": 3.95479071032812e-05,
|
|
"loss": 0.0183,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.48771084337349396,
|
|
"grad_norm": 0.7972906827926636,
|
|
"learning_rate": 3.953837624012963e-05,
|
|
"loss": 0.0337,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.48963855421686747,
|
|
"grad_norm": 0.6148451566696167,
|
|
"learning_rate": 3.9528747131683023e-05,
|
|
"loss": 0.0524,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.491566265060241,
|
|
"grad_norm": 0.500840961933136,
|
|
"learning_rate": 3.9519019826359676e-05,
|
|
"loss": 0.0248,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.49349397590361443,
|
|
"grad_norm": 0.5536255240440369,
|
|
"learning_rate": 3.9509194373071624e-05,
|
|
"loss": 0.0219,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.49542168674698794,
|
|
"grad_norm": 0.6873176097869873,
|
|
"learning_rate": 3.9499270821224444e-05,
|
|
"loss": 0.0312,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.49734939759036145,
|
|
"grad_norm": 0.37207168340682983,
|
|
"learning_rate": 3.9489249220716974e-05,
|
|
"loss": 0.0149,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.49927710843373496,
|
|
"grad_norm": 0.4458799660205841,
|
|
"learning_rate": 3.947912962194107e-05,
|
|
"loss": 0.0214,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.5012048192771085,
|
|
"grad_norm": 0.4272724390029907,
|
|
"learning_rate": 3.9468912075781345e-05,
|
|
"loss": 0.0263,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.503132530120482,
|
|
"grad_norm": 0.5245792269706726,
|
|
"learning_rate": 3.945859663361496e-05,
|
|
"loss": 0.0103,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.5050602409638554,
|
|
"grad_norm": 0.8799260854721069,
|
|
"learning_rate": 3.9448183347311284e-05,
|
|
"loss": 0.0292,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.5069879518072289,
|
|
"grad_norm": 0.5996833443641663,
|
|
"learning_rate": 3.943767226923171e-05,
|
|
"loss": 0.0306,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.5089156626506024,
|
|
"grad_norm": 0.6044682860374451,
|
|
"learning_rate": 3.942706345222935e-05,
|
|
"loss": 0.0218,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.5108433734939759,
|
|
"grad_norm": 0.4770200848579407,
|
|
"learning_rate": 3.941635694964878e-05,
|
|
"loss": 0.0226,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.5127710843373494,
|
|
"grad_norm": 0.5605704188346863,
|
|
"learning_rate": 3.940555281532576e-05,
|
|
"loss": 0.0354,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.5146987951807229,
|
|
"grad_norm": 0.46532443165779114,
|
|
"learning_rate": 3.939465110358699e-05,
|
|
"loss": 0.0223,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.5166265060240964,
|
|
"grad_norm": 0.5190595388412476,
|
|
"learning_rate": 3.93836518692498e-05,
|
|
"loss": 0.0219,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.5185542168674698,
|
|
"grad_norm": 0.5767757892608643,
|
|
"learning_rate": 3.937255516762193e-05,
|
|
"loss": 0.0294,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.5204819277108433,
|
|
"grad_norm": 0.4543164372444153,
|
|
"learning_rate": 3.936136105450119e-05,
|
|
"loss": 0.0244,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5224096385542168,
|
|
"grad_norm": 0.4155154526233673,
|
|
"learning_rate": 3.9350069586175195e-05,
|
|
"loss": 0.02,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.5243373493975904,
|
|
"grad_norm": 0.5470768213272095,
|
|
"learning_rate": 3.933868081942113e-05,
|
|
"loss": 0.0187,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.5262650602409639,
|
|
"grad_norm": 0.9491772651672363,
|
|
"learning_rate": 3.9327194811505406e-05,
|
|
"loss": 0.0337,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.5281927710843374,
|
|
"grad_norm": 0.9313873052597046,
|
|
"learning_rate": 3.93156116201834e-05,
|
|
"loss": 0.0573,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.5301204819277109,
|
|
"grad_norm": 0.7181005477905273,
|
|
"learning_rate": 3.930393130369915e-05,
|
|
"loss": 0.0405,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.5320481927710843,
|
|
"grad_norm": 0.34231385588645935,
|
|
"learning_rate": 3.9292153920785076e-05,
|
|
"loss": 0.0153,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.5339759036144578,
|
|
"grad_norm": 0.6899610161781311,
|
|
"learning_rate": 3.928027953066168e-05,
|
|
"loss": 0.0338,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.5359036144578313,
|
|
"grad_norm": 0.7509781718254089,
|
|
"learning_rate": 3.926830819303726e-05,
|
|
"loss": 0.0416,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.5378313253012048,
|
|
"grad_norm": 0.6326774954795837,
|
|
"learning_rate": 3.925623996810757e-05,
|
|
"loss": 0.0293,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.5397590361445783,
|
|
"grad_norm": 0.5543203353881836,
|
|
"learning_rate": 3.924407491655557e-05,
|
|
"loss": 0.0263,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5416867469879518,
|
|
"grad_norm": 0.5367572903633118,
|
|
"learning_rate": 3.9231813099551086e-05,
|
|
"loss": 0.0276,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.5436144578313253,
|
|
"grad_norm": 0.3143869638442993,
|
|
"learning_rate": 3.921945457875051e-05,
|
|
"loss": 0.0146,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.5455421686746988,
|
|
"grad_norm": 0.47403043508529663,
|
|
"learning_rate": 3.920699941629649e-05,
|
|
"loss": 0.0267,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.5474698795180722,
|
|
"grad_norm": 0.5082595348358154,
|
|
"learning_rate": 3.919444767481763e-05,
|
|
"loss": 0.0183,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.5493975903614458,
|
|
"grad_norm": 0.747949481010437,
|
|
"learning_rate": 3.918179941742816e-05,
|
|
"loss": 0.0412,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.5513253012048193,
|
|
"grad_norm": 0.6553886532783508,
|
|
"learning_rate": 3.916905470772762e-05,
|
|
"loss": 0.0505,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.5532530120481928,
|
|
"grad_norm": 0.3838176131248474,
|
|
"learning_rate": 3.9156213609800545e-05,
|
|
"loss": 0.0156,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.5551807228915663,
|
|
"grad_norm": 0.7427731156349182,
|
|
"learning_rate": 3.914327618821614e-05,
|
|
"loss": 0.0278,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.5571084337349398,
|
|
"grad_norm": 0.2612821161746979,
|
|
"learning_rate": 3.913024250802796e-05,
|
|
"loss": 0.0101,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.5590361445783133,
|
|
"grad_norm": 0.3799416124820709,
|
|
"learning_rate": 3.911711263477357e-05,
|
|
"loss": 0.0168,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.5609638554216867,
|
|
"grad_norm": 0.5053854584693909,
|
|
"learning_rate": 3.910388663447425e-05,
|
|
"loss": 0.0249,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.5628915662650602,
|
|
"grad_norm": 0.38095012307167053,
|
|
"learning_rate": 3.909056457363461e-05,
|
|
"loss": 0.0156,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.5648192771084337,
|
|
"grad_norm": 0.4477892220020294,
|
|
"learning_rate": 3.907714651924229e-05,
|
|
"loss": 0.0309,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.5667469879518072,
|
|
"grad_norm": 0.5875864624977112,
|
|
"learning_rate": 3.906363253876763e-05,
|
|
"loss": 0.0287,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.5686746987951807,
|
|
"grad_norm": 0.522990882396698,
|
|
"learning_rate": 3.90500227001633e-05,
|
|
"loss": 0.0318,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.5706024096385542,
|
|
"grad_norm": 0.4153876304626465,
|
|
"learning_rate": 3.9036317071863994e-05,
|
|
"loss": 0.0192,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.5725301204819278,
|
|
"grad_norm": 0.4675769507884979,
|
|
"learning_rate": 3.902251572278605e-05,
|
|
"loss": 0.067,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.5744578313253013,
|
|
"grad_norm": 0.35778650641441345,
|
|
"learning_rate": 3.900861872232713e-05,
|
|
"loss": 0.0197,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.5763855421686747,
|
|
"grad_norm": 0.7382330894470215,
|
|
"learning_rate": 3.899462614036587e-05,
|
|
"loss": 0.0283,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.5783132530120482,
|
|
"grad_norm": 0.41268599033355713,
|
|
"learning_rate": 3.89805380472615e-05,
|
|
"loss": 0.0207,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.5802409638554217,
|
|
"grad_norm": 1.2013020515441895,
|
|
"learning_rate": 3.8966354513853535e-05,
|
|
"loss": 0.0301,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.5821686746987952,
|
|
"grad_norm": 0.424757719039917,
|
|
"learning_rate": 3.895207561146137e-05,
|
|
"loss": 0.022,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.5840963855421687,
|
|
"grad_norm": 0.4196677505970001,
|
|
"learning_rate": 3.893770141188396e-05,
|
|
"loss": 0.0424,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.5860240963855422,
|
|
"grad_norm": 0.8644190430641174,
|
|
"learning_rate": 3.892323198739946e-05,
|
|
"loss": 0.08,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.5879518072289157,
|
|
"grad_norm": 0.5645135045051575,
|
|
"learning_rate": 3.890866741076482e-05,
|
|
"loss": 0.0152,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.5898795180722891,
|
|
"grad_norm": 0.5218387246131897,
|
|
"learning_rate": 3.889400775521545e-05,
|
|
"loss": 0.0205,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.5918072289156626,
|
|
"grad_norm": 0.39709413051605225,
|
|
"learning_rate": 3.8879253094464865e-05,
|
|
"loss": 0.0233,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.5937349397590361,
|
|
"grad_norm": 0.3572910726070404,
|
|
"learning_rate": 3.8864403502704285e-05,
|
|
"loss": 0.0198,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.5956626506024096,
|
|
"grad_norm": 0.382709264755249,
|
|
"learning_rate": 3.8849459054602274e-05,
|
|
"loss": 0.0176,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.5975903614457831,
|
|
"grad_norm": 3.4527227878570557,
|
|
"learning_rate": 3.883441982530436e-05,
|
|
"loss": 0.0239,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.5995180722891567,
|
|
"grad_norm": 0.4467569589614868,
|
|
"learning_rate": 3.8819285890432674e-05,
|
|
"loss": 0.0284,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.6014457831325302,
|
|
"grad_norm": 0.44513460993766785,
|
|
"learning_rate": 3.880405732608555e-05,
|
|
"loss": 0.0233,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.6033734939759036,
|
|
"grad_norm": 0.8029689192771912,
|
|
"learning_rate": 3.8788734208837155e-05,
|
|
"loss": 0.0433,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.6053012048192771,
|
|
"grad_norm": 0.7291454076766968,
|
|
"learning_rate": 3.877331661573709e-05,
|
|
"loss": 0.043,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.6072289156626506,
|
|
"grad_norm": 0.6050467491149902,
|
|
"learning_rate": 3.8757804624310006e-05,
|
|
"loss": 0.0377,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.6091566265060241,
|
|
"grad_norm": 0.6714366674423218,
|
|
"learning_rate": 3.874219831255524e-05,
|
|
"loss": 0.046,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.6110843373493976,
|
|
"grad_norm": 0.336037278175354,
|
|
"learning_rate": 3.8726497758946394e-05,
|
|
"loss": 0.0149,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.6130120481927711,
|
|
"grad_norm": 0.3057402968406677,
|
|
"learning_rate": 3.871070304243094e-05,
|
|
"loss": 0.014,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.6149397590361446,
|
|
"grad_norm": 0.4537644684314728,
|
|
"learning_rate": 3.8694814242429834e-05,
|
|
"loss": 0.0503,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.6168674698795181,
|
|
"grad_norm": 0.45573824644088745,
|
|
"learning_rate": 3.8678831438837116e-05,
|
|
"loss": 0.021,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6187951807228915,
|
|
"grad_norm": 0.30729591846466064,
|
|
"learning_rate": 3.866275471201952e-05,
|
|
"loss": 0.0163,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.620722891566265,
|
|
"grad_norm": 0.7614850401878357,
|
|
"learning_rate": 3.8646584142816036e-05,
|
|
"loss": 0.0347,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.6226506024096385,
|
|
"grad_norm": 0.5323611497879028,
|
|
"learning_rate": 3.863031981253754e-05,
|
|
"loss": 0.0201,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.624578313253012,
|
|
"grad_norm": 0.34426453709602356,
|
|
"learning_rate": 3.861396180296635e-05,
|
|
"loss": 0.0243,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.6265060240963856,
|
|
"grad_norm": 0.621636152267456,
|
|
"learning_rate": 3.859751019635585e-05,
|
|
"loss": 0.0166,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.6284337349397591,
|
|
"grad_norm": 0.549324095249176,
|
|
"learning_rate": 3.858096507543006e-05,
|
|
"loss": 0.0274,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.6303614457831326,
|
|
"grad_norm": 0.358426570892334,
|
|
"learning_rate": 3.8564326523383214e-05,
|
|
"loss": 0.0207,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.632289156626506,
|
|
"grad_norm": 0.3639723062515259,
|
|
"learning_rate": 3.8547594623879346e-05,
|
|
"loss": 0.0297,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.6342168674698795,
|
|
"grad_norm": 0.3402212858200073,
|
|
"learning_rate": 3.853076946105188e-05,
|
|
"loss": 0.0258,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.636144578313253,
|
|
"grad_norm": 0.4083027243614197,
|
|
"learning_rate": 3.85138511195032e-05,
|
|
"loss": 0.0351,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.6380722891566265,
|
|
"grad_norm": 0.43532121181488037,
|
|
"learning_rate": 3.84968396843042e-05,
|
|
"loss": 0.0388,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.35353463888168335,
|
|
"learning_rate": 3.8479735240993904e-05,
|
|
"loss": 0.0203,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.6419277108433735,
|
|
"grad_norm": 0.350149929523468,
|
|
"learning_rate": 3.846253787557901e-05,
|
|
"loss": 0.0261,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.643855421686747,
|
|
"grad_norm": 0.7665389180183411,
|
|
"learning_rate": 3.844524767453344e-05,
|
|
"loss": 0.0108,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.6457831325301204,
|
|
"grad_norm": 0.44621360301971436,
|
|
"learning_rate": 3.842786472479795e-05,
|
|
"loss": 0.0282,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.6477108433734939,
|
|
"grad_norm": 0.7787201404571533,
|
|
"learning_rate": 3.841038911377962e-05,
|
|
"loss": 0.0216,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.6496385542168674,
|
|
"grad_norm": 0.48260653018951416,
|
|
"learning_rate": 3.839282092935153e-05,
|
|
"loss": 0.0234,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.651566265060241,
|
|
"grad_norm": 0.4987852871417999,
|
|
"learning_rate": 3.837516025985219e-05,
|
|
"loss": 0.0515,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.6534939759036145,
|
|
"grad_norm": 0.9030266404151917,
|
|
"learning_rate": 3.835740719408517e-05,
|
|
"loss": 0.0508,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.655421686746988,
|
|
"grad_norm": 0.6381701231002808,
|
|
"learning_rate": 3.833956182131867e-05,
|
|
"loss": 0.0405,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.6573493975903615,
|
|
"grad_norm": 0.42828986048698425,
|
|
"learning_rate": 3.832162423128499e-05,
|
|
"loss": 0.024,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.659277108433735,
|
|
"grad_norm": 0.38725873827934265,
|
|
"learning_rate": 3.8303594514180164e-05,
|
|
"loss": 0.0199,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.6612048192771084,
|
|
"grad_norm": 0.23280498385429382,
|
|
"learning_rate": 3.828547276066346e-05,
|
|
"loss": 0.0101,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.6631325301204819,
|
|
"grad_norm": 0.7298216819763184,
|
|
"learning_rate": 3.8267259061856925e-05,
|
|
"loss": 0.0455,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.6650602409638554,
|
|
"grad_norm": 0.5975687503814697,
|
|
"learning_rate": 3.824895350934496e-05,
|
|
"loss": 0.0372,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.6669879518072289,
|
|
"grad_norm": 0.6295403242111206,
|
|
"learning_rate": 3.823055619517381e-05,
|
|
"loss": 0.0362,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.6689156626506024,
|
|
"grad_norm": 0.5086020827293396,
|
|
"learning_rate": 3.821206721185115e-05,
|
|
"loss": 0.0368,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.6708433734939759,
|
|
"grad_norm": 0.34506168961524963,
|
|
"learning_rate": 3.819348665234557e-05,
|
|
"loss": 0.0178,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.6727710843373494,
|
|
"grad_norm": 1.309940218925476,
|
|
"learning_rate": 3.817481461008617e-05,
|
|
"loss": 0.024,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.6746987951807228,
|
|
"grad_norm": 0.4074770510196686,
|
|
"learning_rate": 3.815605117896204e-05,
|
|
"loss": 0.0262,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.6766265060240964,
|
|
"grad_norm": 0.48525840044021606,
|
|
"learning_rate": 3.8137196453321775e-05,
|
|
"loss": 0.0209,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.6785542168674699,
|
|
"grad_norm": 0.7199739217758179,
|
|
"learning_rate": 3.811825052797308e-05,
|
|
"loss": 0.0396,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.6804819277108434,
|
|
"grad_norm": 0.519540011882782,
|
|
"learning_rate": 3.8099213498182196e-05,
|
|
"loss": 0.0453,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.6824096385542169,
|
|
"grad_norm": 0.9738391041755676,
|
|
"learning_rate": 3.808008545967349e-05,
|
|
"loss": 0.0317,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.6843373493975904,
|
|
"grad_norm": 1.888344407081604,
|
|
"learning_rate": 3.8060866508628953e-05,
|
|
"loss": 0.0452,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.6862650602409639,
|
|
"grad_norm": 0.48989811539649963,
|
|
"learning_rate": 3.8041556741687695e-05,
|
|
"loss": 0.0315,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.6881927710843373,
|
|
"grad_norm": 0.3764645457267761,
|
|
"learning_rate": 3.8022156255945496e-05,
|
|
"loss": 0.0269,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.6901204819277108,
|
|
"grad_norm": 0.46409738063812256,
|
|
"learning_rate": 3.800266514895429e-05,
|
|
"loss": 0.0171,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.6920481927710843,
|
|
"grad_norm": 0.41091030836105347,
|
|
"learning_rate": 3.7983083518721695e-05,
|
|
"loss": 0.0167,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.6939759036144578,
|
|
"grad_norm": 0.8375523090362549,
|
|
"learning_rate": 3.79634114637105e-05,
|
|
"loss": 0.0342,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.6959036144578313,
|
|
"grad_norm": 1.7053394317626953,
|
|
"learning_rate": 3.794364908283817e-05,
|
|
"loss": 0.02,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.6978313253012048,
|
|
"grad_norm": 0.4163115918636322,
|
|
"learning_rate": 3.792379647547637e-05,
|
|
"loss": 0.0138,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.6997590361445784,
|
|
"grad_norm": 0.388751745223999,
|
|
"learning_rate": 3.790385374145046e-05,
|
|
"loss": 0.0172,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.7016867469879519,
|
|
"grad_norm": 0.5584064722061157,
|
|
"learning_rate": 3.7883820981038966e-05,
|
|
"loss": 0.0254,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.7036144578313253,
|
|
"grad_norm": 1.394264817237854,
|
|
"learning_rate": 3.7863698294973114e-05,
|
|
"loss": 0.037,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.7055421686746988,
|
|
"grad_norm": 0.46280744671821594,
|
|
"learning_rate": 3.78434857844363e-05,
|
|
"loss": 0.0234,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.7074698795180723,
|
|
"grad_norm": 0.39548924565315247,
|
|
"learning_rate": 3.782318355106358e-05,
|
|
"loss": 0.0164,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.7093975903614458,
|
|
"grad_norm": 0.7307773232460022,
|
|
"learning_rate": 3.780279169694118e-05,
|
|
"loss": 0.0192,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.7113253012048193,
|
|
"grad_norm": 0.28035807609558105,
|
|
"learning_rate": 3.778231032460594e-05,
|
|
"loss": 0.0131,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.7132530120481928,
|
|
"grad_norm": 0.8376953601837158,
|
|
"learning_rate": 3.776173953704486e-05,
|
|
"loss": 0.0291,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7151807228915663,
|
|
"grad_norm": 0.7356843948364258,
|
|
"learning_rate": 3.774107943769454e-05,
|
|
"loss": 0.0214,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.7171084337349397,
|
|
"grad_norm": 0.41503390669822693,
|
|
"learning_rate": 3.772033013044064e-05,
|
|
"loss": 0.0221,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.7190361445783132,
|
|
"grad_norm": 0.35732385516166687,
|
|
"learning_rate": 3.7699491719617436e-05,
|
|
"loss": 0.015,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.7209638554216867,
|
|
"grad_norm": 0.283778578042984,
|
|
"learning_rate": 3.76785643100072e-05,
|
|
"loss": 0.0146,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.7228915662650602,
|
|
"grad_norm": 0.3219413459300995,
|
|
"learning_rate": 3.765754800683974e-05,
|
|
"loss": 0.015,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.7248192771084337,
|
|
"grad_norm": 0.610431432723999,
|
|
"learning_rate": 3.7636442915791856e-05,
|
|
"loss": 0.0326,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.7267469879518073,
|
|
"grad_norm": 4.944870948791504,
|
|
"learning_rate": 3.7615249142986784e-05,
|
|
"loss": 0.0432,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.7286746987951808,
|
|
"grad_norm": 0.4894593060016632,
|
|
"learning_rate": 3.7593966794993696e-05,
|
|
"loss": 0.0174,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.7306024096385542,
|
|
"grad_norm": 0.4211325943470001,
|
|
"learning_rate": 3.757259597882714e-05,
|
|
"loss": 0.023,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.7325301204819277,
|
|
"grad_norm": 0.33621737360954285,
|
|
"learning_rate": 3.755113680194651e-05,
|
|
"loss": 0.0201,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.7344578313253012,
|
|
"grad_norm": 0.5799694657325745,
|
|
"learning_rate": 3.7529589372255514e-05,
|
|
"loss": 0.0173,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.7363855421686747,
|
|
"grad_norm": 0.5172572731971741,
|
|
"learning_rate": 3.750795379810162e-05,
|
|
"loss": 0.0284,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.7383132530120482,
|
|
"grad_norm": 0.5715453028678894,
|
|
"learning_rate": 3.748623018827552e-05,
|
|
"loss": 0.0194,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.7402409638554217,
|
|
"grad_norm": 0.5284178256988525,
|
|
"learning_rate": 3.746441865201056e-05,
|
|
"loss": 0.0247,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.7421686746987952,
|
|
"grad_norm": 0.37828654050827026,
|
|
"learning_rate": 3.744251929898223e-05,
|
|
"loss": 0.0097,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.7440963855421687,
|
|
"grad_norm": 0.3252779543399811,
|
|
"learning_rate": 3.742053223930758e-05,
|
|
"loss": 0.0238,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.7460240963855421,
|
|
"grad_norm": 0.6031543612480164,
|
|
"learning_rate": 3.7398457583544674e-05,
|
|
"loss": 0.0332,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.7479518072289156,
|
|
"grad_norm": 0.23846614360809326,
|
|
"learning_rate": 3.737629544269206e-05,
|
|
"loss": 0.0122,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.7498795180722891,
|
|
"grad_norm": 0.5274029970169067,
|
|
"learning_rate": 3.7354045928188155e-05,
|
|
"loss": 0.0324,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.7518072289156627,
|
|
"grad_norm": 0.4672217071056366,
|
|
"learning_rate": 3.733170915191075e-05,
|
|
"loss": 0.0196,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.7537349397590362,
|
|
"grad_norm": 0.29819396138191223,
|
|
"learning_rate": 3.730928522617639e-05,
|
|
"loss": 0.0131,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.7556626506024097,
|
|
"grad_norm": 0.43824997544288635,
|
|
"learning_rate": 3.7286774263739855e-05,
|
|
"loss": 0.0238,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.7575903614457832,
|
|
"grad_norm": 0.2822072505950928,
|
|
"learning_rate": 3.726417637779357e-05,
|
|
"loss": 0.0314,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.7595180722891566,
|
|
"grad_norm": 0.43815648555755615,
|
|
"learning_rate": 3.7241491681967044e-05,
|
|
"loss": 0.0144,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.7614457831325301,
|
|
"grad_norm": 0.37194815278053284,
|
|
"learning_rate": 3.721872029032628e-05,
|
|
"loss": 0.0286,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.7633734939759036,
|
|
"grad_norm": 0.7319737672805786,
|
|
"learning_rate": 3.719586231737322e-05,
|
|
"loss": 0.0427,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.7653012048192771,
|
|
"grad_norm": 0.5870066285133362,
|
|
"learning_rate": 3.717291787804517e-05,
|
|
"loss": 0.0138,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.7672289156626506,
|
|
"grad_norm": 0.6574277281761169,
|
|
"learning_rate": 3.7149887087714225e-05,
|
|
"loss": 0.061,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.7691566265060241,
|
|
"grad_norm": 0.5467348694801331,
|
|
"learning_rate": 3.712677006218666e-05,
|
|
"loss": 0.022,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.7710843373493976,
|
|
"grad_norm": 0.3589288890361786,
|
|
"learning_rate": 3.710356691770238e-05,
|
|
"loss": 0.0161,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.7730120481927711,
|
|
"grad_norm": 0.574630618095398,
|
|
"learning_rate": 3.708027777093433e-05,
|
|
"loss": 0.0285,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.7749397590361445,
|
|
"grad_norm": 0.39048445224761963,
|
|
"learning_rate": 3.70569027389879e-05,
|
|
"loss": 0.012,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.776867469879518,
|
|
"grad_norm": 0.34803536534309387,
|
|
"learning_rate": 3.703344193940032e-05,
|
|
"loss": 0.0155,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.7787951807228916,
|
|
"grad_norm": 1.188948392868042,
|
|
"learning_rate": 3.700989549014011e-05,
|
|
"loss": 0.0617,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.7807228915662651,
|
|
"grad_norm": 0.473157674074173,
|
|
"learning_rate": 3.698626350960646e-05,
|
|
"loss": 0.0298,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.7826506024096386,
|
|
"grad_norm": 0.42009076476097107,
|
|
"learning_rate": 3.6962546116628634e-05,
|
|
"loss": 0.03,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.7845783132530121,
|
|
"grad_norm": 0.6334308981895447,
|
|
"learning_rate": 3.693874343046537e-05,
|
|
"loss": 0.0107,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.7865060240963856,
|
|
"grad_norm": 0.35594677925109863,
|
|
"learning_rate": 3.6914855570804314e-05,
|
|
"loss": 0.0174,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.788433734939759,
|
|
"grad_norm": 0.28985708951950073,
|
|
"learning_rate": 3.689088265776136e-05,
|
|
"loss": 0.0149,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.7903614457831325,
|
|
"grad_norm": 0.3981950581073761,
|
|
"learning_rate": 3.686682481188011e-05,
|
|
"loss": 0.019,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.792289156626506,
|
|
"grad_norm": 0.48819583654403687,
|
|
"learning_rate": 3.6842682154131193e-05,
|
|
"loss": 0.0217,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.7942168674698795,
|
|
"grad_norm": 0.42819952964782715,
|
|
"learning_rate": 3.681845480591174e-05,
|
|
"loss": 0.0198,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.796144578313253,
|
|
"grad_norm": 0.48591694235801697,
|
|
"learning_rate": 3.6794142889044727e-05,
|
|
"loss": 0.0253,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.7980722891566265,
|
|
"grad_norm": 0.4730607271194458,
|
|
"learning_rate": 3.676974652577835e-05,
|
|
"loss": 0.0329,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.5390865802764893,
|
|
"learning_rate": 3.6745265838785434e-05,
|
|
"loss": 0.0479,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.8019277108433734,
|
|
"grad_norm": 0.6377891302108765,
|
|
"learning_rate": 3.672070095116283e-05,
|
|
"loss": 0.019,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.803855421686747,
|
|
"grad_norm": 0.8984615206718445,
|
|
"learning_rate": 3.669605198643075e-05,
|
|
"loss": 0.0444,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.8057831325301205,
|
|
"grad_norm": 0.4913877546787262,
|
|
"learning_rate": 3.667131906853219e-05,
|
|
"loss": 0.031,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.807710843373494,
|
|
"grad_norm": 0.37894028425216675,
|
|
"learning_rate": 3.664650232183229e-05,
|
|
"loss": 0.0195,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.8096385542168675,
|
|
"grad_norm": 0.3644949495792389,
|
|
"learning_rate": 3.66216018711177e-05,
|
|
"loss": 0.018,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.811566265060241,
|
|
"grad_norm": 0.414440393447876,
|
|
"learning_rate": 3.659661784159597e-05,
|
|
"loss": 0.0188,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.8134939759036145,
|
|
"grad_norm": 0.49220341444015503,
|
|
"learning_rate": 3.65715503588949e-05,
|
|
"loss": 0.016,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.815421686746988,
|
|
"grad_norm": 1.0939836502075195,
|
|
"learning_rate": 3.654639954906193e-05,
|
|
"loss": 0.0758,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.8173493975903614,
|
|
"grad_norm": 0.43222442269325256,
|
|
"learning_rate": 3.652116553856349e-05,
|
|
"loss": 0.0308,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.8192771084337349,
|
|
"grad_norm": 0.5081896185874939,
|
|
"learning_rate": 3.649584845428438e-05,
|
|
"loss": 0.0493,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.8212048192771084,
|
|
"grad_norm": 0.9811948537826538,
|
|
"learning_rate": 3.64704484235271e-05,
|
|
"loss": 0.019,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.8231325301204819,
|
|
"grad_norm": 0.31656572222709656,
|
|
"learning_rate": 3.6444965574011255e-05,
|
|
"loss": 0.0135,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.8250602409638554,
|
|
"grad_norm": 0.7844433188438416,
|
|
"learning_rate": 3.641940003387289e-05,
|
|
"loss": 0.0402,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.826987951807229,
|
|
"grad_norm": 0.3353273570537567,
|
|
"learning_rate": 3.6393751931663814e-05,
|
|
"loss": 0.0132,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.8289156626506025,
|
|
"grad_norm": 0.7253058552742004,
|
|
"learning_rate": 3.6368021396351015e-05,
|
|
"loss": 0.0296,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.8308433734939759,
|
|
"grad_norm": 0.45300304889678955,
|
|
"learning_rate": 3.634220855731598e-05,
|
|
"loss": 0.0258,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.8327710843373494,
|
|
"grad_norm": 0.3480473458766937,
|
|
"learning_rate": 3.631631354435403e-05,
|
|
"loss": 0.0099,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.8346987951807229,
|
|
"grad_norm": 2.1114516258239746,
|
|
"learning_rate": 3.62903364876737e-05,
|
|
"loss": 0.0457,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.8366265060240964,
|
|
"grad_norm": 0.5649561882019043,
|
|
"learning_rate": 3.626427751789606e-05,
|
|
"loss": 0.0444,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.8385542168674699,
|
|
"grad_norm": 0.3864995539188385,
|
|
"learning_rate": 3.623813676605405e-05,
|
|
"loss": 0.0223,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.8404819277108434,
|
|
"grad_norm": 1.2134298086166382,
|
|
"learning_rate": 3.621191436359186e-05,
|
|
"loss": 0.0353,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.8424096385542169,
|
|
"grad_norm": 0.4403415024280548,
|
|
"learning_rate": 3.6185610442364246e-05,
|
|
"loss": 0.0216,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.8443373493975903,
|
|
"grad_norm": 0.6050297021865845,
|
|
"learning_rate": 3.6159225134635846e-05,
|
|
"loss": 0.0433,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.8462650602409638,
|
|
"grad_norm": 0.7951678037643433,
|
|
"learning_rate": 3.6132758573080556e-05,
|
|
"loss": 0.031,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.8481927710843373,
|
|
"grad_norm": 0.4991949796676636,
|
|
"learning_rate": 3.6106210890780834e-05,
|
|
"loss": 0.0313,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.8501204819277108,
|
|
"grad_norm": 0.47951385378837585,
|
|
"learning_rate": 3.607958222122704e-05,
|
|
"loss": 0.0218,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.8520481927710843,
|
|
"grad_norm": 0.7345194220542908,
|
|
"learning_rate": 3.6052872698316755e-05,
|
|
"loss": 0.0239,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.8539759036144579,
|
|
"grad_norm": 1.4814884662628174,
|
|
"learning_rate": 3.602608245635414e-05,
|
|
"loss": 0.0127,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.8559036144578314,
|
|
"grad_norm": 2.4240877628326416,
|
|
"learning_rate": 3.599921163004922e-05,
|
|
"loss": 0.0618,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.8578313253012049,
|
|
"grad_norm": 0.41523510217666626,
|
|
"learning_rate": 3.5972260354517216e-05,
|
|
"loss": 0.0283,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.8597590361445783,
|
|
"grad_norm": 0.5577677488327026,
|
|
"learning_rate": 3.594522876527791e-05,
|
|
"loss": 0.0271,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.8616867469879518,
|
|
"grad_norm": 0.5829064846038818,
|
|
"learning_rate": 3.591811699825487e-05,
|
|
"loss": 0.0169,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.8636144578313253,
|
|
"grad_norm": 0.4478822350502014,
|
|
"learning_rate": 3.5890925189774886e-05,
|
|
"loss": 0.0239,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.8655421686746988,
|
|
"grad_norm": 0.3498048782348633,
|
|
"learning_rate": 3.586365347656718e-05,
|
|
"loss": 0.0137,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.8674698795180723,
|
|
"grad_norm": 0.6571130156517029,
|
|
"learning_rate": 3.583630199576278e-05,
|
|
"loss": 0.027,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.8693975903614458,
|
|
"grad_norm": 0.344970166683197,
|
|
"learning_rate": 3.58088708848938e-05,
|
|
"loss": 0.0167,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.8713253012048193,
|
|
"grad_norm": 0.34611570835113525,
|
|
"learning_rate": 3.5781360281892775e-05,
|
|
"loss": 0.0468,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.8732530120481927,
|
|
"grad_norm": 0.66157066822052,
|
|
"learning_rate": 3.575377032509194e-05,
|
|
"loss": 0.0344,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.8751807228915662,
|
|
"grad_norm": 0.3676326870918274,
|
|
"learning_rate": 3.5726101153222534e-05,
|
|
"loss": 0.0366,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.8771084337349397,
|
|
"grad_norm": 0.5958423018455505,
|
|
"learning_rate": 3.569835290541414e-05,
|
|
"loss": 0.0382,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.8790361445783132,
|
|
"grad_norm": 0.36787471175193787,
|
|
"learning_rate": 3.567052572119397e-05,
|
|
"loss": 0.018,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.8809638554216868,
|
|
"grad_norm": 0.9478234052658081,
|
|
"learning_rate": 3.564261974048611e-05,
|
|
"loss": 0.0179,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.8828915662650603,
|
|
"grad_norm": 0.3337579369544983,
|
|
"learning_rate": 3.56146351036109e-05,
|
|
"loss": 0.0147,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.8848192771084338,
|
|
"grad_norm": 0.4984932243824005,
|
|
"learning_rate": 3.558657195128416e-05,
|
|
"loss": 0.0224,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.8867469879518072,
|
|
"grad_norm": 0.36718735098838806,
|
|
"learning_rate": 3.555843042461653e-05,
|
|
"loss": 0.0202,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.8886746987951807,
|
|
"grad_norm": 0.4081745445728302,
|
|
"learning_rate": 3.553021066511274e-05,
|
|
"loss": 0.0288,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.8906024096385542,
|
|
"grad_norm": 0.3233242332935333,
|
|
"learning_rate": 3.55019128146709e-05,
|
|
"loss": 0.0362,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.8925301204819277,
|
|
"grad_norm": 0.6560158729553223,
|
|
"learning_rate": 3.547353701558178e-05,
|
|
"loss": 0.038,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.8944578313253012,
|
|
"grad_norm": 0.47668641805648804,
|
|
"learning_rate": 3.544508341052811e-05,
|
|
"loss": 0.0399,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.8963855421686747,
|
|
"grad_norm": 0.45512664318084717,
|
|
"learning_rate": 3.541655214258383e-05,
|
|
"loss": 0.022,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.8983132530120482,
|
|
"grad_norm": 0.8410730361938477,
|
|
"learning_rate": 3.538794335521343e-05,
|
|
"loss": 0.0315,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.9002409638554217,
|
|
"grad_norm": 0.4872909486293793,
|
|
"learning_rate": 3.535925719227117e-05,
|
|
"loss": 0.0152,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.9021686746987951,
|
|
"grad_norm": 0.45623311400413513,
|
|
"learning_rate": 3.533049379800038e-05,
|
|
"loss": 0.0305,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.9040963855421686,
|
|
"grad_norm": 0.43087029457092285,
|
|
"learning_rate": 3.530165331703275e-05,
|
|
"loss": 0.0131,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.9060240963855422,
|
|
"grad_norm": 0.4610525369644165,
|
|
"learning_rate": 3.527273589438756e-05,
|
|
"loss": 0.0187,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9079518072289157,
|
|
"grad_norm": 0.3356114327907562,
|
|
"learning_rate": 3.5243741675471006e-05,
|
|
"loss": 0.0185,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.9098795180722892,
|
|
"grad_norm": 0.9065960049629211,
|
|
"learning_rate": 3.5214670806075426e-05,
|
|
"loss": 0.0433,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.9118072289156627,
|
|
"grad_norm": 0.3652578294277191,
|
|
"learning_rate": 3.518552343237858e-05,
|
|
"loss": 0.02,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.9137349397590362,
|
|
"grad_norm": 0.32377883791923523,
|
|
"learning_rate": 3.5156299700942916e-05,
|
|
"loss": 0.0165,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.9156626506024096,
|
|
"grad_norm": 0.2431817352771759,
|
|
"learning_rate": 3.512699975871485e-05,
|
|
"loss": 0.0172,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.9175903614457831,
|
|
"grad_norm": 0.6390707492828369,
|
|
"learning_rate": 3.509762375302399e-05,
|
|
"loss": 0.0356,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.9195180722891566,
|
|
"grad_norm": 0.2283092886209488,
|
|
"learning_rate": 3.506817183158243e-05,
|
|
"loss": 0.0088,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9214457831325301,
|
|
"grad_norm": 0.5053914189338684,
|
|
"learning_rate": 3.5038644142483966e-05,
|
|
"loss": 0.0389,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.9233734939759036,
|
|
"grad_norm": 0.2567576467990875,
|
|
"learning_rate": 3.500904083420342e-05,
|
|
"loss": 0.0155,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.9253012048192771,
|
|
"grad_norm": 0.6852384209632874,
|
|
"learning_rate": 3.497936205559583e-05,
|
|
"loss": 0.0247,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.9272289156626506,
|
|
"grad_norm": 0.36403414607048035,
|
|
"learning_rate": 3.494960795589572e-05,
|
|
"loss": 0.023,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.929156626506024,
|
|
"grad_norm": 0.506554901599884,
|
|
"learning_rate": 3.491977868471635e-05,
|
|
"loss": 0.0273,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.9310843373493976,
|
|
"grad_norm": 0.38329923152923584,
|
|
"learning_rate": 3.4889874392048985e-05,
|
|
"loss": 0.0169,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.9330120481927711,
|
|
"grad_norm": 0.2805836498737335,
|
|
"learning_rate": 3.48598952282621e-05,
|
|
"loss": 0.0105,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.9349397590361446,
|
|
"grad_norm": 0.6315302848815918,
|
|
"learning_rate": 3.482984134410067e-05,
|
|
"loss": 0.0289,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.9368674698795181,
|
|
"grad_norm": 0.6431388854980469,
|
|
"learning_rate": 3.479971289068537e-05,
|
|
"loss": 0.0311,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.9387951807228916,
|
|
"grad_norm": 0.9794723391532898,
|
|
"learning_rate": 3.476951001951184e-05,
|
|
"loss": 0.0452,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.9407228915662651,
|
|
"grad_norm": 0.7984824180603027,
|
|
"learning_rate": 3.473923288244991e-05,
|
|
"loss": 0.0689,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.9426506024096386,
|
|
"grad_norm": 0.46362006664276123,
|
|
"learning_rate": 3.470888163174286e-05,
|
|
"loss": 0.0241,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.944578313253012,
|
|
"grad_norm": 0.5051195025444031,
|
|
"learning_rate": 3.467845642000661e-05,
|
|
"loss": 0.0228,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.9465060240963855,
|
|
"grad_norm": 0.3082812428474426,
|
|
"learning_rate": 3.4647957400229004e-05,
|
|
"loss": 0.0144,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.948433734939759,
|
|
"grad_norm": 0.2691391110420227,
|
|
"learning_rate": 3.461738472576902e-05,
|
|
"loss": 0.0167,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.9503614457831325,
|
|
"grad_norm": 0.5627671480178833,
|
|
"learning_rate": 3.458673855035597e-05,
|
|
"loss": 0.031,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.952289156626506,
|
|
"grad_norm": 0.4571435749530792,
|
|
"learning_rate": 3.455601902808876e-05,
|
|
"loss": 0.0191,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.9542168674698795,
|
|
"grad_norm": 1.0117709636688232,
|
|
"learning_rate": 3.452522631343515e-05,
|
|
"loss": 0.0192,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.9561445783132531,
|
|
"grad_norm": 0.28375712037086487,
|
|
"learning_rate": 3.449436056123086e-05,
|
|
"loss": 0.0159,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.9580722891566265,
|
|
"grad_norm": 0.26381856203079224,
|
|
"learning_rate": 3.446342192667893e-05,
|
|
"loss": 0.0113,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.49317577481269836,
|
|
"learning_rate": 3.443241056534884e-05,
|
|
"loss": 0.0332,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.9619277108433735,
|
|
"grad_norm": 0.28884485363960266,
|
|
"learning_rate": 3.440132663317579e-05,
|
|
"loss": 0.0117,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.963855421686747,
|
|
"grad_norm": 0.36255285143852234,
|
|
"learning_rate": 3.4370170286459864e-05,
|
|
"loss": 0.0169,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.9657831325301205,
|
|
"grad_norm": 0.4265049993991852,
|
|
"learning_rate": 3.433894168186529e-05,
|
|
"loss": 0.0217,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.967710843373494,
|
|
"grad_norm": 0.8169426321983337,
|
|
"learning_rate": 3.430764097641962e-05,
|
|
"loss": 0.0207,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.9696385542168675,
|
|
"grad_norm": 1.866077184677124,
|
|
"learning_rate": 3.427626832751296e-05,
|
|
"loss": 0.0381,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.971566265060241,
|
|
"grad_norm": 0.33124980330467224,
|
|
"learning_rate": 3.424482389289716e-05,
|
|
"loss": 0.0245,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.9734939759036144,
|
|
"grad_norm": 0.37479540705680847,
|
|
"learning_rate": 3.4213307830685055e-05,
|
|
"loss": 0.0164,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.9754216867469879,
|
|
"grad_norm": 0.39738863706588745,
|
|
"learning_rate": 3.4181720299349615e-05,
|
|
"loss": 0.0297,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.9773493975903614,
|
|
"grad_norm": 0.2567287087440491,
|
|
"learning_rate": 3.4150061457723205e-05,
|
|
"loss": 0.0102,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.9792771084337349,
|
|
"grad_norm": 0.6230517029762268,
|
|
"learning_rate": 3.411833146499675e-05,
|
|
"loss": 0.0243,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.9812048192771085,
|
|
"grad_norm": 0.44843971729278564,
|
|
"learning_rate": 3.408653048071894e-05,
|
|
"loss": 0.0357,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.983132530120482,
|
|
"grad_norm": 1.0569655895233154,
|
|
"learning_rate": 3.405465866479546e-05,
|
|
"loss": 0.037,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.9850602409638555,
|
|
"grad_norm": 0.29000964760780334,
|
|
"learning_rate": 3.402271617748812e-05,
|
|
"loss": 0.0129,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.9869879518072289,
|
|
"grad_norm": 2.1627447605133057,
|
|
"learning_rate": 3.399070317941413e-05,
|
|
"loss": 0.0442,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.9889156626506024,
|
|
"grad_norm": 0.27371272444725037,
|
|
"learning_rate": 3.395861983154522e-05,
|
|
"loss": 0.0119,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.9908433734939759,
|
|
"grad_norm": 0.4117226302623749,
|
|
"learning_rate": 3.392646629520688e-05,
|
|
"loss": 0.0455,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.9927710843373494,
|
|
"grad_norm": 0.5098996758460999,
|
|
"learning_rate": 3.389424273207752e-05,
|
|
"loss": 0.0203,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.9946987951807229,
|
|
"grad_norm": 0.5192157626152039,
|
|
"learning_rate": 3.386194930418767e-05,
|
|
"loss": 0.0329,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.9966265060240964,
|
|
"grad_norm": 0.18757697939872742,
|
|
"learning_rate": 3.382958617391915e-05,
|
|
"loss": 0.0065,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.9985542168674699,
|
|
"grad_norm": 0.3334413170814514,
|
|
"learning_rate": 3.3797153504004296e-05,
|
|
"loss": 0.0266,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.4152225852012634,
|
|
"learning_rate": 3.3764651457525095e-05,
|
|
"loss": 0.0169,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.0019277108433735,
|
|
"grad_norm": 0.43535247445106506,
|
|
"learning_rate": 3.373208019791237e-05,
|
|
"loss": 0.0221,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.003855421686747,
|
|
"grad_norm": 0.39292722940444946,
|
|
"learning_rate": 3.3699439888945e-05,
|
|
"loss": 0.0211,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.0057831325301205,
|
|
"grad_norm": 0.19566713273525238,
|
|
"learning_rate": 3.366673069474904e-05,
|
|
"loss": 0.0069,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.007710843373494,
|
|
"grad_norm": 0.5101853609085083,
|
|
"learning_rate": 3.3633952779796914e-05,
|
|
"loss": 0.0191,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.0096385542168675,
|
|
"grad_norm": 0.999434769153595,
|
|
"learning_rate": 3.360110630890664e-05,
|
|
"loss": 0.0196,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.011566265060241,
|
|
"grad_norm": 0.4646223783493042,
|
|
"learning_rate": 3.356819144724092e-05,
|
|
"loss": 0.0328,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.0134939759036146,
|
|
"grad_norm": 0.3132480978965759,
|
|
"learning_rate": 3.3535208360306354e-05,
|
|
"loss": 0.0203,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.0154216867469879,
|
|
"grad_norm": 0.3038032352924347,
|
|
"learning_rate": 3.350215721395261e-05,
|
|
"loss": 0.0122,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.0173493975903614,
|
|
"grad_norm": 0.45082882046699524,
|
|
"learning_rate": 3.346903817437157e-05,
|
|
"loss": 0.0437,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.0192771084337349,
|
|
"grad_norm": 0.26917046308517456,
|
|
"learning_rate": 3.343585140809651e-05,
|
|
"loss": 0.013,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.0212048192771084,
|
|
"grad_norm": 0.23869264125823975,
|
|
"learning_rate": 3.3402597082001276e-05,
|
|
"loss": 0.008,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.0231325301204819,
|
|
"grad_norm": 0.31315353512763977,
|
|
"learning_rate": 3.3369275363299394e-05,
|
|
"loss": 0.0078,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.0250602409638554,
|
|
"grad_norm": 0.4780346751213074,
|
|
"learning_rate": 3.333588641954327e-05,
|
|
"loss": 0.0225,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.026987951807229,
|
|
"grad_norm": 0.2920368015766144,
|
|
"learning_rate": 3.330243041862336e-05,
|
|
"loss": 0.0118,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.0289156626506024,
|
|
"grad_norm": 0.543669581413269,
|
|
"learning_rate": 3.326890752876728e-05,
|
|
"loss": 0.0338,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.030843373493976,
|
|
"grad_norm": 0.4288000464439392,
|
|
"learning_rate": 3.323531791853901e-05,
|
|
"loss": 0.0341,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.0327710843373494,
|
|
"grad_norm": 0.26600322127342224,
|
|
"learning_rate": 3.3201661756838e-05,
|
|
"loss": 0.0184,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.034698795180723,
|
|
"grad_norm": 0.290937602519989,
|
|
"learning_rate": 3.316793921289835e-05,
|
|
"loss": 0.0152,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.0366265060240965,
|
|
"grad_norm": 0.7621443271636963,
|
|
"learning_rate": 3.313415045628795e-05,
|
|
"loss": 0.0326,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.03855421686747,
|
|
"grad_norm": 0.5581283569335938,
|
|
"learning_rate": 3.3100295656907646e-05,
|
|
"loss": 0.0164,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.0404819277108435,
|
|
"grad_norm": 0.20930901169776917,
|
|
"learning_rate": 3.306637498499034e-05,
|
|
"loss": 0.0091,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.0424096385542168,
|
|
"grad_norm": 0.46212059259414673,
|
|
"learning_rate": 3.303238861110018e-05,
|
|
"loss": 0.0118,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.0443373493975903,
|
|
"grad_norm": 0.38259151577949524,
|
|
"learning_rate": 3.299833670613168e-05,
|
|
"loss": 0.0081,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.0462650602409638,
|
|
"grad_norm": 0.4888618290424347,
|
|
"learning_rate": 3.2964219441308865e-05,
|
|
"loss": 0.0138,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.0481927710843373,
|
|
"grad_norm": 0.32103127241134644,
|
|
"learning_rate": 3.2930036988184425e-05,
|
|
"loss": 0.0171,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.0501204819277108,
|
|
"grad_norm": 0.27787327766418457,
|
|
"learning_rate": 3.28957895186388e-05,
|
|
"loss": 0.0106,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.0520481927710843,
|
|
"grad_norm": 0.35597777366638184,
|
|
"learning_rate": 3.2861477204879395e-05,
|
|
"loss": 0.0123,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.0539759036144578,
|
|
"grad_norm": 0.3619804084300995,
|
|
"learning_rate": 3.2827100219439656e-05,
|
|
"loss": 0.0088,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.0559036144578313,
|
|
"grad_norm": 0.2525513470172882,
|
|
"learning_rate": 3.279265873517822e-05,
|
|
"loss": 0.0179,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.0578313253012048,
|
|
"grad_norm": 0.3910020887851715,
|
|
"learning_rate": 3.275815292527804e-05,
|
|
"loss": 0.0142,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.0597590361445783,
|
|
"grad_norm": 0.30515050888061523,
|
|
"learning_rate": 3.2723582963245526e-05,
|
|
"loss": 0.0123,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.0616867469879518,
|
|
"grad_norm": 0.21708644926548004,
|
|
"learning_rate": 3.2688949022909665e-05,
|
|
"loss": 0.0098,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.0636144578313254,
|
|
"grad_norm": 0.23307719826698303,
|
|
"learning_rate": 3.265425127842114e-05,
|
|
"loss": 0.0097,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.0655421686746989,
|
|
"grad_norm": 0.676654577255249,
|
|
"learning_rate": 3.261948990425147e-05,
|
|
"loss": 0.0227,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.0674698795180724,
|
|
"grad_norm": 0.4593975841999054,
|
|
"learning_rate": 3.258466507519213e-05,
|
|
"loss": 0.047,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.0693975903614459,
|
|
"grad_norm": 0.19405829906463623,
|
|
"learning_rate": 3.254977696635366e-05,
|
|
"loss": 0.0314,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.0713253012048192,
|
|
"grad_norm": 0.14563389122486115,
|
|
"learning_rate": 3.2514825753164774e-05,
|
|
"loss": 0.0046,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.0732530120481927,
|
|
"grad_norm": 0.2642340064048767,
|
|
"learning_rate": 3.247981161137153e-05,
|
|
"loss": 0.022,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.0751807228915662,
|
|
"grad_norm": 0.17274761199951172,
|
|
"learning_rate": 3.2444734717036386e-05,
|
|
"loss": 0.0134,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.0771084337349397,
|
|
"grad_norm": 0.44354626536369324,
|
|
"learning_rate": 3.240959524653735e-05,
|
|
"loss": 0.0211,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.0790361445783132,
|
|
"grad_norm": 0.2806888818740845,
|
|
"learning_rate": 3.237439337656708e-05,
|
|
"loss": 0.0141,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.0809638554216867,
|
|
"grad_norm": 0.21679501235485077,
|
|
"learning_rate": 3.2339129284131994e-05,
|
|
"loss": 0.019,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.0828915662650602,
|
|
"grad_norm": 0.3040260076522827,
|
|
"learning_rate": 3.2303803146551386e-05,
|
|
"loss": 0.0249,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.0848192771084337,
|
|
"grad_norm": 0.2793775200843811,
|
|
"learning_rate": 3.226841514145656e-05,
|
|
"loss": 0.0088,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.0867469879518072,
|
|
"grad_norm": 0.149955615401268,
|
|
"learning_rate": 3.223296544678987e-05,
|
|
"loss": 0.0054,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.0886746987951808,
|
|
"grad_norm": 0.22166767716407776,
|
|
"learning_rate": 3.219745424080389e-05,
|
|
"loss": 0.0109,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.0906024096385543,
|
|
"grad_norm": 0.22399431467056274,
|
|
"learning_rate": 3.2161881702060476e-05,
|
|
"loss": 0.0106,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.0925301204819278,
|
|
"grad_norm": 0.18537986278533936,
|
|
"learning_rate": 3.2126248009429905e-05,
|
|
"loss": 0.0077,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.0944578313253013,
|
|
"grad_norm": 0.24511495232582092,
|
|
"learning_rate": 3.2090553342089935e-05,
|
|
"loss": 0.0093,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.0963855421686748,
|
|
"grad_norm": 0.4766045808792114,
|
|
"learning_rate": 3.205479787952494e-05,
|
|
"loss": 0.036,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.0983132530120483,
|
|
"grad_norm": 0.1425715535879135,
|
|
"learning_rate": 3.201898180152499e-05,
|
|
"loss": 0.0085,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.1002409638554216,
|
|
"grad_norm": 0.1909666359424591,
|
|
"learning_rate": 3.1983105288184945e-05,
|
|
"loss": 0.0081,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.102168674698795,
|
|
"grad_norm": 0.44077104330062866,
|
|
"learning_rate": 3.194716851990355e-05,
|
|
"loss": 0.017,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.1040963855421686,
|
|
"grad_norm": 0.5757400989532471,
|
|
"learning_rate": 3.191117167738253e-05,
|
|
"loss": 0.021,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.106024096385542,
|
|
"grad_norm": 0.1977701038122177,
|
|
"learning_rate": 3.1875114941625705e-05,
|
|
"loss": 0.0096,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.1079518072289156,
|
|
"grad_norm": 0.3524581491947174,
|
|
"learning_rate": 3.1838998493938026e-05,
|
|
"loss": 0.0118,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.1098795180722891,
|
|
"grad_norm": 0.3301331698894501,
|
|
"learning_rate": 3.180282251592472e-05,
|
|
"loss": 0.0094,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.1118072289156626,
|
|
"grad_norm": 0.2774488925933838,
|
|
"learning_rate": 3.1766587189490336e-05,
|
|
"loss": 0.0131,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.1137349397590361,
|
|
"grad_norm": 1.732595443725586,
|
|
"learning_rate": 3.173029269683785e-05,
|
|
"loss": 0.0445,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.1156626506024097,
|
|
"grad_norm": 0.28746843338012695,
|
|
"learning_rate": 3.169393922046776e-05,
|
|
"loss": 0.0116,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.1175903614457832,
|
|
"grad_norm": 0.2952995002269745,
|
|
"learning_rate": 3.165752694317713e-05,
|
|
"loss": 0.0116,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.1195180722891567,
|
|
"grad_norm": 0.2938575744628906,
|
|
"learning_rate": 3.16210560480587e-05,
|
|
"loss": 0.013,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.1214457831325302,
|
|
"grad_norm": 0.22283495962619781,
|
|
"learning_rate": 3.158452671849998e-05,
|
|
"loss": 0.0052,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.1233734939759037,
|
|
"grad_norm": 0.6272858381271362,
|
|
"learning_rate": 3.154793913818226e-05,
|
|
"loss": 0.0182,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.1253012048192772,
|
|
"grad_norm": 0.479753702878952,
|
|
"learning_rate": 3.1511293491079804e-05,
|
|
"loss": 0.0146,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.1272289156626507,
|
|
"grad_norm": 0.31104400753974915,
|
|
"learning_rate": 3.1474589961458786e-05,
|
|
"loss": 0.0139,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.129156626506024,
|
|
"grad_norm": 0.4932832419872284,
|
|
"learning_rate": 3.1437828733876477e-05,
|
|
"loss": 0.0236,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.1310843373493975,
|
|
"grad_norm": 0.222808837890625,
|
|
"learning_rate": 3.140100999318025e-05,
|
|
"loss": 0.0084,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.133012048192771,
|
|
"grad_norm": 0.4515356719493866,
|
|
"learning_rate": 3.136413392450668e-05,
|
|
"loss": 0.0215,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.1349397590361445,
|
|
"grad_norm": 0.39302268624305725,
|
|
"learning_rate": 3.132720071328061e-05,
|
|
"loss": 0.0154,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.136867469879518,
|
|
"grad_norm": 0.43382835388183594,
|
|
"learning_rate": 3.1290210545214205e-05,
|
|
"loss": 0.0088,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.1387951807228915,
|
|
"grad_norm": 0.18707136809825897,
|
|
"learning_rate": 3.125316360630602e-05,
|
|
"loss": 0.0126,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.140722891566265,
|
|
"grad_norm": 0.5688219666481018,
|
|
"learning_rate": 3.121606008284011e-05,
|
|
"loss": 0.0147,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.1426506024096386,
|
|
"grad_norm": 0.3321833312511444,
|
|
"learning_rate": 3.1178900161385005e-05,
|
|
"loss": 0.0119,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.144578313253012,
|
|
"grad_norm": 0.3738424777984619,
|
|
"learning_rate": 3.114168402879286e-05,
|
|
"loss": 0.0158,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.1465060240963856,
|
|
"grad_norm": 0.2386978417634964,
|
|
"learning_rate": 3.110441187219846e-05,
|
|
"loss": 0.0107,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.148433734939759,
|
|
"grad_norm": 0.2165699452161789,
|
|
"learning_rate": 3.10670838790183e-05,
|
|
"loss": 0.0079,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.1503614457831326,
|
|
"grad_norm": 0.25952696800231934,
|
|
"learning_rate": 3.102970023694965e-05,
|
|
"loss": 0.0147,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.152289156626506,
|
|
"grad_norm": 0.21448305249214172,
|
|
"learning_rate": 3.099226113396959e-05,
|
|
"loss": 0.0099,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.1542168674698796,
|
|
"grad_norm": 0.37226060032844543,
|
|
"learning_rate": 3.095476675833405e-05,
|
|
"loss": 0.0214,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.1561445783132531,
|
|
"grad_norm": 0.29637983441352844,
|
|
"learning_rate": 3.0917217298576955e-05,
|
|
"loss": 0.0118,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.1580722891566264,
|
|
"grad_norm": 0.18535609543323517,
|
|
"learning_rate": 3.0879612943509154e-05,
|
|
"loss": 0.0086,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"grad_norm": 0.25874125957489014,
|
|
"learning_rate": 3.0841953882217536e-05,
|
|
"loss": 0.0088,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.1619277108433734,
|
|
"grad_norm": 0.46092745661735535,
|
|
"learning_rate": 3.08042403040641e-05,
|
|
"loss": 0.0241,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.163855421686747,
|
|
"grad_norm": 0.27023249864578247,
|
|
"learning_rate": 3.076647239868494e-05,
|
|
"loss": 0.0154,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.1657831325301204,
|
|
"grad_norm": 0.445157527923584,
|
|
"learning_rate": 3.072865035598933e-05,
|
|
"loss": 0.0197,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.167710843373494,
|
|
"grad_norm": 0.18097272515296936,
|
|
"learning_rate": 3.06907743661588e-05,
|
|
"loss": 0.0093,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.1696385542168675,
|
|
"grad_norm": 0.22469942271709442,
|
|
"learning_rate": 3.065284461964609e-05,
|
|
"loss": 0.0171,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.171566265060241,
|
|
"grad_norm": 0.20190906524658203,
|
|
"learning_rate": 3.061486130717428e-05,
|
|
"loss": 0.008,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.1734939759036145,
|
|
"grad_norm": 0.18294145166873932,
|
|
"learning_rate": 3.057682461973579e-05,
|
|
"loss": 0.0155,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.175421686746988,
|
|
"grad_norm": 0.34203943610191345,
|
|
"learning_rate": 3.053873474859143e-05,
|
|
"loss": 0.0212,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.1773493975903615,
|
|
"grad_norm": 0.49073582887649536,
|
|
"learning_rate": 3.050059188526942e-05,
|
|
"loss": 0.019,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.179277108433735,
|
|
"grad_norm": 0.3537680506706238,
|
|
"learning_rate": 3.046239622156446e-05,
|
|
"loss": 0.0147,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.1812048192771085,
|
|
"grad_norm": 0.2584632635116577,
|
|
"learning_rate": 3.042414794953674e-05,
|
|
"loss": 0.0088,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.1831325301204818,
|
|
"grad_norm": 0.3529360890388489,
|
|
"learning_rate": 3.0385847261510975e-05,
|
|
"loss": 0.0187,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.1850602409638555,
|
|
"grad_norm": 0.3331570327281952,
|
|
"learning_rate": 3.0347494350075465e-05,
|
|
"loss": 0.0124,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.1869879518072288,
|
|
"grad_norm": 0.2223527580499649,
|
|
"learning_rate": 3.0309089408081074e-05,
|
|
"loss": 0.01,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.1889156626506023,
|
|
"grad_norm": 0.21985746920108795,
|
|
"learning_rate": 3.027063262864032e-05,
|
|
"loss": 0.0087,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.1908433734939758,
|
|
"grad_norm": 0.2989653944969177,
|
|
"learning_rate": 3.023212420512637e-05,
|
|
"loss": 0.0137,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.1927710843373494,
|
|
"grad_norm": 0.17423275113105774,
|
|
"learning_rate": 3.0193564331172074e-05,
|
|
"loss": 0.0056,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.1946987951807229,
|
|
"grad_norm": 1.0992127656936646,
|
|
"learning_rate": 3.0154953200668976e-05,
|
|
"loss": 0.0274,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.1966265060240964,
|
|
"grad_norm": 0.21641989052295685,
|
|
"learning_rate": 3.011629100776638e-05,
|
|
"loss": 0.0151,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.1985542168674699,
|
|
"grad_norm": 0.4558199644088745,
|
|
"learning_rate": 3.007757794687033e-05,
|
|
"loss": 0.0424,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.2004819277108434,
|
|
"grad_norm": 0.42380189895629883,
|
|
"learning_rate": 3.003881421264266e-05,
|
|
"loss": 0.0079,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.202409638554217,
|
|
"grad_norm": 0.28791171312332153,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.0142,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.2043373493975904,
|
|
"grad_norm": 0.3906581997871399,
|
|
"learning_rate": 2.996113550411281e-05,
|
|
"loss": 0.0251,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.206265060240964,
|
|
"grad_norm": 0.47848746180534363,
|
|
"learning_rate": 2.9922220920404375e-05,
|
|
"loss": 0.0137,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.2081927710843374,
|
|
"grad_norm": 0.22666941583156586,
|
|
"learning_rate": 2.9883256444549862e-05,
|
|
"loss": 0.0105,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.210120481927711,
|
|
"grad_norm": 0.18968136608600616,
|
|
"learning_rate": 2.984424227247529e-05,
|
|
"loss": 0.0089,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.2120481927710842,
|
|
"grad_norm": 0.28732606768608093,
|
|
"learning_rate": 2.980517860035656e-05,
|
|
"loss": 0.0253,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.213975903614458,
|
|
"grad_norm": 0.21131543815135956,
|
|
"learning_rate": 2.9766065624618518e-05,
|
|
"loss": 0.0134,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.2159036144578312,
|
|
"grad_norm": 0.7594877481460571,
|
|
"learning_rate": 2.972690354193388e-05,
|
|
"loss": 0.0157,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.2178313253012047,
|
|
"grad_norm": 0.730291485786438,
|
|
"learning_rate": 2.96876925492223e-05,
|
|
"loss": 0.0204,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.2197590361445783,
|
|
"grad_norm": 0.20333674550056458,
|
|
"learning_rate": 2.9648432843649382e-05,
|
|
"loss": 0.0114,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.2216867469879518,
|
|
"grad_norm": 0.5680793523788452,
|
|
"learning_rate": 2.960912462262566e-05,
|
|
"loss": 0.0146,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.2236144578313253,
|
|
"grad_norm": 0.4591079354286194,
|
|
"learning_rate": 2.9569768083805618e-05,
|
|
"loss": 0.0112,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.2255421686746988,
|
|
"grad_norm": 0.3793511390686035,
|
|
"learning_rate": 2.953036342508671e-05,
|
|
"loss": 0.0377,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.2274698795180723,
|
|
"grad_norm": 1.118723750114441,
|
|
"learning_rate": 2.9490910844608346e-05,
|
|
"loss": 0.0432,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.2293975903614458,
|
|
"grad_norm": 0.36990776658058167,
|
|
"learning_rate": 2.9451410540750887e-05,
|
|
"loss": 0.0203,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.2313253012048193,
|
|
"grad_norm": 0.930397629737854,
|
|
"learning_rate": 2.94118627121347e-05,
|
|
"loss": 0.0311,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.2332530120481928,
|
|
"grad_norm": 0.2347625195980072,
|
|
"learning_rate": 2.9372267557619075e-05,
|
|
"loss": 0.0168,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.2351807228915663,
|
|
"grad_norm": 0.3720332384109497,
|
|
"learning_rate": 2.933262527630131e-05,
|
|
"loss": 0.0136,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.2371084337349398,
|
|
"grad_norm": 0.4871984124183655,
|
|
"learning_rate": 2.929293606751565e-05,
|
|
"loss": 0.0339,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.2390361445783133,
|
|
"grad_norm": 0.35853689908981323,
|
|
"learning_rate": 2.9253200130832322e-05,
|
|
"loss": 0.0095,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.2409638554216866,
|
|
"grad_norm": 0.42003703117370605,
|
|
"learning_rate": 2.92134176660565e-05,
|
|
"loss": 0.0142,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.2428915662650604,
|
|
"grad_norm": 0.3854500651359558,
|
|
"learning_rate": 2.9173588873227338e-05,
|
|
"loss": 0.0209,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.2448192771084337,
|
|
"grad_norm": 0.24665917456150055,
|
|
"learning_rate": 2.913371395261691e-05,
|
|
"loss": 0.0087,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.2467469879518072,
|
|
"grad_norm": 0.41571593284606934,
|
|
"learning_rate": 2.9093793104729268e-05,
|
|
"loss": 0.0164,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.2486746987951807,
|
|
"grad_norm": 0.4597891569137573,
|
|
"learning_rate": 2.9053826530299377e-05,
|
|
"loss": 0.0138,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.2506024096385542,
|
|
"grad_norm": 0.43345385789871216,
|
|
"learning_rate": 2.901381443029215e-05,
|
|
"loss": 0.0353,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.2525301204819277,
|
|
"grad_norm": 0.3706768751144409,
|
|
"learning_rate": 2.897375700590141e-05,
|
|
"loss": 0.007,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.2544578313253012,
|
|
"grad_norm": 0.30305296182632446,
|
|
"learning_rate": 2.8933654458548873e-05,
|
|
"loss": 0.0123,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.2563855421686747,
|
|
"grad_norm": 0.2042127549648285,
|
|
"learning_rate": 2.8893506989883167e-05,
|
|
"loss": 0.0099,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.2583132530120482,
|
|
"grad_norm": 0.20524422824382782,
|
|
"learning_rate": 2.8853314801778784e-05,
|
|
"loss": 0.0097,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.2602409638554217,
|
|
"grad_norm": 0.2351921945810318,
|
|
"learning_rate": 2.8813078096335093e-05,
|
|
"loss": 0.0091,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.2621686746987952,
|
|
"grad_norm": 0.34547340869903564,
|
|
"learning_rate": 2.87727970758753e-05,
|
|
"loss": 0.0088,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.2640963855421687,
|
|
"grad_norm": 0.35163217782974243,
|
|
"learning_rate": 2.8732471942945443e-05,
|
|
"loss": 0.0145,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.266024096385542,
|
|
"grad_norm": 1.715137243270874,
|
|
"learning_rate": 2.8692102900313378e-05,
|
|
"loss": 0.0198,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.2679518072289158,
|
|
"grad_norm": 0.2860178053379059,
|
|
"learning_rate": 2.8651690150967748e-05,
|
|
"loss": 0.0085,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.269879518072289,
|
|
"grad_norm": 0.21175967156887054,
|
|
"learning_rate": 2.8611233898116967e-05,
|
|
"loss": 0.0071,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.2718072289156628,
|
|
"grad_norm": 0.33726972341537476,
|
|
"learning_rate": 2.85707343451882e-05,
|
|
"loss": 0.012,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.273734939759036,
|
|
"grad_norm": 0.2138456553220749,
|
|
"learning_rate": 2.853019169582635e-05,
|
|
"loss": 0.0092,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.2756626506024096,
|
|
"grad_norm": 0.2304934412240982,
|
|
"learning_rate": 2.8489606153892997e-05,
|
|
"loss": 0.0144,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.277590361445783,
|
|
"grad_norm": 0.2691061794757843,
|
|
"learning_rate": 2.8448977923465425e-05,
|
|
"loss": 0.0121,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.2795180722891566,
|
|
"grad_norm": 0.35254305601119995,
|
|
"learning_rate": 2.840830720883555e-05,
|
|
"loss": 0.0125,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.28144578313253,
|
|
"grad_norm": 0.36552608013153076,
|
|
"learning_rate": 2.836759421450893e-05,
|
|
"loss": 0.021,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.2833734939759036,
|
|
"grad_norm": 0.37177154421806335,
|
|
"learning_rate": 2.83268391452037e-05,
|
|
"loss": 0.0216,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.2853012048192771,
|
|
"grad_norm": 0.20932547748088837,
|
|
"learning_rate": 2.828604220584958e-05,
|
|
"loss": 0.0077,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.2872289156626506,
|
|
"grad_norm": 0.5158557295799255,
|
|
"learning_rate": 2.824520360158681e-05,
|
|
"loss": 0.0394,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.2891566265060241,
|
|
"grad_norm": 0.22623969614505768,
|
|
"learning_rate": 2.820432353776515e-05,
|
|
"loss": 0.0087,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.2910843373493976,
|
|
"grad_norm": 0.2996046245098114,
|
|
"learning_rate": 2.8163402219942822e-05,
|
|
"loss": 0.01,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.2930120481927712,
|
|
"grad_norm": 0.24957989156246185,
|
|
"learning_rate": 2.8122439853885488e-05,
|
|
"loss": 0.0127,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.2949397590361444,
|
|
"grad_norm": 0.2636559307575226,
|
|
"learning_rate": 2.8081436645565216e-05,
|
|
"loss": 0.0128,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.2968674698795182,
|
|
"grad_norm": 0.3531591296195984,
|
|
"learning_rate": 2.804039280115944e-05,
|
|
"loss": 0.0199,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.2987951807228915,
|
|
"grad_norm": 0.3682299852371216,
|
|
"learning_rate": 2.7999308527049927e-05,
|
|
"loss": 0.0088,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.3007228915662652,
|
|
"grad_norm": 0.19555217027664185,
|
|
"learning_rate": 2.795818402982174e-05,
|
|
"loss": 0.0084,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.3026506024096385,
|
|
"grad_norm": 0.2864912450313568,
|
|
"learning_rate": 2.7917019516262186e-05,
|
|
"loss": 0.0154,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.304578313253012,
|
|
"grad_norm": 0.2211237996816635,
|
|
"learning_rate": 2.78758151933598e-05,
|
|
"loss": 0.0078,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.3065060240963855,
|
|
"grad_norm": 0.13646945357322693,
|
|
"learning_rate": 2.7834571268303294e-05,
|
|
"loss": 0.0058,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.308433734939759,
|
|
"grad_norm": 0.16530285775661469,
|
|
"learning_rate": 2.779328794848049e-05,
|
|
"loss": 0.007,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.3103614457831325,
|
|
"grad_norm": 0.2145693302154541,
|
|
"learning_rate": 2.7751965441477325e-05,
|
|
"loss": 0.0203,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.312289156626506,
|
|
"grad_norm": 0.24273739755153656,
|
|
"learning_rate": 2.771060395507677e-05,
|
|
"loss": 0.0106,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.3142168674698795,
|
|
"grad_norm": 0.20430618524551392,
|
|
"learning_rate": 2.7669203697257794e-05,
|
|
"loss": 0.0122,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.316144578313253,
|
|
"grad_norm": 0.2502615749835968,
|
|
"learning_rate": 2.7627764876194335e-05,
|
|
"loss": 0.0101,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.3180722891566266,
|
|
"grad_norm": 0.287239670753479,
|
|
"learning_rate": 2.7586287700254214e-05,
|
|
"loss": 0.0203,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"grad_norm": 0.16239754855632782,
|
|
"learning_rate": 2.7544772377998147e-05,
|
|
"loss": 0.0084,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.3219277108433736,
|
|
"grad_norm": 0.27174142003059387,
|
|
"learning_rate": 2.7503219118178636e-05,
|
|
"loss": 0.008,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.3238554216867469,
|
|
"grad_norm": 0.12878240644931793,
|
|
"learning_rate": 2.7461628129738954e-05,
|
|
"loss": 0.0053,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.3257831325301206,
|
|
"grad_norm": 0.16112515330314636,
|
|
"learning_rate": 2.7419999621812086e-05,
|
|
"loss": 0.0059,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.3277108433734939,
|
|
"grad_norm": 0.2398834228515625,
|
|
"learning_rate": 2.7378333803719672e-05,
|
|
"loss": 0.0095,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.3296385542168676,
|
|
"grad_norm": 0.18516193330287933,
|
|
"learning_rate": 2.733663088497097e-05,
|
|
"loss": 0.0071,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.331566265060241,
|
|
"grad_norm": 0.2974924147129059,
|
|
"learning_rate": 2.7294891075261785e-05,
|
|
"loss": 0.0227,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.3334939759036144,
|
|
"grad_norm": 0.12931054830551147,
|
|
"learning_rate": 2.7253114584473418e-05,
|
|
"loss": 0.0039,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.335421686746988,
|
|
"grad_norm": 0.16319474577903748,
|
|
"learning_rate": 2.7211301622671623e-05,
|
|
"loss": 0.008,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.3373493975903614,
|
|
"grad_norm": 0.27622169256210327,
|
|
"learning_rate": 2.7169452400105533e-05,
|
|
"loss": 0.0238,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.339277108433735,
|
|
"grad_norm": 0.45309779047966003,
|
|
"learning_rate": 2.712756712720663e-05,
|
|
"loss": 0.0439,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.3412048192771084,
|
|
"grad_norm": 0.2469855099916458,
|
|
"learning_rate": 2.708564601458765e-05,
|
|
"loss": 0.0085,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.343132530120482,
|
|
"grad_norm": 0.4245856702327728,
|
|
"learning_rate": 2.7043689273041535e-05,
|
|
"loss": 0.0097,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.3450602409638555,
|
|
"grad_norm": 0.26796087622642517,
|
|
"learning_rate": 2.7001697113540414e-05,
|
|
"loss": 0.0119,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 1.346987951807229,
|
|
"grad_norm": 0.3569283187389374,
|
|
"learning_rate": 2.6959669747234482e-05,
|
|
"loss": 0.0096,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 1.3489156626506025,
|
|
"grad_norm": 0.7038524150848389,
|
|
"learning_rate": 2.6917607385450973e-05,
|
|
"loss": 0.0317,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.350843373493976,
|
|
"grad_norm": 0.23568563163280487,
|
|
"learning_rate": 2.687551023969308e-05,
|
|
"loss": 0.0112,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.3527710843373493,
|
|
"grad_norm": 0.20338499546051025,
|
|
"learning_rate": 2.6833378521638935e-05,
|
|
"loss": 0.0092,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.354698795180723,
|
|
"grad_norm": 4.22187614440918,
|
|
"learning_rate": 2.679121244314046e-05,
|
|
"loss": 0.0314,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.3566265060240963,
|
|
"grad_norm": 0.2542206048965454,
|
|
"learning_rate": 2.674901221622239e-05,
|
|
"loss": 0.0158,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.3585542168674698,
|
|
"grad_norm": 0.49705010652542114,
|
|
"learning_rate": 2.670677805308116e-05,
|
|
"loss": 0.0162,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.3604819277108433,
|
|
"grad_norm": 0.17502115666866302,
|
|
"learning_rate": 2.666451016608383e-05,
|
|
"loss": 0.0074,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.3624096385542168,
|
|
"grad_norm": 0.21738742291927338,
|
|
"learning_rate": 2.6622208767767075e-05,
|
|
"loss": 0.0135,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.3643373493975903,
|
|
"grad_norm": 0.3309847414493561,
|
|
"learning_rate": 2.6579874070836032e-05,
|
|
"loss": 0.0107,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.3662650602409638,
|
|
"grad_norm": 0.10706827789545059,
|
|
"learning_rate": 2.6537506288163303e-05,
|
|
"loss": 0.0043,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.3681927710843373,
|
|
"grad_norm": 0.173640176653862,
|
|
"learning_rate": 2.6495105632787835e-05,
|
|
"loss": 0.0092,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.3701204819277109,
|
|
"grad_norm": 0.2636397182941437,
|
|
"learning_rate": 2.6452672317913893e-05,
|
|
"loss": 0.0097,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.3720481927710844,
|
|
"grad_norm": 0.28485360741615295,
|
|
"learning_rate": 2.6410206556909943e-05,
|
|
"loss": 0.0193,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.3739759036144579,
|
|
"grad_norm": 0.23210027813911438,
|
|
"learning_rate": 2.636770856330761e-05,
|
|
"loss": 0.0229,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.3759036144578314,
|
|
"grad_norm": 0.13388316333293915,
|
|
"learning_rate": 2.6325178550800596e-05,
|
|
"loss": 0.004,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.377831325301205,
|
|
"grad_norm": 0.5131422877311707,
|
|
"learning_rate": 2.6282616733243603e-05,
|
|
"loss": 0.0137,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.3797590361445784,
|
|
"grad_norm": 0.3243267834186554,
|
|
"learning_rate": 2.6240023324651258e-05,
|
|
"loss": 0.0153,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.3816867469879517,
|
|
"grad_norm": 0.1440611034631729,
|
|
"learning_rate": 2.619739853919704e-05,
|
|
"loss": 0.0031,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.3836144578313254,
|
|
"grad_norm": 0.30346596240997314,
|
|
"learning_rate": 2.6154742591212196e-05,
|
|
"loss": 0.0109,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.3855421686746987,
|
|
"grad_norm": 0.19109240174293518,
|
|
"learning_rate": 2.611205569518468e-05,
|
|
"loss": 0.0094,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.3874698795180722,
|
|
"grad_norm": 0.28636518120765686,
|
|
"learning_rate": 2.6069338065758056e-05,
|
|
"loss": 0.0123,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.3893975903614457,
|
|
"grad_norm": 0.28083911538124084,
|
|
"learning_rate": 2.6026589917730416e-05,
|
|
"loss": 0.0104,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.3913253012048192,
|
|
"grad_norm": 0.36553966999053955,
|
|
"learning_rate": 2.5983811466053327e-05,
|
|
"loss": 0.0143,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.3932530120481927,
|
|
"grad_norm": 0.23317205905914307,
|
|
"learning_rate": 2.5941002925830708e-05,
|
|
"loss": 0.011,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.3951807228915662,
|
|
"grad_norm": 0.3825171887874603,
|
|
"learning_rate": 2.589816451231781e-05,
|
|
"loss": 0.0098,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.3971084337349398,
|
|
"grad_norm": 0.19916608929634094,
|
|
"learning_rate": 2.585529644092006e-05,
|
|
"loss": 0.0094,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.3990361445783133,
|
|
"grad_norm": 0.19990523159503937,
|
|
"learning_rate": 2.5812398927192027e-05,
|
|
"loss": 0.0128,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.4009638554216868,
|
|
"grad_norm": 0.34662899374961853,
|
|
"learning_rate": 2.5769472186836347e-05,
|
|
"loss": 0.0091,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.4028915662650603,
|
|
"grad_norm": 0.23481112718582153,
|
|
"learning_rate": 2.5726516435702583e-05,
|
|
"loss": 0.0154,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.4048192771084338,
|
|
"grad_norm": 0.1846667379140854,
|
|
"learning_rate": 2.5683531889786194e-05,
|
|
"loss": 0.0088,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.4067469879518073,
|
|
"grad_norm": 0.16717663407325745,
|
|
"learning_rate": 2.564051876522742e-05,
|
|
"loss": 0.0083,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.4086746987951808,
|
|
"grad_norm": 0.4116475284099579,
|
|
"learning_rate": 2.5597477278310202e-05,
|
|
"loss": 0.0179,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.410602409638554,
|
|
"grad_norm": 0.171807661652565,
|
|
"learning_rate": 2.5554407645461115e-05,
|
|
"loss": 0.0063,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.4125301204819278,
|
|
"grad_norm": 0.1954439878463745,
|
|
"learning_rate": 2.5511310083248243e-05,
|
|
"loss": 0.017,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.4144578313253011,
|
|
"grad_norm": 0.37158989906311035,
|
|
"learning_rate": 2.5468184808380104e-05,
|
|
"loss": 0.0173,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.4163855421686746,
|
|
"grad_norm": 0.2001633644104004,
|
|
"learning_rate": 2.542503203770458e-05,
|
|
"loss": 0.0165,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.4183132530120481,
|
|
"grad_norm": 0.45673373341560364,
|
|
"learning_rate": 2.53818519882078e-05,
|
|
"loss": 0.0185,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.4202409638554216,
|
|
"grad_norm": 0.3838701546192169,
|
|
"learning_rate": 2.5338644877013067e-05,
|
|
"loss": 0.0134,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.4221686746987952,
|
|
"grad_norm": 0.32032477855682373,
|
|
"learning_rate": 2.5295410921379745e-05,
|
|
"loss": 0.0143,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.4240963855421687,
|
|
"grad_norm": 0.4594039022922516,
|
|
"learning_rate": 2.52521503387022e-05,
|
|
"loss": 0.0193,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.4260240963855422,
|
|
"grad_norm": 0.3889620900154114,
|
|
"learning_rate": 2.5208863346508667e-05,
|
|
"loss": 0.0114,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.4279518072289157,
|
|
"grad_norm": 0.33153319358825684,
|
|
"learning_rate": 2.5165550162460203e-05,
|
|
"loss": 0.0102,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.4298795180722892,
|
|
"grad_norm": 0.7269518375396729,
|
|
"learning_rate": 2.5122211004349536e-05,
|
|
"loss": 0.0215,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.4318072289156627,
|
|
"grad_norm": 0.31653261184692383,
|
|
"learning_rate": 2.5078846090100023e-05,
|
|
"loss": 0.0115,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.4337349397590362,
|
|
"grad_norm": 0.20620353519916534,
|
|
"learning_rate": 2.5035455637764518e-05,
|
|
"loss": 0.0153,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.4356626506024097,
|
|
"grad_norm": 0.17266008257865906,
|
|
"learning_rate": 2.4992039865524297e-05,
|
|
"loss": 0.0069,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.4375903614457832,
|
|
"grad_norm": 0.24760811030864716,
|
|
"learning_rate": 2.494859899168795e-05,
|
|
"loss": 0.0108,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.4395180722891565,
|
|
"grad_norm": 0.2584865391254425,
|
|
"learning_rate": 2.4905133234690282e-05,
|
|
"loss": 0.0095,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.4414457831325302,
|
|
"grad_norm": 0.48847514390945435,
|
|
"learning_rate": 2.486164281309122e-05,
|
|
"loss": 0.0181,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.4433734939759035,
|
|
"grad_norm": 0.42942047119140625,
|
|
"learning_rate": 2.4818127945574717e-05,
|
|
"loss": 0.025,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.445301204819277,
|
|
"grad_norm": 0.23713800311088562,
|
|
"learning_rate": 2.4774588850947648e-05,
|
|
"loss": 0.0085,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.4472289156626506,
|
|
"grad_norm": 0.8797569870948792,
|
|
"learning_rate": 2.473102574813871e-05,
|
|
"loss": 0.0097,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.449156626506024,
|
|
"grad_norm": 0.2744862735271454,
|
|
"learning_rate": 2.4687438856197302e-05,
|
|
"loss": 0.0122,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 1.4510843373493976,
|
|
"grad_norm": 0.12747010588645935,
|
|
"learning_rate": 2.4643828394292478e-05,
|
|
"loss": 0.0056,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 1.453012048192771,
|
|
"grad_norm": 0.37376829981803894,
|
|
"learning_rate": 2.4600194581711775e-05,
|
|
"loss": 0.0052,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 1.4549397590361446,
|
|
"grad_norm": 0.2536911368370056,
|
|
"learning_rate": 2.4556537637860176e-05,
|
|
"loss": 0.0113,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 1.456867469879518,
|
|
"grad_norm": 0.25950780510902405,
|
|
"learning_rate": 2.451285778225894e-05,
|
|
"loss": 0.0099,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 1.4587951807228916,
|
|
"grad_norm": 0.19535955786705017,
|
|
"learning_rate": 2.4469155234544565e-05,
|
|
"loss": 0.0069,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 1.4607228915662651,
|
|
"grad_norm": 0.22816115617752075,
|
|
"learning_rate": 2.442543021446764e-05,
|
|
"loss": 0.0088,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 1.4626506024096386,
|
|
"grad_norm": 0.3363986313343048,
|
|
"learning_rate": 2.4381682941891755e-05,
|
|
"loss": 0.0182,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 1.464578313253012,
|
|
"grad_norm": 0.21492891013622284,
|
|
"learning_rate": 2.4337913636792382e-05,
|
|
"loss": 0.0069,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.4665060240963856,
|
|
"grad_norm": 0.6070862412452698,
|
|
"learning_rate": 2.429412251925579e-05,
|
|
"loss": 0.0406,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 1.468433734939759,
|
|
"grad_norm": 2.6469690799713135,
|
|
"learning_rate": 2.425030980947793e-05,
|
|
"loss": 0.0205,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 1.4703614457831327,
|
|
"grad_norm": 0.30909740924835205,
|
|
"learning_rate": 2.420647572776332e-05,
|
|
"loss": 0.0136,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 1.472289156626506,
|
|
"grad_norm": 0.6639553904533386,
|
|
"learning_rate": 2.416262049452395e-05,
|
|
"loss": 0.011,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 1.4742168674698795,
|
|
"grad_norm": 0.2919616997241974,
|
|
"learning_rate": 2.4118744330278147e-05,
|
|
"loss": 0.0131,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 1.476144578313253,
|
|
"grad_norm": 0.5232429504394531,
|
|
"learning_rate": 2.4074847455649523e-05,
|
|
"loss": 0.0138,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 1.4780722891566265,
|
|
"grad_norm": 5.630630970001221,
|
|
"learning_rate": 2.403093009136579e-05,
|
|
"loss": 0.0264,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"grad_norm": 0.33234721422195435,
|
|
"learning_rate": 2.3986992458257707e-05,
|
|
"loss": 0.0111,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 1.4819277108433735,
|
|
"grad_norm": 0.28444772958755493,
|
|
"learning_rate": 2.3943034777257945e-05,
|
|
"loss": 0.0144,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 1.483855421686747,
|
|
"grad_norm": 0.16229979693889618,
|
|
"learning_rate": 2.38990572694e-05,
|
|
"loss": 0.0062,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.4857831325301205,
|
|
"grad_norm": 0.27474716305732727,
|
|
"learning_rate": 2.385506015581704e-05,
|
|
"loss": 0.0172,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.487710843373494,
|
|
"grad_norm": 0.246526300907135,
|
|
"learning_rate": 2.381104365774083e-05,
|
|
"loss": 0.012,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.4896385542168675,
|
|
"grad_norm": 0.282047837972641,
|
|
"learning_rate": 2.37670079965006e-05,
|
|
"loss": 0.0116,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.491566265060241,
|
|
"grad_norm": 0.2878139317035675,
|
|
"learning_rate": 2.3722953393521944e-05,
|
|
"loss": 0.0147,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.4934939759036143,
|
|
"grad_norm": 0.5586277842521667,
|
|
"learning_rate": 2.367888007032571e-05,
|
|
"loss": 0.0111,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.495421686746988,
|
|
"grad_norm": 0.562160313129425,
|
|
"learning_rate": 2.3634788248526846e-05,
|
|
"loss": 0.0061,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.4973493975903613,
|
|
"grad_norm": 0.3452005982398987,
|
|
"learning_rate": 2.3590678149833356e-05,
|
|
"loss": 0.0205,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.499277108433735,
|
|
"grad_norm": 0.7757686376571655,
|
|
"learning_rate": 2.3546549996045114e-05,
|
|
"loss": 0.0273,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.5012048192771084,
|
|
"grad_norm": 0.19530551135540009,
|
|
"learning_rate": 2.3502404009052812e-05,
|
|
"loss": 0.0083,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.503132530120482,
|
|
"grad_norm": 0.2586531639099121,
|
|
"learning_rate": 2.3458240410836775e-05,
|
|
"loss": 0.0122,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.5050602409638554,
|
|
"grad_norm": 0.30063286423683167,
|
|
"learning_rate": 2.3414059423465924e-05,
|
|
"loss": 0.0083,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.5069879518072289,
|
|
"grad_norm": 0.18663185834884644,
|
|
"learning_rate": 2.3369861269096575e-05,
|
|
"loss": 0.0104,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.5089156626506024,
|
|
"grad_norm": 0.4405941069126129,
|
|
"learning_rate": 2.3325646169971416e-05,
|
|
"loss": 0.0264,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.510843373493976,
|
|
"grad_norm": 0.2947913110256195,
|
|
"learning_rate": 2.3281414348418294e-05,
|
|
"loss": 0.0107,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.5127710843373494,
|
|
"grad_norm": 0.23813778162002563,
|
|
"learning_rate": 2.3237166026849158e-05,
|
|
"loss": 0.0084,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.514698795180723,
|
|
"grad_norm": 0.33380329608917236,
|
|
"learning_rate": 2.3192901427758932e-05,
|
|
"loss": 0.0111,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.5166265060240964,
|
|
"grad_norm": 0.3736988306045532,
|
|
"learning_rate": 2.314862077372438e-05,
|
|
"loss": 0.0135,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.5185542168674697,
|
|
"grad_norm": 0.3785395920276642,
|
|
"learning_rate": 2.3104324287402996e-05,
|
|
"loss": 0.0265,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.5204819277108435,
|
|
"grad_norm": 0.3359154462814331,
|
|
"learning_rate": 2.3060012191531885e-05,
|
|
"loss": 0.0127,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.5224096385542167,
|
|
"grad_norm": 0.720753014087677,
|
|
"learning_rate": 2.301568470892664e-05,
|
|
"loss": 0.0134,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.5243373493975905,
|
|
"grad_norm": 0.36473193764686584,
|
|
"learning_rate": 2.297134206248024e-05,
|
|
"loss": 0.0318,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.5262650602409638,
|
|
"grad_norm": 0.29987087845802307,
|
|
"learning_rate": 2.2926984475161884e-05,
|
|
"loss": 0.008,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.5281927710843375,
|
|
"grad_norm": 0.2883112132549286,
|
|
"learning_rate": 2.2882612170015914e-05,
|
|
"loss": 0.0125,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.5301204819277108,
|
|
"grad_norm": 0.28983229398727417,
|
|
"learning_rate": 2.2838225370160682e-05,
|
|
"loss": 0.0155,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.5320481927710843,
|
|
"grad_norm": 0.47236886620521545,
|
|
"learning_rate": 2.2793824298787414e-05,
|
|
"loss": 0.0132,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.5339759036144578,
|
|
"grad_norm": 0.8328865170478821,
|
|
"learning_rate": 2.2749409179159104e-05,
|
|
"loss": 0.026,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.5359036144578313,
|
|
"grad_norm": 0.3129172623157501,
|
|
"learning_rate": 2.2704980234609396e-05,
|
|
"loss": 0.0099,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.5378313253012048,
|
|
"grad_norm": 0.22284500300884247,
|
|
"learning_rate": 2.2660537688541416e-05,
|
|
"loss": 0.009,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.5397590361445783,
|
|
"grad_norm": 0.3346405625343323,
|
|
"learning_rate": 2.2616081764426726e-05,
|
|
"loss": 0.0077,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 1.5416867469879518,
|
|
"grad_norm": 0.2923565208911896,
|
|
"learning_rate": 2.2571612685804124e-05,
|
|
"loss": 0.0119,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 1.5436144578313253,
|
|
"grad_norm": 0.1921311914920807,
|
|
"learning_rate": 2.252713067627857e-05,
|
|
"loss": 0.0083,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.5455421686746988,
|
|
"grad_norm": 0.23221106827259064,
|
|
"learning_rate": 2.2482635959520044e-05,
|
|
"loss": 0.0049,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.5474698795180721,
|
|
"grad_norm": 0.6340724229812622,
|
|
"learning_rate": 2.243812875926241e-05,
|
|
"loss": 0.0273,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.5493975903614459,
|
|
"grad_norm": 0.2699439823627472,
|
|
"learning_rate": 2.2393609299302314e-05,
|
|
"loss": 0.0108,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.5513253012048192,
|
|
"grad_norm": 0.2005189210176468,
|
|
"learning_rate": 2.2349077803498052e-05,
|
|
"loss": 0.0076,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.5532530120481929,
|
|
"grad_norm": 0.39668548107147217,
|
|
"learning_rate": 2.230453449576842e-05,
|
|
"loss": 0.0135,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.5551807228915662,
|
|
"grad_norm": 0.2406950294971466,
|
|
"learning_rate": 2.2259979600091635e-05,
|
|
"loss": 0.0094,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.55710843373494,
|
|
"grad_norm": 0.30363157391548157,
|
|
"learning_rate": 2.2215413340504158e-05,
|
|
"loss": 0.0178,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.5590361445783132,
|
|
"grad_norm": 0.19508181512355804,
|
|
"learning_rate": 2.2170835941099605e-05,
|
|
"loss": 0.0069,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.5609638554216867,
|
|
"grad_norm": 0.734106719493866,
|
|
"learning_rate": 2.2126247626027615e-05,
|
|
"loss": 0.0319,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.5628915662650602,
|
|
"grad_norm": 0.2591583728790283,
|
|
"learning_rate": 2.208164861949268e-05,
|
|
"loss": 0.0168,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.5648192771084337,
|
|
"grad_norm": 0.2386734038591385,
|
|
"learning_rate": 2.20370391457531e-05,
|
|
"loss": 0.0041,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.5667469879518072,
|
|
"grad_norm": 0.1675218939781189,
|
|
"learning_rate": 2.1992419429119764e-05,
|
|
"loss": 0.0078,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.5686746987951807,
|
|
"grad_norm": 0.45591506361961365,
|
|
"learning_rate": 2.1947789693955097e-05,
|
|
"loss": 0.0166,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.5706024096385542,
|
|
"grad_norm": 0.46940621733665466,
|
|
"learning_rate": 2.190315016467188e-05,
|
|
"loss": 0.0176,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.5725301204819278,
|
|
"grad_norm": 0.2294205278158188,
|
|
"learning_rate": 2.1858501065732146e-05,
|
|
"loss": 0.0102,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.5744578313253013,
|
|
"grad_norm": 0.28922322392463684,
|
|
"learning_rate": 2.181384262164606e-05,
|
|
"loss": 0.0111,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.5763855421686745,
|
|
"grad_norm": 0.19650064408779144,
|
|
"learning_rate": 2.1769175056970765e-05,
|
|
"loss": 0.0076,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.5783132530120483,
|
|
"grad_norm": 0.19538825750350952,
|
|
"learning_rate": 2.172449859630927e-05,
|
|
"loss": 0.0118,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.5802409638554216,
|
|
"grad_norm": 0.1900389939546585,
|
|
"learning_rate": 2.167981346430931e-05,
|
|
"loss": 0.0066,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.5821686746987953,
|
|
"grad_norm": 0.21593710780143738,
|
|
"learning_rate": 2.1635119885662235e-05,
|
|
"loss": 0.0101,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.5840963855421686,
|
|
"grad_norm": 0.2699289321899414,
|
|
"learning_rate": 2.159041808510185e-05,
|
|
"loss": 0.0118,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.5860240963855423,
|
|
"grad_norm": 0.31867673993110657,
|
|
"learning_rate": 2.1545708287403322e-05,
|
|
"loss": 0.0122,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.5879518072289156,
|
|
"grad_norm": 0.2862400412559509,
|
|
"learning_rate": 2.1500990717382004e-05,
|
|
"loss": 0.0216,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.589879518072289,
|
|
"grad_norm": 0.28482481837272644,
|
|
"learning_rate": 2.145626559989237e-05,
|
|
"loss": 0.0136,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.5918072289156626,
|
|
"grad_norm": 0.2866958975791931,
|
|
"learning_rate": 2.1411533159826803e-05,
|
|
"loss": 0.0298,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.5937349397590361,
|
|
"grad_norm": 0.39092838764190674,
|
|
"learning_rate": 2.1366793622114533e-05,
|
|
"loss": 0.0382,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.5956626506024096,
|
|
"grad_norm": 0.16381537914276123,
|
|
"learning_rate": 2.1322047211720468e-05,
|
|
"loss": 0.0074,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.5975903614457831,
|
|
"grad_norm": 0.22146940231323242,
|
|
"learning_rate": 2.1277294153644083e-05,
|
|
"loss": 0.0103,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.5995180722891567,
|
|
"grad_norm": 0.2155209183692932,
|
|
"learning_rate": 2.123253467291827e-05,
|
|
"loss": 0.0095,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.6014457831325302,
|
|
"grad_norm": 0.41510409116744995,
|
|
"learning_rate": 2.118776899460822e-05,
|
|
"loss": 0.0457,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.6033734939759037,
|
|
"grad_norm": 0.19718150794506073,
|
|
"learning_rate": 2.1142997343810293e-05,
|
|
"loss": 0.0192,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.605301204819277,
|
|
"grad_norm": 0.40924403071403503,
|
|
"learning_rate": 2.1098219945650865e-05,
|
|
"loss": 0.0278,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.6072289156626507,
|
|
"grad_norm": 0.18657824397087097,
|
|
"learning_rate": 2.105343702528524e-05,
|
|
"loss": 0.0076,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.609156626506024,
|
|
"grad_norm": 0.1727641075849533,
|
|
"learning_rate": 2.100864880789645e-05,
|
|
"loss": 0.0076,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.6110843373493977,
|
|
"grad_norm": 0.18138745427131653,
|
|
"learning_rate": 2.0963855518694203e-05,
|
|
"loss": 0.005,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.613012048192771,
|
|
"grad_norm": 0.19173955917358398,
|
|
"learning_rate": 2.0919057382913675e-05,
|
|
"loss": 0.0084,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.6149397590361447,
|
|
"grad_norm": 0.3812403380870819,
|
|
"learning_rate": 2.0874254625814435e-05,
|
|
"loss": 0.009,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.616867469879518,
|
|
"grad_norm": 0.2009759545326233,
|
|
"learning_rate": 2.0829447472679285e-05,
|
|
"loss": 0.0098,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.6187951807228915,
|
|
"grad_norm": 0.48703446984291077,
|
|
"learning_rate": 2.0784636148813124e-05,
|
|
"loss": 0.0099,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.620722891566265,
|
|
"grad_norm": 0.28995075821876526,
|
|
"learning_rate": 2.0739820879541827e-05,
|
|
"loss": 0.0075,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.6226506024096385,
|
|
"grad_norm": 0.2130059450864792,
|
|
"learning_rate": 2.069500189021111e-05,
|
|
"loss": 0.007,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.624578313253012,
|
|
"grad_norm": 0.252524733543396,
|
|
"learning_rate": 2.0650179406185397e-05,
|
|
"loss": 0.0249,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.6265060240963856,
|
|
"grad_norm": 0.23069098591804504,
|
|
"learning_rate": 2.060535365284668e-05,
|
|
"loss": 0.0084,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.628433734939759,
|
|
"grad_norm": 0.25051403045654297,
|
|
"learning_rate": 2.056052485559338e-05,
|
|
"loss": 0.0071,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.6303614457831326,
|
|
"grad_norm": 0.27664798498153687,
|
|
"learning_rate": 2.051569323983924e-05,
|
|
"loss": 0.0198,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.632289156626506,
|
|
"grad_norm": 0.2954922318458557,
|
|
"learning_rate": 2.047085903101218e-05,
|
|
"loss": 0.006,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.6342168674698794,
|
|
"grad_norm": 0.28477591276168823,
|
|
"learning_rate": 2.0426022454553137e-05,
|
|
"loss": 0.0147,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.636144578313253,
|
|
"grad_norm": 0.2785305678844452,
|
|
"learning_rate": 2.0381183735914968e-05,
|
|
"loss": 0.0117,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.6380722891566264,
|
|
"grad_norm": 0.2500309348106384,
|
|
"learning_rate": 2.0336343100561295e-05,
|
|
"loss": 0.008,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.6400000000000001,
|
|
"grad_norm": 0.18932047486305237,
|
|
"learning_rate": 2.0291500773965392e-05,
|
|
"loss": 0.0256,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.6419277108433734,
|
|
"grad_norm": 0.6396257877349854,
|
|
"learning_rate": 2.0246656981609013e-05,
|
|
"loss": 0.0141,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.6438554216867471,
|
|
"grad_norm": 0.5072891116142273,
|
|
"learning_rate": 2.02018119489813e-05,
|
|
"loss": 0.008,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.6457831325301204,
|
|
"grad_norm": 0.2920839488506317,
|
|
"learning_rate": 2.0156965901577635e-05,
|
|
"loss": 0.0085,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.647710843373494,
|
|
"grad_norm": 0.1391262263059616,
|
|
"learning_rate": 2.011211906489848e-05,
|
|
"loss": 0.0078,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.6496385542168674,
|
|
"grad_norm": 0.29620468616485596,
|
|
"learning_rate": 2.00672716644483e-05,
|
|
"loss": 0.0109,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.651566265060241,
|
|
"grad_norm": 0.13946573436260223,
|
|
"learning_rate": 2.002242392573436e-05,
|
|
"loss": 0.0076,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.6534939759036145,
|
|
"grad_norm": 0.9766128659248352,
|
|
"learning_rate": 1.997757607426565e-05,
|
|
"loss": 0.0309,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.655421686746988,
|
|
"grad_norm": 0.18002203106880188,
|
|
"learning_rate": 1.9932728335551702e-05,
|
|
"loss": 0.0072,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.6573493975903615,
|
|
"grad_norm": 0.28073111176490784,
|
|
"learning_rate": 1.988788093510152e-05,
|
|
"loss": 0.0246,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.659277108433735,
|
|
"grad_norm": 0.1919957399368286,
|
|
"learning_rate": 1.9843034098422375e-05,
|
|
"loss": 0.0087,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.6612048192771085,
|
|
"grad_norm": 0.1825258433818817,
|
|
"learning_rate": 1.9798188051018705e-05,
|
|
"loss": 0.0092,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.6631325301204818,
|
|
"grad_norm": 0.32412952184677124,
|
|
"learning_rate": 1.9753343018390997e-05,
|
|
"loss": 0.0118,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.6650602409638555,
|
|
"grad_norm": 0.12828563153743744,
|
|
"learning_rate": 1.9708499226034618e-05,
|
|
"loss": 0.0056,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.6669879518072288,
|
|
"grad_norm": 0.18647560477256775,
|
|
"learning_rate": 1.966365689943871e-05,
|
|
"loss": 0.0094,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.6689156626506025,
|
|
"grad_norm": 0.19835828244686127,
|
|
"learning_rate": 1.9618816264085042e-05,
|
|
"loss": 0.0097,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.6708433734939758,
|
|
"grad_norm": 0.22364282608032227,
|
|
"learning_rate": 1.957397754544687e-05,
|
|
"loss": 0.0062,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.6727710843373496,
|
|
"grad_norm": 0.29420018196105957,
|
|
"learning_rate": 1.952914096898783e-05,
|
|
"loss": 0.0182,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.6746987951807228,
|
|
"grad_norm": 0.2149929702281952,
|
|
"learning_rate": 1.9484306760160766e-05,
|
|
"loss": 0.0125,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.6766265060240964,
|
|
"grad_norm": 0.16844330728054047,
|
|
"learning_rate": 1.9439475144406623e-05,
|
|
"loss": 0.0074,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.6785542168674699,
|
|
"grad_norm": 0.5010282397270203,
|
|
"learning_rate": 1.9394646347153334e-05,
|
|
"loss": 0.0213,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.6804819277108434,
|
|
"grad_norm": 0.29847195744514465,
|
|
"learning_rate": 1.9349820593814606e-05,
|
|
"loss": 0.0173,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.6824096385542169,
|
|
"grad_norm": 0.23835812509059906,
|
|
"learning_rate": 1.930499810978889e-05,
|
|
"loss": 0.011,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.6843373493975904,
|
|
"grad_norm": 0.3269020617008209,
|
|
"learning_rate": 1.9260179120458177e-05,
|
|
"loss": 0.0285,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.686265060240964,
|
|
"grad_norm": 0.2142144739627838,
|
|
"learning_rate": 1.9215363851186883e-05,
|
|
"loss": 0.0146,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.6881927710843372,
|
|
"grad_norm": 0.3098377585411072,
|
|
"learning_rate": 1.9170552527320725e-05,
|
|
"loss": 0.0104,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.690120481927711,
|
|
"grad_norm": 0.22504115104675293,
|
|
"learning_rate": 1.9125745374185568e-05,
|
|
"loss": 0.0091,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.6920481927710842,
|
|
"grad_norm": 0.20633333921432495,
|
|
"learning_rate": 1.908094261708633e-05,
|
|
"loss": 0.0097,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.693975903614458,
|
|
"grad_norm": 1.179566502571106,
|
|
"learning_rate": 1.9036144481305807e-05,
|
|
"loss": 0.0143,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.6959036144578312,
|
|
"grad_norm": 0.15525613725185394,
|
|
"learning_rate": 1.8991351192103554e-05,
|
|
"loss": 0.0062,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.697831325301205,
|
|
"grad_norm": 0.15966367721557617,
|
|
"learning_rate": 1.8946562974714763e-05,
|
|
"loss": 0.0048,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.6997590361445782,
|
|
"grad_norm": 0.18902607262134552,
|
|
"learning_rate": 1.890178005434914e-05,
|
|
"loss": 0.0124,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.701686746987952,
|
|
"grad_norm": 0.21692413091659546,
|
|
"learning_rate": 1.885700265618971e-05,
|
|
"loss": 0.0135,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.7036144578313253,
|
|
"grad_norm": 0.38948455452919006,
|
|
"learning_rate": 1.8812231005391786e-05,
|
|
"loss": 0.0365,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.7055421686746988,
|
|
"grad_norm": 0.2483491599559784,
|
|
"learning_rate": 1.8767465327081736e-05,
|
|
"loss": 0.0202,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.7074698795180723,
|
|
"grad_norm": 0.15305832028388977,
|
|
"learning_rate": 1.872270584635592e-05,
|
|
"loss": 0.0035,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.7093975903614458,
|
|
"grad_norm": 0.17794466018676758,
|
|
"learning_rate": 1.867795278827954e-05,
|
|
"loss": 0.0157,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.7113253012048193,
|
|
"grad_norm": 0.1938813328742981,
|
|
"learning_rate": 1.863320637788547e-05,
|
|
"loss": 0.0071,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.7132530120481928,
|
|
"grad_norm": 0.27061617374420166,
|
|
"learning_rate": 1.8588466840173207e-05,
|
|
"loss": 0.0347,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.7151807228915663,
|
|
"grad_norm": 0.1541014313697815,
|
|
"learning_rate": 1.8543734400107637e-05,
|
|
"loss": 0.006,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.7171084337349396,
|
|
"grad_norm": 0.1436876654624939,
|
|
"learning_rate": 1.8499009282617996e-05,
|
|
"loss": 0.0059,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.7190361445783133,
|
|
"grad_norm": 1.0573723316192627,
|
|
"learning_rate": 1.8454291712596688e-05,
|
|
"loss": 0.008,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.7209638554216866,
|
|
"grad_norm": 0.15406259894371033,
|
|
"learning_rate": 1.8409581914898157e-05,
|
|
"loss": 0.0061,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.7228915662650603,
|
|
"grad_norm": 0.24822913110256195,
|
|
"learning_rate": 1.836488011433777e-05,
|
|
"loss": 0.0085,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.7248192771084336,
|
|
"grad_norm": 0.21049316227436066,
|
|
"learning_rate": 1.83201865356907e-05,
|
|
"loss": 0.0075,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.7267469879518074,
|
|
"grad_norm": 0.24159866571426392,
|
|
"learning_rate": 1.8275501403690733e-05,
|
|
"loss": 0.0156,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.7286746987951807,
|
|
"grad_norm": 0.3191063106060028,
|
|
"learning_rate": 1.823082494302924e-05,
|
|
"loss": 0.0218,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.7306024096385542,
|
|
"grad_norm": 0.20296362042427063,
|
|
"learning_rate": 1.8186157378353945e-05,
|
|
"loss": 0.0126,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.7325301204819277,
|
|
"grad_norm": 0.1905524581670761,
|
|
"learning_rate": 1.8141498934267858e-05,
|
|
"loss": 0.0131,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.7344578313253012,
|
|
"grad_norm": 0.5350520610809326,
|
|
"learning_rate": 1.809684983532813e-05,
|
|
"loss": 0.0115,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.7363855421686747,
|
|
"grad_norm": 0.17144092917442322,
|
|
"learning_rate": 1.8052210306044907e-05,
|
|
"loss": 0.0113,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.7383132530120482,
|
|
"grad_norm": 0.11777982115745544,
|
|
"learning_rate": 1.8007580570880236e-05,
|
|
"loss": 0.0058,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.7402409638554217,
|
|
"grad_norm": 0.2078275978565216,
|
|
"learning_rate": 1.7962960854246908e-05,
|
|
"loss": 0.0106,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.7421686746987952,
|
|
"grad_norm": 0.2550877630710602,
|
|
"learning_rate": 1.791835138050732e-05,
|
|
"loss": 0.0076,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.7440963855421687,
|
|
"grad_norm": 0.11553912609815598,
|
|
"learning_rate": 1.7873752373972395e-05,
|
|
"loss": 0.0038,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.746024096385542,
|
|
"grad_norm": 0.10724586248397827,
|
|
"learning_rate": 1.7829164058900398e-05,
|
|
"loss": 0.0043,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.7479518072289157,
|
|
"grad_norm": 0.30152231454849243,
|
|
"learning_rate": 1.7784586659495845e-05,
|
|
"loss": 0.0099,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.749879518072289,
|
|
"grad_norm": 0.18372933566570282,
|
|
"learning_rate": 1.7740020399908372e-05,
|
|
"loss": 0.0074,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.7518072289156628,
|
|
"grad_norm": 0.35184428095817566,
|
|
"learning_rate": 1.7695465504231586e-05,
|
|
"loss": 0.0184,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.753734939759036,
|
|
"grad_norm": 0.15083615481853485,
|
|
"learning_rate": 1.765092219650196e-05,
|
|
"loss": 0.0061,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.7556626506024098,
|
|
"grad_norm": 0.2599961459636688,
|
|
"learning_rate": 1.7606390700697693e-05,
|
|
"loss": 0.0101,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.757590361445783,
|
|
"grad_norm": 0.10829206556081772,
|
|
"learning_rate": 1.7561871240737595e-05,
|
|
"loss": 0.0034,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.7595180722891566,
|
|
"grad_norm": 0.38098782300949097,
|
|
"learning_rate": 1.7517364040479966e-05,
|
|
"loss": 0.0384,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.76144578313253,
|
|
"grad_norm": 0.14975085854530334,
|
|
"learning_rate": 1.7472869323721432e-05,
|
|
"loss": 0.0055,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.7633734939759036,
|
|
"grad_norm": 0.4151444733142853,
|
|
"learning_rate": 1.742838731419588e-05,
|
|
"loss": 0.0307,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.765301204819277,
|
|
"grad_norm": 0.22238481044769287,
|
|
"learning_rate": 1.738391823557328e-05,
|
|
"loss": 0.0059,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.7672289156626506,
|
|
"grad_norm": 0.23386356234550476,
|
|
"learning_rate": 1.7339462311458587e-05,
|
|
"loss": 0.0113,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.7691566265060241,
|
|
"grad_norm": 0.21911191940307617,
|
|
"learning_rate": 1.7295019765390618e-05,
|
|
"loss": 0.0071,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.7710843373493976,
|
|
"grad_norm": 0.343159943819046,
|
|
"learning_rate": 1.7250590820840903e-05,
|
|
"loss": 0.0144,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.7730120481927711,
|
|
"grad_norm": 0.32204556465148926,
|
|
"learning_rate": 1.720617570121259e-05,
|
|
"loss": 0.0131,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.7749397590361444,
|
|
"grad_norm": 0.4105585515499115,
|
|
"learning_rate": 1.7161774629839328e-05,
|
|
"loss": 0.0148,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.7768674698795182,
|
|
"grad_norm": 0.16380974650382996,
|
|
"learning_rate": 1.7117387829984093e-05,
|
|
"loss": 0.0066,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.7787951807228914,
|
|
"grad_norm": 0.22920913994312286,
|
|
"learning_rate": 1.707301552483813e-05,
|
|
"loss": 0.0105,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.7807228915662652,
|
|
"grad_norm": 0.2075149267911911,
|
|
"learning_rate": 1.7028657937519767e-05,
|
|
"loss": 0.0104,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.7826506024096385,
|
|
"grad_norm": 0.44439977407455444,
|
|
"learning_rate": 1.6984315291073355e-05,
|
|
"loss": 0.0134,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.7845783132530122,
|
|
"grad_norm": 0.24068203568458557,
|
|
"learning_rate": 1.6939987808468125e-05,
|
|
"loss": 0.0078,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.7865060240963855,
|
|
"grad_norm": 0.34044349193573,
|
|
"learning_rate": 1.689567571259701e-05,
|
|
"loss": 0.0108,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.788433734939759,
|
|
"grad_norm": 0.34082743525505066,
|
|
"learning_rate": 1.6851379226275624e-05,
|
|
"loss": 0.0266,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.7903614457831325,
|
|
"grad_norm": 0.19490115344524384,
|
|
"learning_rate": 1.6807098572241075e-05,
|
|
"loss": 0.0109,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.792289156626506,
|
|
"grad_norm": 0.16208237409591675,
|
|
"learning_rate": 1.6762833973150846e-05,
|
|
"loss": 0.0113,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.7942168674698795,
|
|
"grad_norm": 0.35555699467658997,
|
|
"learning_rate": 1.671858565158172e-05,
|
|
"loss": 0.0196,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.796144578313253,
|
|
"grad_norm": 0.1600857824087143,
|
|
"learning_rate": 1.6674353830028587e-05,
|
|
"loss": 0.0089,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.7980722891566265,
|
|
"grad_norm": 0.1699574887752533,
|
|
"learning_rate": 1.663013873090342e-05,
|
|
"loss": 0.0074,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 0.2472933828830719,
|
|
"learning_rate": 1.6585940576534086e-05,
|
|
"loss": 0.0063,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.8019277108433736,
|
|
"grad_norm": 0.23491555452346802,
|
|
"learning_rate": 1.654175958916323e-05,
|
|
"loss": 0.0101,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.8038554216867468,
|
|
"grad_norm": 0.28635191917419434,
|
|
"learning_rate": 1.6497595990947195e-05,
|
|
"loss": 0.0131,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.8057831325301206,
|
|
"grad_norm": 0.15400712192058563,
|
|
"learning_rate": 1.645345000395489e-05,
|
|
"loss": 0.0068,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.8077108433734939,
|
|
"grad_norm": 0.18223172426223755,
|
|
"learning_rate": 1.6409321850166647e-05,
|
|
"loss": 0.0094,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.8096385542168676,
|
|
"grad_norm": 0.2789457142353058,
|
|
"learning_rate": 1.636521175147316e-05,
|
|
"loss": 0.0202,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.8115662650602409,
|
|
"grad_norm": 0.4267627000808716,
|
|
"learning_rate": 1.6321119929674297e-05,
|
|
"loss": 0.0176,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.8134939759036146,
|
|
"grad_norm": 0.3021615445613861,
|
|
"learning_rate": 1.6277046606478056e-05,
|
|
"loss": 0.0085,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.815421686746988,
|
|
"grad_norm": 0.3724934756755829,
|
|
"learning_rate": 1.6232992003499405e-05,
|
|
"loss": 0.0474,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.8173493975903614,
|
|
"grad_norm": 0.20904326438903809,
|
|
"learning_rate": 1.6188956342259177e-05,
|
|
"loss": 0.0078,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.819277108433735,
|
|
"grad_norm": 0.31168171763420105,
|
|
"learning_rate": 1.614493984418297e-05,
|
|
"loss": 0.0174,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.8212048192771084,
|
|
"grad_norm": 0.21273556351661682,
|
|
"learning_rate": 1.6100942730600003e-05,
|
|
"loss": 0.0054,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.823132530120482,
|
|
"grad_norm": 0.16991695761680603,
|
|
"learning_rate": 1.6056965222742055e-05,
|
|
"loss": 0.0063,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.8250602409638554,
|
|
"grad_norm": 0.22762684524059296,
|
|
"learning_rate": 1.6013007541742303e-05,
|
|
"loss": 0.0234,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.826987951807229,
|
|
"grad_norm": 0.20128795504570007,
|
|
"learning_rate": 1.596906990863422e-05,
|
|
"loss": 0.0095,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.8289156626506025,
|
|
"grad_norm": 0.30772027373313904,
|
|
"learning_rate": 1.592515254435048e-05,
|
|
"loss": 0.0356,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.830843373493976,
|
|
"grad_norm": 0.12954631447792053,
|
|
"learning_rate": 1.5881255669721857e-05,
|
|
"loss": 0.008,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.8327710843373493,
|
|
"grad_norm": 0.7787145972251892,
|
|
"learning_rate": 1.5837379505476054e-05,
|
|
"loss": 0.0108,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.834698795180723,
|
|
"grad_norm": 0.1683879941701889,
|
|
"learning_rate": 1.5793524272236683e-05,
|
|
"loss": 0.006,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.8366265060240963,
|
|
"grad_norm": 0.16475361585617065,
|
|
"learning_rate": 1.5749690190522076e-05,
|
|
"loss": 0.0065,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.83855421686747,
|
|
"grad_norm": 0.211905375123024,
|
|
"learning_rate": 1.5705877480744214e-05,
|
|
"loss": 0.0092,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.8404819277108433,
|
|
"grad_norm": 0.23850117623806,
|
|
"learning_rate": 1.5662086363207628e-05,
|
|
"loss": 0.012,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.842409638554217,
|
|
"grad_norm": 0.19100065529346466,
|
|
"learning_rate": 1.561831705810825e-05,
|
|
"loss": 0.0113,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.8443373493975903,
|
|
"grad_norm": 0.3635985255241394,
|
|
"learning_rate": 1.557456978553236e-05,
|
|
"loss": 0.0168,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.8462650602409638,
|
|
"grad_norm": 0.16449116170406342,
|
|
"learning_rate": 1.553084476545544e-05,
|
|
"loss": 0.0042,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.8481927710843373,
|
|
"grad_norm": 0.566093385219574,
|
|
"learning_rate": 1.5487142217741062e-05,
|
|
"loss": 0.0145,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.8501204819277108,
|
|
"grad_norm": 0.15960252285003662,
|
|
"learning_rate": 1.5443462362139834e-05,
|
|
"loss": 0.0059,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.8520481927710843,
|
|
"grad_norm": 0.40773797035217285,
|
|
"learning_rate": 1.539980541828823e-05,
|
|
"loss": 0.0257,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.8539759036144579,
|
|
"grad_norm": 0.4802496135234833,
|
|
"learning_rate": 1.5356171605707522e-05,
|
|
"loss": 0.0111,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.8559036144578314,
|
|
"grad_norm": 0.15745794773101807,
|
|
"learning_rate": 1.5312561143802704e-05,
|
|
"loss": 0.0049,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.8578313253012049,
|
|
"grad_norm": 0.15139251947402954,
|
|
"learning_rate": 1.5268974251861298e-05,
|
|
"loss": 0.0077,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.8597590361445784,
|
|
"grad_norm": 0.2188841849565506,
|
|
"learning_rate": 1.5225411149052356e-05,
|
|
"loss": 0.017,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.8616867469879517,
|
|
"grad_norm": 0.10853131115436554,
|
|
"learning_rate": 1.5181872054425287e-05,
|
|
"loss": 0.0049,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.8636144578313254,
|
|
"grad_norm": 0.8254880905151367,
|
|
"learning_rate": 1.5138357186908785e-05,
|
|
"loss": 0.0317,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.8655421686746987,
|
|
"grad_norm": 0.2989620566368103,
|
|
"learning_rate": 1.5094866765309728e-05,
|
|
"loss": 0.0126,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.8674698795180724,
|
|
"grad_norm": 0.16411150991916656,
|
|
"learning_rate": 1.5051401008312054e-05,
|
|
"loss": 0.0101,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.8693975903614457,
|
|
"grad_norm": 0.2861763834953308,
|
|
"learning_rate": 1.5007960134475706e-05,
|
|
"loss": 0.0155,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.8713253012048194,
|
|
"grad_norm": 0.24879588186740875,
|
|
"learning_rate": 1.4964544362235487e-05,
|
|
"loss": 0.0187,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.8732530120481927,
|
|
"grad_norm": 0.2433672398328781,
|
|
"learning_rate": 1.4921153909899983e-05,
|
|
"loss": 0.0084,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.8751807228915662,
|
|
"grad_norm": 0.15097154676914215,
|
|
"learning_rate": 1.487778899565047e-05,
|
|
"loss": 0.007,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.8771084337349397,
|
|
"grad_norm": 0.1629047691822052,
|
|
"learning_rate": 1.4834449837539806e-05,
|
|
"loss": 0.0058,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.8790361445783132,
|
|
"grad_norm": 0.9937071204185486,
|
|
"learning_rate": 1.4791136653491333e-05,
|
|
"loss": 0.0323,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.8809638554216868,
|
|
"grad_norm": 0.19555562734603882,
|
|
"learning_rate": 1.4747849661297808e-05,
|
|
"loss": 0.0126,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.8828915662650603,
|
|
"grad_norm": 0.16147711873054504,
|
|
"learning_rate": 1.470458907862026e-05,
|
|
"loss": 0.0067,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.8848192771084338,
|
|
"grad_norm": 0.2730027735233307,
|
|
"learning_rate": 1.4661355122986945e-05,
|
|
"loss": 0.0147,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.886746987951807,
|
|
"grad_norm": 0.13759832084178925,
|
|
"learning_rate": 1.4618148011792206e-05,
|
|
"loss": 0.0038,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.8886746987951808,
|
|
"grad_norm": 0.33516690135002136,
|
|
"learning_rate": 1.4574967962295419e-05,
|
|
"loss": 0.0139,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 1.890602409638554,
|
|
"grad_norm": 0.2345741093158722,
|
|
"learning_rate": 1.4531815191619903e-05,
|
|
"loss": 0.0094,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 1.8925301204819278,
|
|
"grad_norm": 0.14681044220924377,
|
|
"learning_rate": 1.4488689916751762e-05,
|
|
"loss": 0.0065,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 1.894457831325301,
|
|
"grad_norm": 0.21143914759159088,
|
|
"learning_rate": 1.4445592354538885e-05,
|
|
"loss": 0.0057,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 1.8963855421686748,
|
|
"grad_norm": 0.3109160363674164,
|
|
"learning_rate": 1.44025227216898e-05,
|
|
"loss": 0.0142,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 1.8983132530120481,
|
|
"grad_norm": 0.24301907420158386,
|
|
"learning_rate": 1.435948123477259e-05,
|
|
"loss": 0.012,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 1.9002409638554218,
|
|
"grad_norm": 0.19817675650119781,
|
|
"learning_rate": 1.431646811021382e-05,
|
|
"loss": 0.0097,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 1.9021686746987951,
|
|
"grad_norm": 0.13464932143688202,
|
|
"learning_rate": 1.4273483564297425e-05,
|
|
"loss": 0.0046,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 1.9040963855421686,
|
|
"grad_norm": 0.1698642522096634,
|
|
"learning_rate": 1.4230527813163656e-05,
|
|
"loss": 0.0038,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 1.9060240963855422,
|
|
"grad_norm": 0.19395388662815094,
|
|
"learning_rate": 1.4187601072807975e-05,
|
|
"loss": 0.0123,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 1.9079518072289157,
|
|
"grad_norm": 0.2093188613653183,
|
|
"learning_rate": 1.4144703559079948e-05,
|
|
"loss": 0.0093,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 1.9098795180722892,
|
|
"grad_norm": 0.1529311090707779,
|
|
"learning_rate": 1.4101835487682198e-05,
|
|
"loss": 0.0051,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 1.9118072289156627,
|
|
"grad_norm": 0.18725350499153137,
|
|
"learning_rate": 1.4058997074169299e-05,
|
|
"loss": 0.0083,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 1.9137349397590362,
|
|
"grad_norm": 0.15601560473442078,
|
|
"learning_rate": 1.401618853394668e-05,
|
|
"loss": 0.0086,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 1.9156626506024095,
|
|
"grad_norm": 0.23890644311904907,
|
|
"learning_rate": 1.3973410082269591e-05,
|
|
"loss": 0.015,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 1.9175903614457832,
|
|
"grad_norm": 0.2442619949579239,
|
|
"learning_rate": 1.3930661934241947e-05,
|
|
"loss": 0.0089,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 1.9195180722891565,
|
|
"grad_norm": 0.1540212482213974,
|
|
"learning_rate": 1.388794430481532e-05,
|
|
"loss": 0.0072,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 1.9214457831325302,
|
|
"grad_norm": 0.1359291970729828,
|
|
"learning_rate": 1.3845257408787807e-05,
|
|
"loss": 0.0131,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 1.9233734939759035,
|
|
"grad_norm": 0.25486138463020325,
|
|
"learning_rate": 1.3802601460802967e-05,
|
|
"loss": 0.0198,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 1.9253012048192772,
|
|
"grad_norm": 0.28815609216690063,
|
|
"learning_rate": 1.3759976675348754e-05,
|
|
"loss": 0.014,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 1.9272289156626505,
|
|
"grad_norm": 0.15648497641086578,
|
|
"learning_rate": 1.3717383266756403e-05,
|
|
"loss": 0.0065,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 1.929156626506024,
|
|
"grad_norm": 0.16912540793418884,
|
|
"learning_rate": 1.367482144919941e-05,
|
|
"loss": 0.0059,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 1.9310843373493976,
|
|
"grad_norm": 0.16896723210811615,
|
|
"learning_rate": 1.3632291436692397e-05,
|
|
"loss": 0.0054,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 1.933012048192771,
|
|
"grad_norm": 0.20287497341632843,
|
|
"learning_rate": 1.3589793443090064e-05,
|
|
"loss": 0.0097,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 1.9349397590361446,
|
|
"grad_norm": 0.14804276823997498,
|
|
"learning_rate": 1.3547327682086114e-05,
|
|
"loss": 0.0125,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 1.936867469879518,
|
|
"grad_norm": 0.23820064961910248,
|
|
"learning_rate": 1.3504894367212171e-05,
|
|
"loss": 0.0131,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 1.9387951807228916,
|
|
"grad_norm": 0.25607362389564514,
|
|
"learning_rate": 1.34624937118367e-05,
|
|
"loss": 0.0115,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 1.940722891566265,
|
|
"grad_norm": 0.37233737111091614,
|
|
"learning_rate": 1.3420125929163976e-05,
|
|
"loss": 0.0309,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 1.9426506024096386,
|
|
"grad_norm": 0.19426730275154114,
|
|
"learning_rate": 1.3377791232232929e-05,
|
|
"loss": 0.0078,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 1.944578313253012,
|
|
"grad_norm": 0.2784160077571869,
|
|
"learning_rate": 1.333548983391617e-05,
|
|
"loss": 0.0142,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 1.9465060240963856,
|
|
"grad_norm": 0.11407195776700974,
|
|
"learning_rate": 1.3293221946918853e-05,
|
|
"loss": 0.0035,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 1.948433734939759,
|
|
"grad_norm": 0.3965436816215515,
|
|
"learning_rate": 1.325098778377762e-05,
|
|
"loss": 0.0242,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 1.9503614457831326,
|
|
"grad_norm": 0.18520519137382507,
|
|
"learning_rate": 1.3208787556859543e-05,
|
|
"loss": 0.0096,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 1.952289156626506,
|
|
"grad_norm": 0.2783315181732178,
|
|
"learning_rate": 1.3166621478361075e-05,
|
|
"loss": 0.0103,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 1.9542168674698797,
|
|
"grad_norm": 0.22714459896087646,
|
|
"learning_rate": 1.3124489760306917e-05,
|
|
"loss": 0.0078,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 1.956144578313253,
|
|
"grad_norm": 0.1257915049791336,
|
|
"learning_rate": 1.3082392614549036e-05,
|
|
"loss": 0.0077,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 1.9580722891566265,
|
|
"grad_norm": 0.15592887997627258,
|
|
"learning_rate": 1.3040330252765526e-05,
|
|
"loss": 0.0106,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"grad_norm": 0.19295449554920197,
|
|
"learning_rate": 1.2998302886459586e-05,
|
|
"loss": 0.0082,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 1.9619277108433735,
|
|
"grad_norm": 0.15544794499874115,
|
|
"learning_rate": 1.2956310726958472e-05,
|
|
"loss": 0.0068,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 1.963855421686747,
|
|
"grad_norm": 0.25899502635002136,
|
|
"learning_rate": 1.291435398541236e-05,
|
|
"loss": 0.0086,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 1.9657831325301205,
|
|
"grad_norm": 0.34639033675193787,
|
|
"learning_rate": 1.2872432872793379e-05,
|
|
"loss": 0.0116,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 1.967710843373494,
|
|
"grad_norm": 0.1628410518169403,
|
|
"learning_rate": 1.283054759989447e-05,
|
|
"loss": 0.0055,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 1.9696385542168675,
|
|
"grad_norm": 0.9273788928985596,
|
|
"learning_rate": 1.2788698377328385e-05,
|
|
"loss": 0.0264,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 1.971566265060241,
|
|
"grad_norm": 0.163126140832901,
|
|
"learning_rate": 1.2746885415526594e-05,
|
|
"loss": 0.0046,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 1.9734939759036143,
|
|
"grad_norm": 0.1475439816713333,
|
|
"learning_rate": 1.2705108924738223e-05,
|
|
"loss": 0.0056,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 1.975421686746988,
|
|
"grad_norm": 0.1654318869113922,
|
|
"learning_rate": 1.2663369115029034e-05,
|
|
"loss": 0.0056,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 1.9773493975903613,
|
|
"grad_norm": 0.20536045730113983,
|
|
"learning_rate": 1.2621666196280333e-05,
|
|
"loss": 0.0101,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 1.979277108433735,
|
|
"grad_norm": 0.19256474077701569,
|
|
"learning_rate": 1.258000037818792e-05,
|
|
"loss": 0.0059,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 1.9812048192771083,
|
|
"grad_norm": 0.2605120539665222,
|
|
"learning_rate": 1.2538371870261053e-05,
|
|
"loss": 0.0115,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 1.983132530120482,
|
|
"grad_norm": 0.14840295910835266,
|
|
"learning_rate": 1.249678088182137e-05,
|
|
"loss": 0.0046,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 1.9850602409638554,
|
|
"grad_norm": 0.17585207521915436,
|
|
"learning_rate": 1.2455227622001851e-05,
|
|
"loss": 0.0086,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 1.9869879518072289,
|
|
"grad_norm": 0.11044781655073166,
|
|
"learning_rate": 1.241371229974579e-05,
|
|
"loss": 0.0034,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 1.9889156626506024,
|
|
"grad_norm": 0.25584840774536133,
|
|
"learning_rate": 1.2372235123805672e-05,
|
|
"loss": 0.0245,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 1.9908433734939759,
|
|
"grad_norm": 0.25962474942207336,
|
|
"learning_rate": 1.2330796302742211e-05,
|
|
"loss": 0.0104,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 1.9927710843373494,
|
|
"grad_norm": 0.33408522605895996,
|
|
"learning_rate": 1.2289396044923238e-05,
|
|
"loss": 0.0176,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 1.994698795180723,
|
|
"grad_norm": 0.479950487613678,
|
|
"learning_rate": 1.2248034558522682e-05,
|
|
"loss": 0.0113,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 1.9966265060240964,
|
|
"grad_norm": 0.16567294299602509,
|
|
"learning_rate": 1.2206712051519518e-05,
|
|
"loss": 0.0036,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 1.99855421686747,
|
|
"grad_norm": 0.19343771040439606,
|
|
"learning_rate": 1.2165428731696713e-05,
|
|
"loss": 0.0077,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.22895601391792297,
|
|
"learning_rate": 1.2124184806640202e-05,
|
|
"loss": 0.0114,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 2.0019277108433733,
|
|
"grad_norm": 0.15838384628295898,
|
|
"learning_rate": 1.208298048373782e-05,
|
|
"loss": 0.0043,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 2.003855421686747,
|
|
"grad_norm": 0.681065559387207,
|
|
"learning_rate": 1.2041815970178268e-05,
|
|
"loss": 0.0214,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 2.0057831325301203,
|
|
"grad_norm": 0.3357350528240204,
|
|
"learning_rate": 1.2000691472950081e-05,
|
|
"loss": 0.0079,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 2.007710843373494,
|
|
"grad_norm": 0.15238308906555176,
|
|
"learning_rate": 1.1959607198840568e-05,
|
|
"loss": 0.0041,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 2.0096385542168673,
|
|
"grad_norm": 0.11763229966163635,
|
|
"learning_rate": 1.1918563354434784e-05,
|
|
"loss": 0.0033,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 2.011566265060241,
|
|
"grad_norm": 0.3759301006793976,
|
|
"learning_rate": 1.1877560146114515e-05,
|
|
"loss": 0.0128,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 2.0134939759036143,
|
|
"grad_norm": 0.1143188625574112,
|
|
"learning_rate": 1.1836597780057183e-05,
|
|
"loss": 0.0078,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 2.015421686746988,
|
|
"grad_norm": 0.20059260725975037,
|
|
"learning_rate": 1.179567646223485e-05,
|
|
"loss": 0.0149,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 2.0173493975903614,
|
|
"grad_norm": 0.15569567680358887,
|
|
"learning_rate": 1.1754796398413196e-05,
|
|
"loss": 0.0038,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 2.019277108433735,
|
|
"grad_norm": 0.1153278723359108,
|
|
"learning_rate": 1.1713957794150423e-05,
|
|
"loss": 0.0041,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 2.0212048192771084,
|
|
"grad_norm": 0.1838717758655548,
|
|
"learning_rate": 1.1673160854796307e-05,
|
|
"loss": 0.0041,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 2.023132530120482,
|
|
"grad_norm": 0.12264502793550491,
|
|
"learning_rate": 1.1632405785491077e-05,
|
|
"loss": 0.0043,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 2.0250602409638554,
|
|
"grad_norm": 0.14363229274749756,
|
|
"learning_rate": 1.159169279116445e-05,
|
|
"loss": 0.0066,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 2.026987951807229,
|
|
"grad_norm": 0.1316995471715927,
|
|
"learning_rate": 1.1551022076534585e-05,
|
|
"loss": 0.0024,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 2.0289156626506024,
|
|
"grad_norm": 0.13392619788646698,
|
|
"learning_rate": 1.1510393846107001e-05,
|
|
"loss": 0.0051,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 2.0308433734939757,
|
|
"grad_norm": 3.0086817741394043,
|
|
"learning_rate": 1.1469808304173658e-05,
|
|
"loss": 0.0334,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 2.0327710843373494,
|
|
"grad_norm": 0.17756076157093048,
|
|
"learning_rate": 1.1429265654811803e-05,
|
|
"loss": 0.0068,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 2.0346987951807227,
|
|
"grad_norm": 0.13250532746315002,
|
|
"learning_rate": 1.1388766101883038e-05,
|
|
"loss": 0.0087,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 2.0366265060240965,
|
|
"grad_norm": 0.3534089922904968,
|
|
"learning_rate": 1.1348309849032257e-05,
|
|
"loss": 0.0076,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 2.0385542168674697,
|
|
"grad_norm": 0.11939049512147903,
|
|
"learning_rate": 1.1307897099686627e-05,
|
|
"loss": 0.0029,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 2.0404819277108435,
|
|
"grad_norm": 0.11862517893314362,
|
|
"learning_rate": 1.1267528057054562e-05,
|
|
"loss": 0.0062,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 2.0424096385542168,
|
|
"grad_norm": 0.1539212018251419,
|
|
"learning_rate": 1.1227202924124704e-05,
|
|
"loss": 0.0067,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 2.0443373493975905,
|
|
"grad_norm": 0.17163440585136414,
|
|
"learning_rate": 1.118692190366491e-05,
|
|
"loss": 0.0055,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 2.0462650602409638,
|
|
"grad_norm": 0.12304897606372833,
|
|
"learning_rate": 1.1146685198221222e-05,
|
|
"loss": 0.0036,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 2.0481927710843375,
|
|
"grad_norm": 0.17319051921367645,
|
|
"learning_rate": 1.1106493010116842e-05,
|
|
"loss": 0.0058,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 2.050120481927711,
|
|
"grad_norm": 0.2242443859577179,
|
|
"learning_rate": 1.1066345541451127e-05,
|
|
"loss": 0.0059,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 2.0520481927710845,
|
|
"grad_norm": 0.09533938020467758,
|
|
"learning_rate": 1.1026242994098597e-05,
|
|
"loss": 0.0033,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 2.053975903614458,
|
|
"grad_norm": 0.11697929352521896,
|
|
"learning_rate": 1.0986185569707852e-05,
|
|
"loss": 0.0038,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 2.0559036144578315,
|
|
"grad_norm": 0.2563149333000183,
|
|
"learning_rate": 1.0946173469700625e-05,
|
|
"loss": 0.0158,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 2.057831325301205,
|
|
"grad_norm": 0.21836932003498077,
|
|
"learning_rate": 1.0906206895270739e-05,
|
|
"loss": 0.0085,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 2.059759036144578,
|
|
"grad_norm": 0.1798071414232254,
|
|
"learning_rate": 1.0866286047383094e-05,
|
|
"loss": 0.0053,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 2.061686746987952,
|
|
"grad_norm": 0.08937730640172958,
|
|
"learning_rate": 1.0826411126772675e-05,
|
|
"loss": 0.0025,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 2.063614457831325,
|
|
"grad_norm": 0.0942138060927391,
|
|
"learning_rate": 1.0786582333943499e-05,
|
|
"loss": 0.0017,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 2.065542168674699,
|
|
"grad_norm": 0.13076582551002502,
|
|
"learning_rate": 1.0746799869167679e-05,
|
|
"loss": 0.0033,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 2.067469879518072,
|
|
"grad_norm": 0.0993233174085617,
|
|
"learning_rate": 1.0707063932484357e-05,
|
|
"loss": 0.0046,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 2.069397590361446,
|
|
"grad_norm": 0.3046741485595703,
|
|
"learning_rate": 1.0667374723698698e-05,
|
|
"loss": 0.009,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 2.071325301204819,
|
|
"grad_norm": 0.12197669595479965,
|
|
"learning_rate": 1.0627732442380932e-05,
|
|
"loss": 0.0034,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 2.073253012048193,
|
|
"grad_norm": 0.12721140682697296,
|
|
"learning_rate": 1.058813728786531e-05,
|
|
"loss": 0.0048,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 2.075180722891566,
|
|
"grad_norm": 0.10011966526508331,
|
|
"learning_rate": 1.0548589459249112e-05,
|
|
"loss": 0.0026,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 2.07710843373494,
|
|
"grad_norm": 0.3314201831817627,
|
|
"learning_rate": 1.0509089155391661e-05,
|
|
"loss": 0.0284,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 2.079036144578313,
|
|
"grad_norm": 0.32739701867103577,
|
|
"learning_rate": 1.0469636574913288e-05,
|
|
"loss": 0.0088,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 2.080963855421687,
|
|
"grad_norm": 0.13805675506591797,
|
|
"learning_rate": 1.043023191619438e-05,
|
|
"loss": 0.0042,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 2.0828915662650602,
|
|
"grad_norm": 0.14789745211601257,
|
|
"learning_rate": 1.039087537737435e-05,
|
|
"loss": 0.0037,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 2.0848192771084335,
|
|
"grad_norm": 0.15518991649150848,
|
|
"learning_rate": 1.0351567156350617e-05,
|
|
"loss": 0.0044,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 2.0867469879518072,
|
|
"grad_norm": 0.08380113542079926,
|
|
"learning_rate": 1.0312307450777706e-05,
|
|
"loss": 0.0019,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 2.0886746987951805,
|
|
"grad_norm": 0.17892400920391083,
|
|
"learning_rate": 1.027309645806613e-05,
|
|
"loss": 0.0065,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 2.0906024096385543,
|
|
"grad_norm": 0.5497608780860901,
|
|
"learning_rate": 1.0233934375381489e-05,
|
|
"loss": 0.0238,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 2.0925301204819275,
|
|
"grad_norm": 1.0189186334609985,
|
|
"learning_rate": 1.019482139964344e-05,
|
|
"loss": 0.0092,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 2.0944578313253013,
|
|
"grad_norm": 0.12144117057323456,
|
|
"learning_rate": 1.015575772752472e-05,
|
|
"loss": 0.0038,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 2.0963855421686746,
|
|
"grad_norm": 0.1115315854549408,
|
|
"learning_rate": 1.0116743555450148e-05,
|
|
"loss": 0.0024,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 2.0983132530120483,
|
|
"grad_norm": 0.22671759128570557,
|
|
"learning_rate": 1.0077779079595631e-05,
|
|
"loss": 0.0136,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 2.1002409638554216,
|
|
"grad_norm": 2.0009827613830566,
|
|
"learning_rate": 1.003886449588719e-05,
|
|
"loss": 0.0493,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 2.1021686746987953,
|
|
"grad_norm": 0.11907301843166351,
|
|
"learning_rate": 1.0000000000000006e-05,
|
|
"loss": 0.0034,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 2.1040963855421686,
|
|
"grad_norm": 0.31257638335227966,
|
|
"learning_rate": 9.961185787357346e-06,
|
|
"loss": 0.0129,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 2.1060240963855423,
|
|
"grad_norm": 0.11033743619918823,
|
|
"learning_rate": 9.922422053129674e-06,
|
|
"loss": 0.0184,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 2.1079518072289156,
|
|
"grad_norm": 0.2575698494911194,
|
|
"learning_rate": 9.883708992233626e-06,
|
|
"loss": 0.0054,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 2.1098795180722894,
|
|
"grad_norm": 0.12921132147312164,
|
|
"learning_rate": 9.845046799331029e-06,
|
|
"loss": 0.0037,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 2.1118072289156626,
|
|
"grad_norm": 0.21405921876430511,
|
|
"learning_rate": 9.806435668827941e-06,
|
|
"loss": 0.006,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 2.113734939759036,
|
|
"grad_norm": 0.12929430603981018,
|
|
"learning_rate": 9.76787579487363e-06,
|
|
"loss": 0.0049,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 2.1156626506024097,
|
|
"grad_norm": 0.1793181151151657,
|
|
"learning_rate": 9.729367371359681e-06,
|
|
"loss": 0.0086,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 2.117590361445783,
|
|
"grad_norm": 0.2182074338197708,
|
|
"learning_rate": 9.690910591918936e-06,
|
|
"loss": 0.0106,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 2.1195180722891567,
|
|
"grad_norm": 0.0705680400133133,
|
|
"learning_rate": 9.652505649924547e-06,
|
|
"loss": 0.0012,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 2.12144578313253,
|
|
"grad_norm": 0.10509738326072693,
|
|
"learning_rate": 9.614152738489021e-06,
|
|
"loss": 0.0048,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 2.1233734939759037,
|
|
"grad_norm": 0.13775436580181122,
|
|
"learning_rate": 9.575852050463268e-06,
|
|
"loss": 0.0089,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 2.125301204819277,
|
|
"grad_norm": 0.15230101346969604,
|
|
"learning_rate": 9.537603778435545e-06,
|
|
"loss": 0.0065,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 2.1272289156626507,
|
|
"grad_norm": 0.24702346324920654,
|
|
"learning_rate": 9.499408114730583e-06,
|
|
"loss": 0.016,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 2.129156626506024,
|
|
"grad_norm": 0.1082577034831047,
|
|
"learning_rate": 9.461265251408575e-06,
|
|
"loss": 0.0036,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 2.1310843373493977,
|
|
"grad_norm": 0.1063847690820694,
|
|
"learning_rate": 9.423175380264211e-06,
|
|
"loss": 0.0037,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 2.133012048192771,
|
|
"grad_norm": 0.07686953246593475,
|
|
"learning_rate": 9.385138692825729e-06,
|
|
"loss": 0.0031,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 2.1349397590361447,
|
|
"grad_norm": 0.2046380341053009,
|
|
"learning_rate": 9.347155380353912e-06,
|
|
"loss": 0.0087,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 2.136867469879518,
|
|
"grad_norm": 0.1341692954301834,
|
|
"learning_rate": 9.30922563384121e-06,
|
|
"loss": 0.0045,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 2.1387951807228918,
|
|
"grad_norm": 0.09870535880327225,
|
|
"learning_rate": 9.271349644010672e-06,
|
|
"loss": 0.003,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 2.140722891566265,
|
|
"grad_norm": 0.18708615005016327,
|
|
"learning_rate": 9.233527601315069e-06,
|
|
"loss": 0.0042,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 2.1426506024096383,
|
|
"grad_norm": 0.5175634026527405,
|
|
"learning_rate": 9.195759695935907e-06,
|
|
"loss": 0.0173,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 2.144578313253012,
|
|
"grad_norm": 0.14939036965370178,
|
|
"learning_rate": 9.158046117782464e-06,
|
|
"loss": 0.0031,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 2.1465060240963854,
|
|
"grad_norm": 0.2837410569190979,
|
|
"learning_rate": 9.120387056490851e-06,
|
|
"loss": 0.0097,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 2.148433734939759,
|
|
"grad_norm": 0.11088677495718002,
|
|
"learning_rate": 9.082782701423047e-06,
|
|
"loss": 0.0026,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 2.1503614457831324,
|
|
"grad_norm": 0.07785166054964066,
|
|
"learning_rate": 9.045233241665947e-06,
|
|
"loss": 0.0019,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 2.152289156626506,
|
|
"grad_norm": 0.17568141222000122,
|
|
"learning_rate": 9.007738866030427e-06,
|
|
"loss": 0.0039,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 2.1542168674698794,
|
|
"grad_norm": 0.12652266025543213,
|
|
"learning_rate": 8.970299763050356e-06,
|
|
"loss": 0.0033,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 2.156144578313253,
|
|
"grad_norm": 0.16801467537879944,
|
|
"learning_rate": 8.932916120981695e-06,
|
|
"loss": 0.0076,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 2.1580722891566264,
|
|
"grad_norm": 0.18313169479370117,
|
|
"learning_rate": 8.895588127801545e-06,
|
|
"loss": 0.0052,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"grad_norm": 0.07546049356460571,
|
|
"learning_rate": 8.858315971207146e-06,
|
|
"loss": 0.0022,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 2.1619277108433734,
|
|
"grad_norm": 0.4039839208126068,
|
|
"learning_rate": 8.821099838614996e-06,
|
|
"loss": 0.0203,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 2.163855421686747,
|
|
"grad_norm": 0.09244243055582047,
|
|
"learning_rate": 8.783939917159897e-06,
|
|
"loss": 0.002,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 2.1657831325301204,
|
|
"grad_norm": 0.18327835202217102,
|
|
"learning_rate": 8.746836393693978e-06,
|
|
"loss": 0.0055,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 2.167710843373494,
|
|
"grad_norm": 0.22010307013988495,
|
|
"learning_rate": 8.709789454785809e-06,
|
|
"loss": 0.0077,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 2.1696385542168675,
|
|
"grad_norm": 0.09438297897577286,
|
|
"learning_rate": 8.67279928671939e-06,
|
|
"loss": 0.0032,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 2.1715662650602408,
|
|
"grad_norm": 0.20782770216464996,
|
|
"learning_rate": 8.635866075493318e-06,
|
|
"loss": 0.0028,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 2.1734939759036145,
|
|
"grad_norm": 0.1958685964345932,
|
|
"learning_rate": 8.598990006819756e-06,
|
|
"loss": 0.0047,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 2.1754216867469878,
|
|
"grad_norm": 0.06459935009479523,
|
|
"learning_rate": 8.562171266123528e-06,
|
|
"loss": 0.0015,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 2.1773493975903615,
|
|
"grad_norm": 0.33486708998680115,
|
|
"learning_rate": 8.525410038541218e-06,
|
|
"loss": 0.0094,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 2.179277108433735,
|
|
"grad_norm": 0.5755940079689026,
|
|
"learning_rate": 8.488706508920202e-06,
|
|
"loss": 0.0067,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 2.1812048192771085,
|
|
"grad_norm": 0.10840924829244614,
|
|
"learning_rate": 8.452060861817738e-06,
|
|
"loss": 0.0082,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 2.183132530120482,
|
|
"grad_norm": 0.18611350655555725,
|
|
"learning_rate": 8.415473281500037e-06,
|
|
"loss": 0.0059,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 2.1850602409638555,
|
|
"grad_norm": 0.11245249956846237,
|
|
"learning_rate": 8.378943951941301e-06,
|
|
"loss": 0.0107,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 2.186987951807229,
|
|
"grad_norm": 0.12284426391124725,
|
|
"learning_rate": 8.342473056822873e-06,
|
|
"loss": 0.0025,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 2.1889156626506026,
|
|
"grad_norm": 0.12542888522148132,
|
|
"learning_rate": 8.306060779532245e-06,
|
|
"loss": 0.0059,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 2.190843373493976,
|
|
"grad_norm": 0.1287655532360077,
|
|
"learning_rate": 8.26970730316215e-06,
|
|
"loss": 0.0022,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 2.1927710843373496,
|
|
"grad_norm": 0.1818632185459137,
|
|
"learning_rate": 8.233412810509669e-06,
|
|
"loss": 0.0131,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 2.194698795180723,
|
|
"grad_norm": 0.09687745571136475,
|
|
"learning_rate": 8.197177484075284e-06,
|
|
"loss": 0.0025,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 2.1966265060240966,
|
|
"grad_norm": 0.16103452444076538,
|
|
"learning_rate": 8.161001506061979e-06,
|
|
"loss": 0.0031,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 2.19855421686747,
|
|
"grad_norm": 0.2711680233478546,
|
|
"learning_rate": 8.124885058374302e-06,
|
|
"loss": 0.0034,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 2.200481927710843,
|
|
"grad_norm": 0.17613105475902557,
|
|
"learning_rate": 8.088828322617473e-06,
|
|
"loss": 0.0044,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 2.202409638554217,
|
|
"grad_norm": 0.2298487424850464,
|
|
"learning_rate": 8.052831480096464e-06,
|
|
"loss": 0.0168,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 2.20433734939759,
|
|
"grad_norm": 0.17042206227779388,
|
|
"learning_rate": 8.016894711815067e-06,
|
|
"loss": 0.007,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 2.206265060240964,
|
|
"grad_norm": 0.2830466628074646,
|
|
"learning_rate": 7.98101819847501e-06,
|
|
"loss": 0.0091,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 2.208192771084337,
|
|
"grad_norm": 0.22089065611362457,
|
|
"learning_rate": 7.945202120475063e-06,
|
|
"loss": 0.0046,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 2.210120481927711,
|
|
"grad_norm": 0.1716073900461197,
|
|
"learning_rate": 7.909446657910072e-06,
|
|
"loss": 0.0032,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 2.212048192771084,
|
|
"grad_norm": 0.16140373051166534,
|
|
"learning_rate": 7.873751990570104e-06,
|
|
"loss": 0.0057,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 2.213975903614458,
|
|
"grad_norm": 0.1671605408191681,
|
|
"learning_rate": 7.838118297939529e-06,
|
|
"loss": 0.0039,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 2.2159036144578312,
|
|
"grad_norm": 0.10933005809783936,
|
|
"learning_rate": 7.802545759196117e-06,
|
|
"loss": 0.005,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 2.217831325301205,
|
|
"grad_norm": 0.07819998264312744,
|
|
"learning_rate": 7.76703455321014e-06,
|
|
"loss": 0.0025,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 2.2197590361445783,
|
|
"grad_norm": 0.36211854219436646,
|
|
"learning_rate": 7.73158485854344e-06,
|
|
"loss": 0.0151,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 2.221686746987952,
|
|
"grad_norm": 0.09098304808139801,
|
|
"learning_rate": 7.696196853448612e-06,
|
|
"loss": 0.0027,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 2.2236144578313253,
|
|
"grad_norm": 0.17442144453525543,
|
|
"learning_rate": 7.660870715868018e-06,
|
|
"loss": 0.006,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 2.225542168674699,
|
|
"grad_norm": 0.09785338491201401,
|
|
"learning_rate": 7.625606623432933e-06,
|
|
"loss": 0.0041,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 2.2274698795180723,
|
|
"grad_norm": 0.19399888813495636,
|
|
"learning_rate": 7.590404753462653e-06,
|
|
"loss": 0.0125,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 2.2293975903614456,
|
|
"grad_norm": 0.11080623418092728,
|
|
"learning_rate": 7.55526528296362e-06,
|
|
"loss": 0.0022,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 2.2313253012048193,
|
|
"grad_norm": 0.14067359268665314,
|
|
"learning_rate": 7.520188388628473e-06,
|
|
"loss": 0.0123,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 2.2332530120481926,
|
|
"grad_norm": 0.14533625543117523,
|
|
"learning_rate": 7.485174246835227e-06,
|
|
"loss": 0.0039,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 2.2351807228915663,
|
|
"grad_norm": 0.1253812462091446,
|
|
"learning_rate": 7.4502230336463466e-06,
|
|
"loss": 0.003,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 2.2371084337349396,
|
|
"grad_norm": 0.12766572833061218,
|
|
"learning_rate": 7.415334924807869e-06,
|
|
"loss": 0.0044,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 2.2390361445783133,
|
|
"grad_norm": 0.11985791474580765,
|
|
"learning_rate": 7.380510095748535e-06,
|
|
"loss": 0.0071,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 2.2409638554216866,
|
|
"grad_norm": 0.15505346655845642,
|
|
"learning_rate": 7.3457487215788605e-06,
|
|
"loss": 0.0046,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 2.2428915662650604,
|
|
"grad_norm": 0.18983210623264313,
|
|
"learning_rate": 7.311050977090343e-06,
|
|
"loss": 0.0079,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 2.2448192771084337,
|
|
"grad_norm": 0.19279207289218903,
|
|
"learning_rate": 7.276417036754479e-06,
|
|
"loss": 0.0042,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 2.2467469879518074,
|
|
"grad_norm": 0.21539707481861115,
|
|
"learning_rate": 7.241847074721964e-06,
|
|
"loss": 0.0087,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 2.2486746987951807,
|
|
"grad_norm": 0.07004354894161224,
|
|
"learning_rate": 7.207341264821783e-06,
|
|
"loss": 0.002,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 2.2506024096385544,
|
|
"grad_norm": 0.2203039526939392,
|
|
"learning_rate": 7.172899780560345e-06,
|
|
"loss": 0.0069,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 2.2525301204819277,
|
|
"grad_norm": 0.12474718689918518,
|
|
"learning_rate": 7.138522795120606e-06,
|
|
"loss": 0.0122,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 2.2544578313253014,
|
|
"grad_norm": 0.09078995883464813,
|
|
"learning_rate": 7.104210481361204e-06,
|
|
"loss": 0.0025,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 2.2563855421686747,
|
|
"grad_norm": 0.141757071018219,
|
|
"learning_rate": 7.069963011815584e-06,
|
|
"loss": 0.0039,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 2.258313253012048,
|
|
"grad_norm": 0.14944659173488617,
|
|
"learning_rate": 7.035780558691141e-06,
|
|
"loss": 0.0025,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 2.2602409638554217,
|
|
"grad_norm": 0.06723666191101074,
|
|
"learning_rate": 7.001663293868328e-06,
|
|
"loss": 0.0014,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 2.262168674698795,
|
|
"grad_norm": 0.11966485530138016,
|
|
"learning_rate": 6.967611388899826e-06,
|
|
"loss": 0.0067,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 2.2640963855421687,
|
|
"grad_norm": 0.08943185210227966,
|
|
"learning_rate": 6.933625015009666e-06,
|
|
"loss": 0.0036,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 2.266024096385542,
|
|
"grad_norm": 0.04511453956365585,
|
|
"learning_rate": 6.899704343092359e-06,
|
|
"loss": 0.0014,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 2.2679518072289158,
|
|
"grad_norm": 0.1867951601743698,
|
|
"learning_rate": 6.865849543712058e-06,
|
|
"loss": 0.009,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 2.269879518072289,
|
|
"grad_norm": 0.23791250586509705,
|
|
"learning_rate": 6.832060787101658e-06,
|
|
"loss": 0.0117,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 2.271807228915663,
|
|
"grad_norm": 0.13210316002368927,
|
|
"learning_rate": 6.798338243162008e-06,
|
|
"loss": 0.0024,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 2.273734939759036,
|
|
"grad_norm": 0.1601375937461853,
|
|
"learning_rate": 6.764682081461002e-06,
|
|
"loss": 0.013,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 2.27566265060241,
|
|
"grad_norm": 0.21996766328811646,
|
|
"learning_rate": 6.73109247123273e-06,
|
|
"loss": 0.0074,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 2.277590361445783,
|
|
"grad_norm": 0.15780030190944672,
|
|
"learning_rate": 6.6975695813766465e-06,
|
|
"loss": 0.0052,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 2.279518072289157,
|
|
"grad_norm": 0.18146437406539917,
|
|
"learning_rate": 6.664113580456739e-06,
|
|
"loss": 0.0265,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 2.28144578313253,
|
|
"grad_norm": 0.12033495306968689,
|
|
"learning_rate": 6.630724636700618e-06,
|
|
"loss": 0.0026,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 2.283373493975904,
|
|
"grad_norm": 0.25268155336380005,
|
|
"learning_rate": 6.59740291799873e-06,
|
|
"loss": 0.0046,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 2.285301204819277,
|
|
"grad_norm": 0.19043004512786865,
|
|
"learning_rate": 6.564148591903488e-06,
|
|
"loss": 0.0063,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 2.2872289156626504,
|
|
"grad_norm": 0.06894923001527786,
|
|
"learning_rate": 6.530961825628432e-06,
|
|
"loss": 0.0012,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 2.289156626506024,
|
|
"grad_norm": 0.16378818452358246,
|
|
"learning_rate": 6.4978427860474015e-06,
|
|
"loss": 0.0048,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 2.2910843373493974,
|
|
"grad_norm": 0.11130444705486298,
|
|
"learning_rate": 6.464791639693648e-06,
|
|
"loss": 0.0049,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 2.293012048192771,
|
|
"grad_norm": 0.10573417693376541,
|
|
"learning_rate": 6.431808552759083e-06,
|
|
"loss": 0.0019,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 2.2949397590361444,
|
|
"grad_norm": 0.13344882428646088,
|
|
"learning_rate": 6.398893691093367e-06,
|
|
"loss": 0.0033,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 2.296867469879518,
|
|
"grad_norm": 0.12659135460853577,
|
|
"learning_rate": 6.366047220203088e-06,
|
|
"loss": 0.0032,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 2.2987951807228915,
|
|
"grad_norm": 0.10152821987867355,
|
|
"learning_rate": 6.333269305250971e-06,
|
|
"loss": 0.0027,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 2.300722891566265,
|
|
"grad_norm": 0.1889944225549698,
|
|
"learning_rate": 6.300560111055006e-06,
|
|
"loss": 0.0062,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 2.3026506024096385,
|
|
"grad_norm": 2.3101227283477783,
|
|
"learning_rate": 6.2679198020876275e-06,
|
|
"loss": 0.0113,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 2.304578313253012,
|
|
"grad_norm": 0.6224933862686157,
|
|
"learning_rate": 6.235348542474908e-06,
|
|
"loss": 0.0273,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 2.3065060240963855,
|
|
"grad_norm": 0.1908419281244278,
|
|
"learning_rate": 6.202846495995705e-06,
|
|
"loss": 0.0056,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 2.3084337349397592,
|
|
"grad_norm": 0.10968491435050964,
|
|
"learning_rate": 6.170413826080856e-06,
|
|
"loss": 0.0034,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 2.3103614457831325,
|
|
"grad_norm": 0.23200668394565582,
|
|
"learning_rate": 6.138050695812343e-06,
|
|
"loss": 0.0042,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 2.3122891566265062,
|
|
"grad_norm": 0.12442032992839813,
|
|
"learning_rate": 6.105757267922481e-06,
|
|
"loss": 0.0045,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 2.3142168674698795,
|
|
"grad_norm": 0.14563624560832977,
|
|
"learning_rate": 6.073533704793122e-06,
|
|
"loss": 0.0035,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 2.316144578313253,
|
|
"grad_norm": 0.11523722857236862,
|
|
"learning_rate": 6.04138016845478e-06,
|
|
"loss": 0.0088,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 2.3180722891566266,
|
|
"grad_norm": 0.2000943422317505,
|
|
"learning_rate": 6.009296820585871e-06,
|
|
"loss": 0.0059,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"grad_norm": 0.10698592662811279,
|
|
"learning_rate": 5.977283822511879e-06,
|
|
"loss": 0.0028,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 2.3219277108433736,
|
|
"grad_norm": 0.1533137410879135,
|
|
"learning_rate": 5.945341335204547e-06,
|
|
"loss": 0.0044,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 2.323855421686747,
|
|
"grad_norm": 0.1235835999250412,
|
|
"learning_rate": 5.9134695192810695e-06,
|
|
"loss": 0.0043,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 2.3257831325301206,
|
|
"grad_norm": 0.1916925013065338,
|
|
"learning_rate": 5.8816685350032575e-06,
|
|
"loss": 0.0066,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 2.327710843373494,
|
|
"grad_norm": 0.08812380582094193,
|
|
"learning_rate": 5.849938542276801e-06,
|
|
"loss": 0.0022,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 2.3296385542168676,
|
|
"grad_norm": 0.13387660682201385,
|
|
"learning_rate": 5.818279700650393e-06,
|
|
"loss": 0.0037,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 2.331566265060241,
|
|
"grad_norm": 0.2309022694826126,
|
|
"learning_rate": 5.786692169314954e-06,
|
|
"loss": 0.0049,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 2.3334939759036146,
|
|
"grad_norm": 0.09956549853086472,
|
|
"learning_rate": 5.755176107102833e-06,
|
|
"loss": 0.002,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 2.335421686746988,
|
|
"grad_norm": 0.06035687029361725,
|
|
"learning_rate": 5.723731672487043e-06,
|
|
"loss": 0.002,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 2.337349397590361,
|
|
"grad_norm": 0.06850237399339676,
|
|
"learning_rate": 5.69235902358038e-06,
|
|
"loss": 0.0013,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 2.339277108433735,
|
|
"grad_norm": 0.12068171054124832,
|
|
"learning_rate": 5.661058318134711e-06,
|
|
"loss": 0.0041,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 2.3412048192771087,
|
|
"grad_norm": 0.13146616518497467,
|
|
"learning_rate": 5.6298297135401355e-06,
|
|
"loss": 0.0022,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 2.343132530120482,
|
|
"grad_norm": 0.15160737931728363,
|
|
"learning_rate": 5.598673366824212e-06,
|
|
"loss": 0.0036,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 2.3450602409638552,
|
|
"grad_norm": 0.26196014881134033,
|
|
"learning_rate": 5.567589434651164e-06,
|
|
"loss": 0.0151,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 2.346987951807229,
|
|
"grad_norm": 0.12898831069469452,
|
|
"learning_rate": 5.536578073321073e-06,
|
|
"loss": 0.006,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 2.3489156626506023,
|
|
"grad_norm": 0.11385104805231094,
|
|
"learning_rate": 5.505639438769146e-06,
|
|
"loss": 0.0052,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 2.350843373493976,
|
|
"grad_norm": 0.14569509029388428,
|
|
"learning_rate": 5.47477368656486e-06,
|
|
"loss": 0.0048,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 2.3527710843373493,
|
|
"grad_norm": 0.12406075745820999,
|
|
"learning_rate": 5.443980971911238e-06,
|
|
"loss": 0.0028,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 2.354698795180723,
|
|
"grad_norm": 0.3730498254299164,
|
|
"learning_rate": 5.413261449644039e-06,
|
|
"loss": 0.0043,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 2.3566265060240963,
|
|
"grad_norm": 0.1449914574623108,
|
|
"learning_rate": 5.382615274230987e-06,
|
|
"loss": 0.0075,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 2.35855421686747,
|
|
"grad_norm": 0.20739100873470306,
|
|
"learning_rate": 5.352042599770995e-06,
|
|
"loss": 0.0061,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 2.3604819277108433,
|
|
"grad_norm": 0.05786775052547455,
|
|
"learning_rate": 5.321543579993398e-06,
|
|
"loss": 0.0015,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 2.362409638554217,
|
|
"grad_norm": 0.09043122828006744,
|
|
"learning_rate": 5.2911183682571446e-06,
|
|
"loss": 0.0034,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 2.3643373493975903,
|
|
"grad_norm": 0.2685496211051941,
|
|
"learning_rate": 5.260767117550094e-06,
|
|
"loss": 0.0076,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 2.3662650602409636,
|
|
"grad_norm": 0.17694126069545746,
|
|
"learning_rate": 5.230489980488165e-06,
|
|
"loss": 0.0148,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 2.3681927710843373,
|
|
"grad_norm": 0.11609307676553726,
|
|
"learning_rate": 5.200287109314633e-06,
|
|
"loss": 0.0049,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 2.370120481927711,
|
|
"grad_norm": 0.1257704645395279,
|
|
"learning_rate": 5.1701586558993285e-06,
|
|
"loss": 0.0031,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 2.3720481927710844,
|
|
"grad_norm": 0.27177703380584717,
|
|
"learning_rate": 5.140104771737899e-06,
|
|
"loss": 0.0058,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 2.3739759036144576,
|
|
"grad_norm": 0.13928169012069702,
|
|
"learning_rate": 5.110125607951024e-06,
|
|
"loss": 0.0051,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 2.3759036144578314,
|
|
"grad_norm": 0.679577648639679,
|
|
"learning_rate": 5.0802213152836514e-06,
|
|
"loss": 0.0173,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 2.3778313253012047,
|
|
"grad_norm": 0.16769403219223022,
|
|
"learning_rate": 5.0503920441042845e-06,
|
|
"loss": 0.0045,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 2.3797590361445784,
|
|
"grad_norm": 0.09427493065595627,
|
|
"learning_rate": 5.0206379444041764e-06,
|
|
"loss": 0.0024,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 2.3816867469879517,
|
|
"grad_norm": 0.33908671140670776,
|
|
"learning_rate": 4.990959165796585e-06,
|
|
"loss": 0.0088,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 2.3836144578313254,
|
|
"grad_norm": 0.18106943368911743,
|
|
"learning_rate": 4.961355857516034e-06,
|
|
"loss": 0.0094,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 2.3855421686746987,
|
|
"grad_norm": 0.5833203196525574,
|
|
"learning_rate": 4.931828168417583e-06,
|
|
"loss": 0.0086,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 2.3874698795180724,
|
|
"grad_norm": 0.09108569473028183,
|
|
"learning_rate": 4.902376246976015e-06,
|
|
"loss": 0.0014,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 2.3893975903614457,
|
|
"grad_norm": 0.10596407204866409,
|
|
"learning_rate": 4.873000241285153e-06,
|
|
"loss": 0.0043,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 2.3913253012048195,
|
|
"grad_norm": 0.10775511711835861,
|
|
"learning_rate": 4.8437002990570835e-06,
|
|
"loss": 0.0014,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 2.3932530120481927,
|
|
"grad_norm": 0.9646345973014832,
|
|
"learning_rate": 4.8144765676214245e-06,
|
|
"loss": 0.0525,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 2.395180722891566,
|
|
"grad_norm": 0.20530278980731964,
|
|
"learning_rate": 4.7853291939245814e-06,
|
|
"loss": 0.008,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 2.3971084337349398,
|
|
"grad_norm": 0.1682119369506836,
|
|
"learning_rate": 4.756258324528995e-06,
|
|
"loss": 0.0044,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 2.3990361445783135,
|
|
"grad_norm": 0.45536917448043823,
|
|
"learning_rate": 4.727264105612439e-06,
|
|
"loss": 0.0186,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 2.4009638554216868,
|
|
"grad_norm": 0.3017471730709076,
|
|
"learning_rate": 4.698346682967258e-06,
|
|
"loss": 0.0106,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 2.40289156626506,
|
|
"grad_norm": 0.1226554661989212,
|
|
"learning_rate": 4.669506201999625e-06,
|
|
"loss": 0.0035,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 2.404819277108434,
|
|
"grad_norm": 0.13750068843364716,
|
|
"learning_rate": 4.640742807728837e-06,
|
|
"loss": 0.0038,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 2.406746987951807,
|
|
"grad_norm": 0.11531024426221848,
|
|
"learning_rate": 4.612056644786575e-06,
|
|
"loss": 0.0021,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 2.408674698795181,
|
|
"grad_norm": 0.1143675372004509,
|
|
"learning_rate": 4.583447857416175e-06,
|
|
"loss": 0.0028,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 2.410602409638554,
|
|
"grad_norm": 0.0914216861128807,
|
|
"learning_rate": 4.554916589471898e-06,
|
|
"loss": 0.0027,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 2.412530120481928,
|
|
"grad_norm": 0.18339012563228607,
|
|
"learning_rate": 4.526462984418221e-06,
|
|
"loss": 0.0037,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 2.414457831325301,
|
|
"grad_norm": 0.11073138564825058,
|
|
"learning_rate": 4.498087185329105e-06,
|
|
"loss": 0.003,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 2.416385542168675,
|
|
"grad_norm": 0.20792435109615326,
|
|
"learning_rate": 4.469789334887265e-06,
|
|
"loss": 0.009,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 2.418313253012048,
|
|
"grad_norm": 0.09485629945993423,
|
|
"learning_rate": 4.441569575383471e-06,
|
|
"loss": 0.0033,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 2.420240963855422,
|
|
"grad_norm": 0.11831793934106827,
|
|
"learning_rate": 4.413428048715851e-06,
|
|
"loss": 0.0021,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 2.422168674698795,
|
|
"grad_norm": 0.11818034201860428,
|
|
"learning_rate": 4.38536489638911e-06,
|
|
"loss": 0.0041,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 2.4240963855421684,
|
|
"grad_norm": 0.2583082616329193,
|
|
"learning_rate": 4.3573802595138945e-06,
|
|
"loss": 0.0039,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 2.426024096385542,
|
|
"grad_norm": 0.3120201826095581,
|
|
"learning_rate": 4.329474278806034e-06,
|
|
"loss": 0.0087,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 2.427951807228916,
|
|
"grad_norm": 0.1258879452943802,
|
|
"learning_rate": 4.301647094585855e-06,
|
|
"loss": 0.0046,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 2.429879518072289,
|
|
"grad_norm": 0.15144586563110352,
|
|
"learning_rate": 4.273898846777473e-06,
|
|
"loss": 0.0054,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 2.4318072289156625,
|
|
"grad_norm": 0.15615184605121613,
|
|
"learning_rate": 4.246229674908067e-06,
|
|
"loss": 0.0072,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 2.433734939759036,
|
|
"grad_norm": 0.09690173715353012,
|
|
"learning_rate": 4.218639718107225e-06,
|
|
"loss": 0.003,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 2.4356626506024095,
|
|
"grad_norm": 0.23884955048561096,
|
|
"learning_rate": 4.1911291151062e-06,
|
|
"loss": 0.0109,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 2.4375903614457832,
|
|
"grad_norm": 0.0905768945813179,
|
|
"learning_rate": 4.163698004237222e-06,
|
|
"loss": 0.0027,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 2.4395180722891565,
|
|
"grad_norm": 0.09168912470340729,
|
|
"learning_rate": 4.136346523432821e-06,
|
|
"loss": 0.0018,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 2.4414457831325302,
|
|
"grad_norm": 0.17878012359142303,
|
|
"learning_rate": 4.109074810225118e-06,
|
|
"loss": 0.0048,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 2.4433734939759035,
|
|
"grad_norm": 0.09913790971040726,
|
|
"learning_rate": 4.08188300174513e-06,
|
|
"loss": 0.0021,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 2.4453012048192773,
|
|
"grad_norm": 0.16615812480449677,
|
|
"learning_rate": 4.054771234722106e-06,
|
|
"loss": 0.0066,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 2.4472289156626506,
|
|
"grad_norm": 0.09618276357650757,
|
|
"learning_rate": 4.027739645482784e-06,
|
|
"loss": 0.0043,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 2.4491566265060243,
|
|
"grad_norm": 0.33473479747772217,
|
|
"learning_rate": 4.0007883699507855e-06,
|
|
"loss": 0.0236,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 2.4510843373493976,
|
|
"grad_norm": 0.15051880478858948,
|
|
"learning_rate": 3.973917543645867e-06,
|
|
"loss": 0.0068,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 2.453012048192771,
|
|
"grad_norm": 0.24134816229343414,
|
|
"learning_rate": 3.947127301683249e-06,
|
|
"loss": 0.0194,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 2.4549397590361446,
|
|
"grad_norm": 0.10495353490114212,
|
|
"learning_rate": 3.920417778772967e-06,
|
|
"loss": 0.0042,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 2.4568674698795183,
|
|
"grad_norm": 0.2294938713312149,
|
|
"learning_rate": 3.893789109219171e-06,
|
|
"loss": 0.0224,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 2.4587951807228916,
|
|
"grad_norm": 0.13710513710975647,
|
|
"learning_rate": 3.867241426919446e-06,
|
|
"loss": 0.0046,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 2.460722891566265,
|
|
"grad_norm": 0.06754808127880096,
|
|
"learning_rate": 3.840774865364157e-06,
|
|
"loss": 0.0019,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 2.4626506024096386,
|
|
"grad_norm": 0.24797780811786652,
|
|
"learning_rate": 3.8143895576357605e-06,
|
|
"loss": 0.0063,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 2.464578313253012,
|
|
"grad_norm": 0.1476449817419052,
|
|
"learning_rate": 3.788085636408143e-06,
|
|
"loss": 0.0055,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 2.4665060240963856,
|
|
"grad_norm": 0.22397096455097198,
|
|
"learning_rate": 3.7618632339459616e-06,
|
|
"loss": 0.0164,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 2.468433734939759,
|
|
"grad_norm": 0.21596969664096832,
|
|
"learning_rate": 3.7357224821039497e-06,
|
|
"loss": 0.0112,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 2.4703614457831327,
|
|
"grad_norm": 0.2775099575519562,
|
|
"learning_rate": 3.7096635123263068e-06,
|
|
"loss": 0.0112,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 2.472289156626506,
|
|
"grad_norm": 0.07963326573371887,
|
|
"learning_rate": 3.683686455645974e-06,
|
|
"loss": 0.0013,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 2.4742168674698797,
|
|
"grad_norm": 0.1253802627325058,
|
|
"learning_rate": 3.6577914426840266e-06,
|
|
"loss": 0.0038,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 2.476144578313253,
|
|
"grad_norm": 0.10258597880601883,
|
|
"learning_rate": 3.631978603648989e-06,
|
|
"loss": 0.0023,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 2.4780722891566267,
|
|
"grad_norm": 0.17102380096912384,
|
|
"learning_rate": 3.6062480683361935e-06,
|
|
"loss": 0.0025,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"grad_norm": 0.09547360241413116,
|
|
"learning_rate": 3.580599966127123e-06,
|
|
"loss": 0.003,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 2.4819277108433733,
|
|
"grad_norm": 0.08008653670549393,
|
|
"learning_rate": 3.5550344259887438e-06,
|
|
"loss": 0.0023,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 2.483855421686747,
|
|
"grad_norm": 0.07712296396493912,
|
|
"learning_rate": 3.5295515764729003e-06,
|
|
"loss": 0.0015,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 2.4857831325301207,
|
|
"grad_norm": 0.21118703484535217,
|
|
"learning_rate": 3.5041515457156303e-06,
|
|
"loss": 0.0041,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 2.487710843373494,
|
|
"grad_norm": 0.10772393643856049,
|
|
"learning_rate": 3.4788344614365155e-06,
|
|
"loss": 0.0029,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 2.4896385542168673,
|
|
"grad_norm": 0.2353268563747406,
|
|
"learning_rate": 3.453600450938073e-06,
|
|
"loss": 0.0072,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 2.491566265060241,
|
|
"grad_norm": 0.2897944152355194,
|
|
"learning_rate": 3.428449641105107e-06,
|
|
"loss": 0.0205,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 2.4934939759036143,
|
|
"grad_norm": 0.19756680727005005,
|
|
"learning_rate": 3.4033821584040383e-06,
|
|
"loss": 0.0065,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 2.495421686746988,
|
|
"grad_norm": 0.13538534939289093,
|
|
"learning_rate": 3.378398128882305e-06,
|
|
"loss": 0.0025,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 2.4973493975903613,
|
|
"grad_norm": 0.2301637977361679,
|
|
"learning_rate": 3.3534976781677142e-06,
|
|
"loss": 0.0071,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 2.499277108433735,
|
|
"grad_norm": 0.0965796634554863,
|
|
"learning_rate": 3.3286809314678137e-06,
|
|
"loss": 0.0024,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 2.5012048192771084,
|
|
"grad_norm": 0.0777980163693428,
|
|
"learning_rate": 3.30394801356926e-06,
|
|
"loss": 0.0013,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 2.503132530120482,
|
|
"grad_norm": 0.3157603442668915,
|
|
"learning_rate": 3.279299048837177e-06,
|
|
"loss": 0.0228,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 2.5050602409638554,
|
|
"grad_norm": 0.15660233795642853,
|
|
"learning_rate": 3.2547341612145654e-06,
|
|
"loss": 0.0056,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 2.506987951807229,
|
|
"grad_norm": 0.21655581891536713,
|
|
"learning_rate": 3.2302534742216586e-06,
|
|
"loss": 0.0081,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 2.5089156626506024,
|
|
"grad_norm": 0.09475889801979065,
|
|
"learning_rate": 3.205857110955277e-06,
|
|
"loss": 0.0029,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 2.5108433734939757,
|
|
"grad_norm": 0.13174696266651154,
|
|
"learning_rate": 3.18154519408826e-06,
|
|
"loss": 0.0059,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 2.5127710843373494,
|
|
"grad_norm": 0.10386355221271515,
|
|
"learning_rate": 3.1573178458688102e-06,
|
|
"loss": 0.0042,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 2.514698795180723,
|
|
"grad_norm": 0.12700854241847992,
|
|
"learning_rate": 3.133175188119899e-06,
|
|
"loss": 0.0041,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 2.5166265060240964,
|
|
"grad_norm": 0.1617022454738617,
|
|
"learning_rate": 3.109117342238639e-06,
|
|
"loss": 0.0053,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 2.5185542168674697,
|
|
"grad_norm": 0.8668884038925171,
|
|
"learning_rate": 3.085144429195688e-06,
|
|
"loss": 0.0084,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 2.5204819277108435,
|
|
"grad_norm": 0.22429344058036804,
|
|
"learning_rate": 3.061256569534634e-06,
|
|
"loss": 0.0053,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 2.5224096385542167,
|
|
"grad_norm": 0.08967582136392593,
|
|
"learning_rate": 3.037453883371375e-06,
|
|
"loss": 0.0018,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 2.5243373493975905,
|
|
"grad_norm": 0.1251695454120636,
|
|
"learning_rate": 3.0137364903935464e-06,
|
|
"loss": 0.0037,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 2.5262650602409638,
|
|
"grad_norm": 0.09026174992322922,
|
|
"learning_rate": 2.990104509859897e-06,
|
|
"loss": 0.0024,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 2.5281927710843375,
|
|
"grad_norm": 0.34319114685058594,
|
|
"learning_rate": 2.966558060599689e-06,
|
|
"loss": 0.0063,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 2.5301204819277108,
|
|
"grad_norm": 0.20300136506557465,
|
|
"learning_rate": 2.9430972610121087e-06,
|
|
"loss": 0.0054,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 2.532048192771084,
|
|
"grad_norm": 0.19160760939121246,
|
|
"learning_rate": 2.9197222290656737e-06,
|
|
"loss": 0.0095,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 2.533975903614458,
|
|
"grad_norm": 0.18991442024707794,
|
|
"learning_rate": 2.8964330822976227e-06,
|
|
"loss": 0.006,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 2.5359036144578315,
|
|
"grad_norm": 0.1801903396844864,
|
|
"learning_rate": 2.873229937813349e-06,
|
|
"loss": 0.0067,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 2.537831325301205,
|
|
"grad_norm": 0.07068303227424622,
|
|
"learning_rate": 2.850112912285783e-06,
|
|
"loss": 0.0015,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 2.539759036144578,
|
|
"grad_norm": 0.1404612809419632,
|
|
"learning_rate": 2.8270821219548296e-06,
|
|
"loss": 0.0036,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 2.541686746987952,
|
|
"grad_norm": 0.12199504673480988,
|
|
"learning_rate": 2.8041376826267862e-06,
|
|
"loss": 0.0068,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 2.5436144578313256,
|
|
"grad_norm": 0.2167249619960785,
|
|
"learning_rate": 2.7812797096737253e-06,
|
|
"loss": 0.0048,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 2.545542168674699,
|
|
"grad_norm": 0.07466506212949753,
|
|
"learning_rate": 2.7585083180329575e-06,
|
|
"loss": 0.0017,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 2.547469879518072,
|
|
"grad_norm": 0.11736353486776352,
|
|
"learning_rate": 2.7358236222064283e-06,
|
|
"loss": 0.003,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 2.549397590361446,
|
|
"grad_norm": 0.16602204740047455,
|
|
"learning_rate": 2.7132257362601453e-06,
|
|
"loss": 0.005,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 2.551325301204819,
|
|
"grad_norm": 0.15473629534244537,
|
|
"learning_rate": 2.6907147738236193e-06,
|
|
"loss": 0.0077,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 2.553253012048193,
|
|
"grad_norm": 0.07868973910808563,
|
|
"learning_rate": 2.6682908480892567e-06,
|
|
"loss": 0.0013,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 2.555180722891566,
|
|
"grad_norm": 0.2137845754623413,
|
|
"learning_rate": 2.645954071811847e-06,
|
|
"loss": 0.0092,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 2.55710843373494,
|
|
"grad_norm": 0.11191053688526154,
|
|
"learning_rate": 2.623704557307949e-06,
|
|
"loss": 0.0031,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 2.559036144578313,
|
|
"grad_norm": 0.3080642521381378,
|
|
"learning_rate": 2.6015424164553295e-06,
|
|
"loss": 0.0104,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 2.5609638554216865,
|
|
"grad_norm": 0.08816439658403397,
|
|
"learning_rate": 2.579467760692427e-06,
|
|
"loss": 0.004,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 2.56289156626506,
|
|
"grad_norm": 0.17154981195926666,
|
|
"learning_rate": 2.557480701017776e-06,
|
|
"loss": 0.0035,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 2.564819277108434,
|
|
"grad_norm": 0.09479143470525742,
|
|
"learning_rate": 2.5355813479894464e-06,
|
|
"loss": 0.0034,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 2.5667469879518072,
|
|
"grad_norm": 0.26139333844184875,
|
|
"learning_rate": 2.513769811724487e-06,
|
|
"loss": 0.0076,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 2.5686746987951805,
|
|
"grad_norm": 0.16864238679409027,
|
|
"learning_rate": 2.4920462018983816e-06,
|
|
"loss": 0.0046,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 2.5706024096385542,
|
|
"grad_norm": 0.1133158802986145,
|
|
"learning_rate": 2.4704106277444884e-06,
|
|
"loss": 0.0034,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 2.572530120481928,
|
|
"grad_norm": 0.27522334456443787,
|
|
"learning_rate": 2.4488631980534995e-06,
|
|
"loss": 0.0127,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 2.5744578313253013,
|
|
"grad_norm": 0.13547387719154358,
|
|
"learning_rate": 2.427404021172868e-06,
|
|
"loss": 0.0031,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 2.5763855421686745,
|
|
"grad_norm": 0.13478629291057587,
|
|
"learning_rate": 2.406033205006313e-06,
|
|
"loss": 0.0039,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 2.5783132530120483,
|
|
"grad_norm": 0.11515481770038605,
|
|
"learning_rate": 2.3847508570132226e-06,
|
|
"loss": 0.0029,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 2.5802409638554216,
|
|
"grad_norm": 0.21657171845436096,
|
|
"learning_rate": 2.36355708420815e-06,
|
|
"loss": 0.011,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 2.5821686746987953,
|
|
"grad_norm": 0.11441601067781448,
|
|
"learning_rate": 2.342451993160262e-06,
|
|
"loss": 0.006,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 2.5840963855421686,
|
|
"grad_norm": 0.13475841283798218,
|
|
"learning_rate": 2.3214356899928036e-06,
|
|
"loss": 0.0051,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 2.5860240963855423,
|
|
"grad_norm": 0.053035832941532135,
|
|
"learning_rate": 2.300508280382572e-06,
|
|
"loss": 0.0012,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 2.5879518072289156,
|
|
"grad_norm": 0.12467508763074875,
|
|
"learning_rate": 2.279669869559358e-06,
|
|
"loss": 0.0024,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 2.589879518072289,
|
|
"grad_norm": 0.10572273284196854,
|
|
"learning_rate": 2.2589205623054646e-06,
|
|
"loss": 0.0024,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 2.5918072289156626,
|
|
"grad_norm": 0.17056365311145782,
|
|
"learning_rate": 2.238260462955142e-06,
|
|
"loss": 0.0064,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 2.5937349397590364,
|
|
"grad_norm": 0.07940494269132614,
|
|
"learning_rate": 2.2176896753940637e-06,
|
|
"loss": 0.0012,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 2.5956626506024096,
|
|
"grad_norm": 0.10416694730520248,
|
|
"learning_rate": 2.1972083030588244e-06,
|
|
"loss": 0.0092,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 2.597590361445783,
|
|
"grad_norm": 0.2384328842163086,
|
|
"learning_rate": 2.176816448936423e-06,
|
|
"loss": 0.0067,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 2.5995180722891567,
|
|
"grad_norm": 0.14279082417488098,
|
|
"learning_rate": 2.156514215563703e-06,
|
|
"loss": 0.0059,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 2.6014457831325304,
|
|
"grad_norm": 0.08462683111429214,
|
|
"learning_rate": 2.1363017050268886e-06,
|
|
"loss": 0.0021,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 2.6033734939759037,
|
|
"grad_norm": 0.09768491238355637,
|
|
"learning_rate": 2.1161790189610377e-06,
|
|
"loss": 0.0038,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 2.605301204819277,
|
|
"grad_norm": 0.25498896837234497,
|
|
"learning_rate": 2.0961462585495474e-06,
|
|
"loss": 0.0114,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 2.6072289156626507,
|
|
"grad_norm": 0.15635675191879272,
|
|
"learning_rate": 2.076203524523637e-06,
|
|
"loss": 0.0054,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 2.609156626506024,
|
|
"grad_norm": 0.11619213968515396,
|
|
"learning_rate": 2.056350917161836e-06,
|
|
"loss": 0.007,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 2.6110843373493977,
|
|
"grad_norm": 0.18085338175296783,
|
|
"learning_rate": 2.0365885362895053e-06,
|
|
"loss": 0.0061,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 2.613012048192771,
|
|
"grad_norm": 0.14492927491664886,
|
|
"learning_rate": 2.016916481278306e-06,
|
|
"loss": 0.0114,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 2.6149397590361447,
|
|
"grad_norm": 0.21257621049880981,
|
|
"learning_rate": 1.997334851045709e-06,
|
|
"loss": 0.0057,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 2.616867469879518,
|
|
"grad_norm": 0.11539656668901443,
|
|
"learning_rate": 1.9778437440545085e-06,
|
|
"loss": 0.0071,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 2.6187951807228913,
|
|
"grad_norm": 0.1642933189868927,
|
|
"learning_rate": 1.95844325831231e-06,
|
|
"loss": 0.0054,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 2.620722891566265,
|
|
"grad_norm": 0.10779479146003723,
|
|
"learning_rate": 1.9391334913710545e-06,
|
|
"loss": 0.0028,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 2.6226506024096388,
|
|
"grad_norm": 0.14295366406440735,
|
|
"learning_rate": 1.9199145403265175e-06,
|
|
"loss": 0.0048,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 2.624578313253012,
|
|
"grad_norm": 0.13454844057559967,
|
|
"learning_rate": 1.9007865018178107e-06,
|
|
"loss": 0.0072,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 2.6265060240963853,
|
|
"grad_norm": 0.778252363204956,
|
|
"learning_rate": 1.8817494720269302e-06,
|
|
"loss": 0.0071,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 2.628433734939759,
|
|
"grad_norm": 0.11488679051399231,
|
|
"learning_rate": 1.8628035466782268e-06,
|
|
"loss": 0.0038,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 2.630361445783133,
|
|
"grad_norm": 0.15560875833034515,
|
|
"learning_rate": 1.8439488210379687e-06,
|
|
"loss": 0.0043,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 2.632289156626506,
|
|
"grad_norm": 0.10538071393966675,
|
|
"learning_rate": 1.8251853899138306e-06,
|
|
"loss": 0.0041,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 2.6342168674698794,
|
|
"grad_norm": 0.12866193056106567,
|
|
"learning_rate": 1.8065133476544306e-06,
|
|
"loss": 0.0034,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 2.636144578313253,
|
|
"grad_norm": 0.2045469433069229,
|
|
"learning_rate": 1.7879327881488584e-06,
|
|
"loss": 0.0141,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 2.6380722891566264,
|
|
"grad_norm": 0.12423976510763168,
|
|
"learning_rate": 1.769443804826194e-06,
|
|
"loss": 0.0047,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"grad_norm": 0.1007109209895134,
|
|
"learning_rate": 1.751046490655046e-06,
|
|
"loss": 0.0031,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 2.6419277108433734,
|
|
"grad_norm": 0.0681275874376297,
|
|
"learning_rate": 1.7327409381430804e-06,
|
|
"loss": 0.0019,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 2.643855421686747,
|
|
"grad_norm": 0.1645517498254776,
|
|
"learning_rate": 1.7145272393365498e-06,
|
|
"loss": 0.0035,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 2.6457831325301204,
|
|
"grad_norm": 0.13689427077770233,
|
|
"learning_rate": 1.6964054858198386e-06,
|
|
"loss": 0.0086,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 2.6477108433734937,
|
|
"grad_norm": 0.10440093278884888,
|
|
"learning_rate": 1.6783757687150149e-06,
|
|
"loss": 0.0019,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 2.6496385542168674,
|
|
"grad_norm": 0.1142532229423523,
|
|
"learning_rate": 1.6604381786813383e-06,
|
|
"loss": 0.0047,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 2.651566265060241,
|
|
"grad_norm": 0.10430166125297546,
|
|
"learning_rate": 1.6425928059148312e-06,
|
|
"loss": 0.0027,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 2.6534939759036145,
|
|
"grad_norm": 0.2315254956483841,
|
|
"learning_rate": 1.624839740147819e-06,
|
|
"loss": 0.0071,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 2.6554216867469878,
|
|
"grad_norm": 0.15356265008449554,
|
|
"learning_rate": 1.6071790706484746e-06,
|
|
"loss": 0.0109,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 2.6573493975903615,
|
|
"grad_norm": 0.1332363784313202,
|
|
"learning_rate": 1.589610886220383e-06,
|
|
"loss": 0.0046,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 2.659277108433735,
|
|
"grad_norm": 0.18892519176006317,
|
|
"learning_rate": 1.5721352752020602e-06,
|
|
"loss": 0.0138,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 2.6612048192771085,
|
|
"grad_norm": 0.10537895560264587,
|
|
"learning_rate": 1.5547523254665598e-06,
|
|
"loss": 0.0066,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 2.663132530120482,
|
|
"grad_norm": 0.1308947205543518,
|
|
"learning_rate": 1.5374621244209965e-06,
|
|
"loss": 0.0039,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 2.6650602409638555,
|
|
"grad_norm": 0.11358808726072311,
|
|
"learning_rate": 1.5202647590060983e-06,
|
|
"loss": 0.0029,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 2.666987951807229,
|
|
"grad_norm": 0.12029009312391281,
|
|
"learning_rate": 1.5031603156958064e-06,
|
|
"loss": 0.0032,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 2.6689156626506025,
|
|
"grad_norm": 0.36994072794914246,
|
|
"learning_rate": 1.4861488804968093e-06,
|
|
"loss": 0.024,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 2.670843373493976,
|
|
"grad_norm": 0.1263083666563034,
|
|
"learning_rate": 1.4692305389481232e-06,
|
|
"loss": 0.0047,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 2.6727710843373496,
|
|
"grad_norm": 0.15056709945201874,
|
|
"learning_rate": 1.452405376120658e-06,
|
|
"loss": 0.0014,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 2.674698795180723,
|
|
"grad_norm": 0.10418888181447983,
|
|
"learning_rate": 1.4356734766167925e-06,
|
|
"loss": 0.0035,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 2.676626506024096,
|
|
"grad_norm": 0.12220565974712372,
|
|
"learning_rate": 1.4190349245699443e-06,
|
|
"loss": 0.0063,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 2.67855421686747,
|
|
"grad_norm": 0.14774753153324127,
|
|
"learning_rate": 1.402489803644156e-06,
|
|
"loss": 0.008,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 2.6804819277108436,
|
|
"grad_norm": 0.14384198188781738,
|
|
"learning_rate": 1.3860381970336544e-06,
|
|
"loss": 0.0039,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 2.682409638554217,
|
|
"grad_norm": 0.10995055735111237,
|
|
"learning_rate": 1.3696801874624698e-06,
|
|
"loss": 0.0028,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 2.68433734939759,
|
|
"grad_norm": 0.12208505719900131,
|
|
"learning_rate": 1.353415857183966e-06,
|
|
"loss": 0.0029,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 2.686265060240964,
|
|
"grad_norm": 0.16018439829349518,
|
|
"learning_rate": 1.337245287980482e-06,
|
|
"loss": 0.0068,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 2.688192771084337,
|
|
"grad_norm": 5.2112274169921875,
|
|
"learning_rate": 1.3211685611628844e-06,
|
|
"loss": 0.1645,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 2.690120481927711,
|
|
"grad_norm": 0.12426120787858963,
|
|
"learning_rate": 1.3051857575701732e-06,
|
|
"loss": 0.0044,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 2.692048192771084,
|
|
"grad_norm": 0.13931375741958618,
|
|
"learning_rate": 1.2892969575690685e-06,
|
|
"loss": 0.0035,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 2.693975903614458,
|
|
"grad_norm": 0.1804540753364563,
|
|
"learning_rate": 1.273502241053608e-06,
|
|
"loss": 0.0108,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 2.695903614457831,
|
|
"grad_norm": 0.12313607335090637,
|
|
"learning_rate": 1.2578016874447596e-06,
|
|
"loss": 0.0073,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 2.697831325301205,
|
|
"grad_norm": 0.1301470398902893,
|
|
"learning_rate": 1.2421953756899985e-06,
|
|
"loss": 0.0037,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 2.6997590361445782,
|
|
"grad_norm": 0.12769126892089844,
|
|
"learning_rate": 1.226683384262919e-06,
|
|
"loss": 0.0041,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 2.701686746987952,
|
|
"grad_norm": 0.20923997461795807,
|
|
"learning_rate": 1.21126579116285e-06,
|
|
"loss": 0.0101,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 2.7036144578313253,
|
|
"grad_norm": 0.09334482997655869,
|
|
"learning_rate": 1.1959426739144497e-06,
|
|
"loss": 0.0022,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 2.7055421686746985,
|
|
"grad_norm": 0.06848987936973572,
|
|
"learning_rate": 1.1807141095673291e-06,
|
|
"loss": 0.0013,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 2.7074698795180723,
|
|
"grad_norm": 0.14552196860313416,
|
|
"learning_rate": 1.1655801746956463e-06,
|
|
"loss": 0.0066,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 2.709397590361446,
|
|
"grad_norm": 0.11259587109088898,
|
|
"learning_rate": 1.1505409453977334e-06,
|
|
"loss": 0.0045,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 2.7113253012048193,
|
|
"grad_norm": 0.23408068716526031,
|
|
"learning_rate": 1.135596497295719e-06,
|
|
"loss": 0.0181,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 2.7132530120481926,
|
|
"grad_norm": 0.1483619660139084,
|
|
"learning_rate": 1.1207469055351395e-06,
|
|
"loss": 0.0042,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 2.7151807228915663,
|
|
"grad_norm": 0.1170588880777359,
|
|
"learning_rate": 1.105992244784555e-06,
|
|
"loss": 0.0059,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 2.7171084337349396,
|
|
"grad_norm": 0.15649215877056122,
|
|
"learning_rate": 1.0913325892351857e-06,
|
|
"loss": 0.0023,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 2.7190361445783133,
|
|
"grad_norm": 0.0980108231306076,
|
|
"learning_rate": 1.0767680126005443e-06,
|
|
"loss": 0.0019,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 2.7209638554216866,
|
|
"grad_norm": 0.14913050830364227,
|
|
"learning_rate": 1.0622985881160396e-06,
|
|
"loss": 0.0018,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 2.7228915662650603,
|
|
"grad_norm": 0.0827481672167778,
|
|
"learning_rate": 1.0479243885386347e-06,
|
|
"loss": 0.0023,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 2.7248192771084336,
|
|
"grad_norm": 0.15648555755615234,
|
|
"learning_rate": 1.0336454861464706e-06,
|
|
"loss": 0.0033,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 2.7267469879518074,
|
|
"grad_norm": 0.10614357888698578,
|
|
"learning_rate": 1.0194619527385007e-06,
|
|
"loss": 0.0029,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 2.7286746987951807,
|
|
"grad_norm": 0.07111652940511703,
|
|
"learning_rate": 1.0053738596341355e-06,
|
|
"loss": 0.0026,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 2.7306024096385544,
|
|
"grad_norm": 0.11736573278903961,
|
|
"learning_rate": 9.91381277672867e-07,
|
|
"loss": 0.005,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 2.7325301204819277,
|
|
"grad_norm": 0.18440629541873932,
|
|
"learning_rate": 9.774842772139537e-07,
|
|
"loss": 0.0038,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 2.734457831325301,
|
|
"grad_norm": 0.11000041663646698,
|
|
"learning_rate": 9.636829281360116e-07,
|
|
"loss": 0.0034,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 2.7363855421686747,
|
|
"grad_norm": 0.15212605893611908,
|
|
"learning_rate": 9.499772998367018e-07,
|
|
"loss": 0.0038,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 2.7383132530120484,
|
|
"grad_norm": 0.07784705609083176,
|
|
"learning_rate": 9.36367461232377e-07,
|
|
"loss": 0.002,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 2.7402409638554217,
|
|
"grad_norm": 0.1096726506948471,
|
|
"learning_rate": 9.22853480757715e-07,
|
|
"loss": 0.0028,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 2.742168674698795,
|
|
"grad_norm": 0.17528535425662994,
|
|
"learning_rate": 9.094354263653971e-07,
|
|
"loss": 0.0065,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 2.7440963855421687,
|
|
"grad_norm": 0.09263470768928528,
|
|
"learning_rate": 8.961133655257548e-07,
|
|
"loss": 0.0031,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 2.746024096385542,
|
|
"grad_norm": 0.14822180569171906,
|
|
"learning_rate": 8.828873652264303e-07,
|
|
"loss": 0.0043,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 2.7479518072289157,
|
|
"grad_norm": 0.11577019095420837,
|
|
"learning_rate": 8.697574919720497e-07,
|
|
"loss": 0.004,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 2.749879518072289,
|
|
"grad_norm": 0.11681873351335526,
|
|
"learning_rate": 8.567238117838683e-07,
|
|
"loss": 0.0035,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 2.7518072289156628,
|
|
"grad_norm": 0.1191524937748909,
|
|
"learning_rate": 8.437863901994592e-07,
|
|
"loss": 0.0022,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 2.753734939759036,
|
|
"grad_norm": 0.1528361737728119,
|
|
"learning_rate": 8.309452922723849e-07,
|
|
"loss": 0.0042,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 2.75566265060241,
|
|
"grad_norm": 0.42052382230758667,
|
|
"learning_rate": 8.18200582571842e-07,
|
|
"loss": 0.0149,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 2.757590361445783,
|
|
"grad_norm": 0.13524137437343597,
|
|
"learning_rate": 8.055523251823705e-07,
|
|
"loss": 0.0029,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 2.759518072289157,
|
|
"grad_norm": 0.0980493426322937,
|
|
"learning_rate": 7.930005837035138e-07,
|
|
"loss": 0.0036,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 2.76144578313253,
|
|
"grad_norm": 0.17335453629493713,
|
|
"learning_rate": 7.805454212494967e-07,
|
|
"loss": 0.0066,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 2.7633734939759034,
|
|
"grad_norm": 0.13746409118175507,
|
|
"learning_rate": 7.681869004489218e-07,
|
|
"loss": 0.0066,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 2.765301204819277,
|
|
"grad_norm": 0.18556399643421173,
|
|
"learning_rate": 7.559250834444332e-07,
|
|
"loss": 0.0073,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 2.767228915662651,
|
|
"grad_norm": 0.09743557125329971,
|
|
"learning_rate": 7.437600318924332e-07,
|
|
"loss": 0.0023,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 2.769156626506024,
|
|
"grad_norm": 0.10671001672744751,
|
|
"learning_rate": 7.316918069627488e-07,
|
|
"loss": 0.003,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 2.7710843373493974,
|
|
"grad_norm": 0.10671380162239075,
|
|
"learning_rate": 7.197204693383231e-07,
|
|
"loss": 0.0021,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 2.773012048192771,
|
|
"grad_norm": 0.06824454665184021,
|
|
"learning_rate": 7.078460792149311e-07,
|
|
"loss": 0.0017,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 2.7749397590361444,
|
|
"grad_norm": 0.12668560445308685,
|
|
"learning_rate": 6.960686963008556e-07,
|
|
"loss": 0.0035,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 2.776867469879518,
|
|
"grad_norm": 0.10260980576276779,
|
|
"learning_rate": 6.843883798166029e-07,
|
|
"loss": 0.0027,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 2.7787951807228914,
|
|
"grad_norm": 0.09880302101373672,
|
|
"learning_rate": 6.728051884945941e-07,
|
|
"loss": 0.0029,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 2.780722891566265,
|
|
"grad_norm": 0.305993914604187,
|
|
"learning_rate": 6.613191805788699e-07,
|
|
"loss": 0.0112,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 2.7826506024096385,
|
|
"grad_norm": 0.10707511752843857,
|
|
"learning_rate": 6.499304138248064e-07,
|
|
"loss": 0.0062,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 2.784578313253012,
|
|
"grad_norm": 0.0986943170428276,
|
|
"learning_rate": 6.386389454988195e-07,
|
|
"loss": 0.0021,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 2.7865060240963855,
|
|
"grad_norm": 0.1458776742219925,
|
|
"learning_rate": 6.274448323780724e-07,
|
|
"loss": 0.0094,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 2.788433734939759,
|
|
"grad_norm": 0.09657061100006104,
|
|
"learning_rate": 6.163481307501995e-07,
|
|
"loss": 0.0026,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 2.7903614457831325,
|
|
"grad_norm": 0.1462988704442978,
|
|
"learning_rate": 6.053488964130183e-07,
|
|
"loss": 0.0075,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 2.792289156626506,
|
|
"grad_norm": 0.15330864489078522,
|
|
"learning_rate": 5.94447184674245e-07,
|
|
"loss": 0.0067,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 2.7942168674698795,
|
|
"grad_norm": 0.1513473242521286,
|
|
"learning_rate": 5.836430503512236e-07,
|
|
"loss": 0.0106,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 2.7961445783132532,
|
|
"grad_norm": 0.2151842713356018,
|
|
"learning_rate": 5.729365477706505e-07,
|
|
"loss": 0.0062,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 2.7980722891566265,
|
|
"grad_norm": 0.13624203205108643,
|
|
"learning_rate": 5.623277307682929e-07,
|
|
"loss": 0.0045,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"grad_norm": 0.12075261026620865,
|
|
"learning_rate": 5.518166526887214e-07,
|
|
"loss": 0.0073,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 2.8019277108433736,
|
|
"grad_norm": 0.11320624500513077,
|
|
"learning_rate": 5.41403366385047e-07,
|
|
"loss": 0.002,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 2.803855421686747,
|
|
"grad_norm": 0.08470363914966583,
|
|
"learning_rate": 5.310879242186606e-07,
|
|
"loss": 0.0021,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 2.8057831325301206,
|
|
"grad_norm": 0.15221907198429108,
|
|
"learning_rate": 5.208703780589419e-07,
|
|
"loss": 0.0019,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 2.807710843373494,
|
|
"grad_norm": 0.12709103524684906,
|
|
"learning_rate": 5.107507792830335e-07,
|
|
"loss": 0.0052,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 2.8096385542168676,
|
|
"grad_norm": 0.10888515412807465,
|
|
"learning_rate": 5.007291787755586e-07,
|
|
"loss": 0.0023,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 2.811566265060241,
|
|
"grad_norm": 0.25710970163345337,
|
|
"learning_rate": 4.908056269283789e-07,
|
|
"loss": 0.0073,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 2.8134939759036146,
|
|
"grad_norm": 0.08488702774047852,
|
|
"learning_rate": 4.809801736403308e-07,
|
|
"loss": 0.0016,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 2.815421686746988,
|
|
"grad_norm": 0.1282006949186325,
|
|
"learning_rate": 4.7125286831698034e-07,
|
|
"loss": 0.0035,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 2.8173493975903616,
|
|
"grad_norm": 0.08955442905426025,
|
|
"learning_rate": 4.6162375987037766e-07,
|
|
"loss": 0.004,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 2.819277108433735,
|
|
"grad_norm": 0.11310838907957077,
|
|
"learning_rate": 4.520928967188054e-07,
|
|
"loss": 0.0022,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 2.821204819277108,
|
|
"grad_norm": 0.15055686235427856,
|
|
"learning_rate": 4.426603267865326e-07,
|
|
"loss": 0.0042,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 2.823132530120482,
|
|
"grad_norm": 0.14379452168941498,
|
|
"learning_rate": 4.333260975035769e-07,
|
|
"loss": 0.0089,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 2.8250602409638557,
|
|
"grad_norm": 0.1795361489057541,
|
|
"learning_rate": 4.240902558054827e-07,
|
|
"loss": 0.013,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 2.826987951807229,
|
|
"grad_norm": 0.06829468160867691,
|
|
"learning_rate": 4.1495284813305003e-07,
|
|
"loss": 0.0018,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 2.8289156626506022,
|
|
"grad_norm": 0.35213515162467957,
|
|
"learning_rate": 4.0591392043213275e-07,
|
|
"loss": 0.0144,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 2.830843373493976,
|
|
"grad_norm": 0.11828093230724335,
|
|
"learning_rate": 3.969735181533918e-07,
|
|
"loss": 0.0028,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 2.8327710843373493,
|
|
"grad_norm": 0.13286921381950378,
|
|
"learning_rate": 3.881316862520712e-07,
|
|
"loss": 0.0042,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 2.834698795180723,
|
|
"grad_norm": 0.10271132737398148,
|
|
"learning_rate": 3.7938846918776917e-07,
|
|
"loss": 0.0047,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 2.8366265060240963,
|
|
"grad_norm": 0.09422904253005981,
|
|
"learning_rate": 3.707439109242139e-07,
|
|
"loss": 0.0061,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 2.83855421686747,
|
|
"grad_norm": 0.10817123204469681,
|
|
"learning_rate": 3.6219805492905934e-07,
|
|
"loss": 0.0029,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 2.8404819277108433,
|
|
"grad_norm": 0.10254565626382828,
|
|
"learning_rate": 3.53750944173632e-07,
|
|
"loss": 0.0044,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 2.842409638554217,
|
|
"grad_norm": 0.11423154920339584,
|
|
"learning_rate": 3.45402621132751e-07,
|
|
"loss": 0.0059,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 2.8443373493975903,
|
|
"grad_norm": 0.15620556473731995,
|
|
"learning_rate": 3.3715312778449305e-07,
|
|
"loss": 0.005,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 2.846265060240964,
|
|
"grad_norm": 0.1081036925315857,
|
|
"learning_rate": 3.2900250560998546e-07,
|
|
"loss": 0.004,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 2.8481927710843373,
|
|
"grad_norm": 0.38650745153427124,
|
|
"learning_rate": 3.209507955932001e-07,
|
|
"loss": 0.0076,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 2.8501204819277106,
|
|
"grad_norm": 0.1864783614873886,
|
|
"learning_rate": 3.129980382207509e-07,
|
|
"loss": 0.0092,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 2.8520481927710843,
|
|
"grad_norm": 0.1458069533109665,
|
|
"learning_rate": 3.05144273481679e-07,
|
|
"loss": 0.0058,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 2.853975903614458,
|
|
"grad_norm": 0.14836257696151733,
|
|
"learning_rate": 2.9738954086726334e-07,
|
|
"loss": 0.014,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 2.8559036144578314,
|
|
"grad_norm": 0.10147511214017868,
|
|
"learning_rate": 2.8973387937081485e-07,
|
|
"loss": 0.0047,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 2.8578313253012047,
|
|
"grad_norm": 0.13740235567092896,
|
|
"learning_rate": 2.821773274874828e-07,
|
|
"loss": 0.0028,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 2.8597590361445784,
|
|
"grad_norm": 0.16089461743831635,
|
|
"learning_rate": 2.7471992321406624e-07,
|
|
"loss": 0.0168,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 2.8616867469879517,
|
|
"grad_norm": 0.0599152147769928,
|
|
"learning_rate": 2.6736170404880744e-07,
|
|
"loss": 0.0017,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 2.8636144578313254,
|
|
"grad_norm": 0.148875430226326,
|
|
"learning_rate": 2.6010270699122096e-07,
|
|
"loss": 0.0045,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 2.8655421686746987,
|
|
"grad_norm": 0.26763641834259033,
|
|
"learning_rate": 2.529429685419027e-07,
|
|
"loss": 0.007,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 2.8674698795180724,
|
|
"grad_norm": 0.1743084192276001,
|
|
"learning_rate": 2.458825247023389e-07,
|
|
"loss": 0.0112,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 2.8693975903614457,
|
|
"grad_norm": 0.21380828320980072,
|
|
"learning_rate": 2.3892141097473063e-07,
|
|
"loss": 0.0103,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 2.8713253012048194,
|
|
"grad_norm": 2.185253620147705,
|
|
"learning_rate": 2.3205966236181433e-07,
|
|
"loss": 0.0195,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 2.8732530120481927,
|
|
"grad_norm": 0.11854024976491928,
|
|
"learning_rate": 2.252973133666947e-07,
|
|
"loss": 0.0034,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 2.8751807228915665,
|
|
"grad_norm": 0.36487653851509094,
|
|
"learning_rate": 2.1863439799265195e-07,
|
|
"loss": 0.0063,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 2.8771084337349397,
|
|
"grad_norm": 0.1029730811715126,
|
|
"learning_rate": 2.1207094974298847e-07,
|
|
"loss": 0.0049,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 2.879036144578313,
|
|
"grad_norm": 0.10066278278827667,
|
|
"learning_rate": 2.056070016208489e-07,
|
|
"loss": 0.0021,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 2.8809638554216868,
|
|
"grad_norm": 0.21477262675762177,
|
|
"learning_rate": 1.9924258612906256e-07,
|
|
"loss": 0.0052,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 2.8828915662650605,
|
|
"grad_norm": 0.29007601737976074,
|
|
"learning_rate": 1.929777352699791e-07,
|
|
"loss": 0.0065,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 2.8848192771084338,
|
|
"grad_norm": 0.32320499420166016,
|
|
"learning_rate": 1.8681248054529754e-07,
|
|
"loss": 0.0334,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 2.886746987951807,
|
|
"grad_norm": 0.12790757417678833,
|
|
"learning_rate": 1.8074685295591754e-07,
|
|
"loss": 0.0034,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 2.888674698795181,
|
|
"grad_norm": 0.12194570153951645,
|
|
"learning_rate": 1.7478088300178608e-07,
|
|
"loss": 0.0038,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 2.890602409638554,
|
|
"grad_norm": 0.13514107465744019,
|
|
"learning_rate": 1.6891460068173548e-07,
|
|
"loss": 0.0042,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 2.892530120481928,
|
|
"grad_norm": 0.09762352705001831,
|
|
"learning_rate": 1.631480354933346e-07,
|
|
"loss": 0.0016,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 2.894457831325301,
|
|
"grad_norm": 0.10607658326625824,
|
|
"learning_rate": 1.5748121643274661e-07,
|
|
"loss": 0.0062,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 2.896385542168675,
|
|
"grad_norm": 0.0920143872499466,
|
|
"learning_rate": 1.519141719945738e-07,
|
|
"loss": 0.0025,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 2.898313253012048,
|
|
"grad_norm": 0.17520834505558014,
|
|
"learning_rate": 1.4644693017172418e-07,
|
|
"loss": 0.0045,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 2.900240963855422,
|
|
"grad_norm": 0.49769192934036255,
|
|
"learning_rate": 1.4107951845526267e-07,
|
|
"loss": 0.0059,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 2.902168674698795,
|
|
"grad_norm": 0.06354644149541855,
|
|
"learning_rate": 1.3581196383427586e-07,
|
|
"loss": 0.0021,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 2.904096385542169,
|
|
"grad_norm": 0.09340358525514603,
|
|
"learning_rate": 1.3064429279573853e-07,
|
|
"loss": 0.0036,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 2.906024096385542,
|
|
"grad_norm": 0.06073952466249466,
|
|
"learning_rate": 1.255765313243762e-07,
|
|
"loss": 0.001,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 2.9079518072289154,
|
|
"grad_norm": 0.1323407143354416,
|
|
"learning_rate": 1.206087049025384e-07,
|
|
"loss": 0.008,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 2.909879518072289,
|
|
"grad_norm": 0.18533159792423248,
|
|
"learning_rate": 1.1574083851007e-07,
|
|
"loss": 0.0086,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 2.911807228915663,
|
|
"grad_norm": 0.09885486960411072,
|
|
"learning_rate": 1.1097295662418018e-07,
|
|
"loss": 0.0023,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 2.913734939759036,
|
|
"grad_norm": 0.08286528289318085,
|
|
"learning_rate": 1.0630508321932687e-07,
|
|
"loss": 0.0029,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 2.9156626506024095,
|
|
"grad_norm": 0.1265413761138916,
|
|
"learning_rate": 1.0173724176709254e-07,
|
|
"loss": 0.003,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 2.917590361445783,
|
|
"grad_norm": 0.0776480957865715,
|
|
"learning_rate": 9.726945523606646e-08,
|
|
"loss": 0.0013,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 2.9195180722891565,
|
|
"grad_norm": 0.14106431603431702,
|
|
"learning_rate": 9.290174609172697e-08,
|
|
"loss": 0.0204,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 2.9214457831325302,
|
|
"grad_norm": 0.10813348740339279,
|
|
"learning_rate": 8.863413629633277e-08,
|
|
"loss": 0.0026,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 2.9233734939759035,
|
|
"grad_norm": 0.11505429446697235,
|
|
"learning_rate": 8.446664730881182e-08,
|
|
"loss": 0.0038,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 2.9253012048192772,
|
|
"grad_norm": 0.18488599359989166,
|
|
"learning_rate": 8.039930008465257e-08,
|
|
"loss": 0.0094,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 2.9272289156626505,
|
|
"grad_norm": 0.19229602813720703,
|
|
"learning_rate": 7.643211507579296e-08,
|
|
"loss": 0.0062,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 2.929156626506024,
|
|
"grad_norm": 0.0876188799738884,
|
|
"learning_rate": 7.25651122305293e-08,
|
|
"loss": 0.0024,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 2.9310843373493976,
|
|
"grad_norm": 0.15103434026241302,
|
|
"learning_rate": 6.87983109934054e-08,
|
|
"loss": 0.0056,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 2.9330120481927713,
|
|
"grad_norm": 0.1714266538619995,
|
|
"learning_rate": 6.51317303051191e-08,
|
|
"loss": 0.0047,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 2.9349397590361446,
|
|
"grad_norm": 0.30670225620269775,
|
|
"learning_rate": 6.156538860242922e-08,
|
|
"loss": 0.0111,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 2.936867469879518,
|
|
"grad_norm": 0.13250356912612915,
|
|
"learning_rate": 5.809930381805773e-08,
|
|
"loss": 0.0033,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 2.9387951807228916,
|
|
"grad_norm": 0.10350223630666733,
|
|
"learning_rate": 5.4733493380603183e-08,
|
|
"loss": 0.0028,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 2.9407228915662653,
|
|
"grad_norm": 0.1638195812702179,
|
|
"learning_rate": 5.1467974214456374e-08,
|
|
"loss": 0.0037,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 2.9426506024096386,
|
|
"grad_norm": 0.11159276962280273,
|
|
"learning_rate": 4.830276273970258e-08,
|
|
"loss": 0.003,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 2.944578313253012,
|
|
"grad_norm": 0.09866586327552795,
|
|
"learning_rate": 4.5237874872052776e-08,
|
|
"loss": 0.0032,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 2.9465060240963856,
|
|
"grad_norm": 0.17825454473495483,
|
|
"learning_rate": 4.227332602275924e-08,
|
|
"loss": 0.0105,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 2.948433734939759,
|
|
"grad_norm": 0.10379356890916824,
|
|
"learning_rate": 3.940913109853561e-08,
|
|
"loss": 0.0055,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 2.9503614457831326,
|
|
"grad_norm": 0.23834416270256042,
|
|
"learning_rate": 3.66453045014814e-08,
|
|
"loss": 0.0044,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 2.952289156626506,
|
|
"grad_norm": 0.11515571922063828,
|
|
"learning_rate": 3.398186012901539e-08,
|
|
"loss": 0.0042,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 2.9542168674698797,
|
|
"grad_norm": 0.14170049130916595,
|
|
"learning_rate": 3.141881137379788e-08,
|
|
"loss": 0.0073,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 2.956144578313253,
|
|
"grad_norm": 0.237248957157135,
|
|
"learning_rate": 2.8956171123670774e-08,
|
|
"loss": 0.0055,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 2.9580722891566262,
|
|
"grad_norm": 0.07076071947813034,
|
|
"learning_rate": 2.6593951761588744e-08,
|
|
"loss": 0.0016,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"grad_norm": 0.1100577786564827,
|
|
"learning_rate": 2.4332165165557032e-08,
|
|
"loss": 0.0026,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 2.9619277108433737,
|
|
"grad_norm": 0.11576279252767563,
|
|
"learning_rate": 2.2170822708573736e-08,
|
|
"loss": 0.0036,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 2.963855421686747,
|
|
"grad_norm": 0.2067718207836151,
|
|
"learning_rate": 2.0109935258565415e-08,
|
|
"loss": 0.0063,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 2.9657831325301203,
|
|
"grad_norm": 0.15040244162082672,
|
|
"learning_rate": 1.8149513178347122e-08,
|
|
"loss": 0.0081,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 2.967710843373494,
|
|
"grad_norm": 0.14071759581565857,
|
|
"learning_rate": 1.6289566325555783e-08,
|
|
"loss": 0.006,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 2.9696385542168677,
|
|
"grad_norm": 0.32527413964271545,
|
|
"learning_rate": 1.4530104052610239e-08,
|
|
"loss": 0.0021,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 2.971566265060241,
|
|
"grad_norm": 0.06794515997171402,
|
|
"learning_rate": 1.2871135206651287e-08,
|
|
"loss": 0.0016,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 2.9734939759036143,
|
|
"grad_norm": 0.08525913208723068,
|
|
"learning_rate": 1.1312668129519477e-08,
|
|
"loss": 0.0023,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 2.975421686746988,
|
|
"grad_norm": 0.14025282859802246,
|
|
"learning_rate": 9.854710657688504e-09,
|
|
"loss": 0.0025,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 2.9773493975903613,
|
|
"grad_norm": 0.15709802508354187,
|
|
"learning_rate": 8.497270122242996e-09,
|
|
"loss": 0.0038,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 2.979277108433735,
|
|
"grad_norm": 0.1520087569952011,
|
|
"learning_rate": 7.240353348834106e-09,
|
|
"loss": 0.0027,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 2.9812048192771083,
|
|
"grad_norm": 0.13271088898181915,
|
|
"learning_rate": 6.083966657646212e-09,
|
|
"loss": 0.003,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 2.983132530120482,
|
|
"grad_norm": 0.0962211862206459,
|
|
"learning_rate": 5.028115863370265e-09,
|
|
"loss": 0.0021,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 2.9850602409638554,
|
|
"grad_norm": 0.11485985666513443,
|
|
"learning_rate": 4.072806275163821e-09,
|
|
"loss": 0.0039,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 2.9869879518072286,
|
|
"grad_norm": 0.15437521040439606,
|
|
"learning_rate": 3.2180426966332833e-09,
|
|
"loss": 0.0048,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 2.9889156626506024,
|
|
"grad_norm": 0.09884651750326157,
|
|
"learning_rate": 2.4638294258072513e-09,
|
|
"loss": 0.0032,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 2.990843373493976,
|
|
"grad_norm": 0.30931419134140015,
|
|
"learning_rate": 1.810170255116539e-09,
|
|
"loss": 0.0038,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 2.9927710843373494,
|
|
"grad_norm": 0.3311678469181061,
|
|
"learning_rate": 1.2570684713719695e-09,
|
|
"loss": 0.0247,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 2.9946987951807227,
|
|
"grad_norm": 0.13150249421596527,
|
|
"learning_rate": 8.045268557443919e-10,
|
|
"loss": 0.0029,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 2.9966265060240964,
|
|
"grad_norm": 0.10827342420816422,
|
|
"learning_rate": 4.5254768376468137e-10,
|
|
"loss": 0.0119,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 2.99855421686747,
|
|
"grad_norm": 0.10358250141143799,
|
|
"learning_rate": 2.011327252948725e-10,
|
|
"loss": 0.0038,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.09550733864307404,
|
|
"learning_rate": 5.028324453482114e-11,
|
|
"loss": 0.0016,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 1557,
|
|
"total_flos": 2.043435500286509e+18,
|
|
"train_loss": 0.016654981696585226,
|
|
"train_runtime": 5294.7714,
|
|
"train_samples_per_second": 9.403,
|
|
"train_steps_per_second": 0.294
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1557,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 92,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.043435500286509e+18,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|