{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 1131,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 2.9411764705882355e-06, "loss": 3.5459, "step": 1 },
    { "epoch": 0.01, "learning_rate": 5.882352941176471e-06, "loss": 3.4515, "step": 2 },
    { "epoch": 0.01, "learning_rate": 8.823529411764707e-06, "loss": 3.5459, "step": 3 },
    { "epoch": 0.01, "learning_rate": 1.1764705882352942e-05, "loss": 3.3973, "step": 4 },
    { "epoch": 0.01, "learning_rate": 1.4705882352941177e-05, "loss": 3.3427, "step": 5 },
    { "epoch": 0.02, "learning_rate": 1.7647058823529414e-05, "loss": 3.1525, "step": 6 },
    { "epoch": 0.02, "learning_rate": 2.058823529411765e-05, "loss": 3.1169, "step": 7 },
    { "epoch": 0.02, "learning_rate": 2.3529411764705884e-05, "loss": 3.0744, "step": 8 },
    { "epoch": 0.02, "learning_rate": 2.647058823529412e-05, "loss": 2.9719, "step": 9 },
    { "epoch": 0.03, "learning_rate": 2.9411764705882354e-05, "loss": 2.8901, "step": 10 },
    { "epoch": 0.03, "learning_rate": 3.235294117647059e-05, "loss": 2.9138, "step": 11 },
    { "epoch": 0.03, "learning_rate": 3.529411764705883e-05, "loss": 2.7916, "step": 12 },
    { "epoch": 0.03, "learning_rate": 3.8235294117647055e-05, "loss": 2.7394, "step": 13 },
    { "epoch": 0.04, "learning_rate": 4.11764705882353e-05, "loss": 2.6911, "step": 14 },
    { "epoch": 0.04, "learning_rate": 4.411764705882353e-05, "loss": 2.6669, "step": 15 },
    { "epoch": 0.04, "learning_rate": 4.705882352941177e-05, "loss": 2.7453, "step": 16 },
    { "epoch": 0.05, "learning_rate": 5e-05, "loss": 2.5887, "step": 17 },
    { "epoch": 0.05, "learning_rate": 5.294117647058824e-05, "loss": 2.5516, "step": 18 },
    { "epoch": 0.05, "learning_rate": 5.588235294117647e-05, "loss": 2.6737, "step": 19 },
    { "epoch": 0.05, "learning_rate": 5.882352941176471e-05, "loss": 2.4736, "step": 20 },
    { "epoch": 0.06, "learning_rate": 6.176470588235295e-05, "loss": 2.397, "step": 21 },
    { "epoch": 0.06, "learning_rate": 6.470588235294118e-05, "loss": 2.4433, "step": 22 },
    { "epoch": 0.06, "learning_rate": 6.764705882352942e-05, "loss": 2.354, "step": 23 },
    { "epoch": 0.06, "learning_rate": 7.058823529411765e-05, "loss": 2.4111, "step": 24 },
    { "epoch": 0.07, "learning_rate": 7.352941176470589e-05, "loss": 2.3523, "step": 25 },
    { "epoch": 0.07, "learning_rate": 7.647058823529411e-05, "loss": 2.3626, "step": 26 },
    { "epoch": 0.07, "learning_rate": 7.941176470588235e-05, "loss": 2.2676, "step": 27 },
    { "epoch": 0.07, "learning_rate": 8.23529411764706e-05, "loss": 2.403, "step": 28 },
    { "epoch": 0.08, "learning_rate": 8.529411764705883e-05, "loss": 2.2134, "step": 29 },
    { "epoch": 0.08, "learning_rate": 8.823529411764706e-05, "loss": 2.2442, "step": 30 },
    { "epoch": 0.08, "learning_rate": 9.11764705882353e-05, "loss": 2.2206, "step": 31 },
    { "epoch": 0.08, "learning_rate": 9.411764705882353e-05, "loss": 2.1881, "step": 32 },
    { "epoch": 0.09, "learning_rate": 9.705882352941177e-05, "loss": 2.1922, "step": 33 },
    { "epoch": 0.09, "learning_rate": 0.0001, "loss": 2.1636, "step": 34 },
    { "epoch": 0.09, "learning_rate": 9.999979496585003e-05, "loss": 2.1973, "step": 35 },
    { "epoch": 0.1, "learning_rate": 9.999917986508165e-05, "loss": 2.1669, "step": 36 },
    { "epoch": 0.1, "learning_rate": 9.999815470273954e-05, "loss": 2.1175, "step": 37 },
    { "epoch": 0.1, "learning_rate": 9.999671948723141e-05, "loss": 2.2195, "step": 38 },
    { "epoch": 0.1, "learning_rate": 9.999487423032803e-05, "loss": 2.0737, "step": 39 },
    { "epoch": 0.11, "learning_rate": 9.999261894716299e-05, "loss": 2.1123, "step": 40 },
    { "epoch": 0.11, "learning_rate": 9.998995365623271e-05, "loss": 2.1978, "step": 41 },
    { "epoch": 0.11, "learning_rate": 9.998687837939621e-05, "loss": 2.0849, "step": 42 },
    { "epoch": 0.11, "learning_rate": 9.998339314187497e-05, "loss": 2.0458, "step": 43 },
    { "epoch": 0.12, "learning_rate": 9.997949797225268e-05, "loss": 1.9736, "step": 44 },
    { "epoch": 0.12, "learning_rate": 9.997519290247507e-05, "loss": 2.1129, "step": 45 },
    { "epoch": 0.12, "learning_rate": 9.997047796784959e-05, "loss": 2.0466, "step": 46 },
    { "epoch": 0.12, "learning_rate": 9.996535320704514e-05, "loss": 2.0619, "step": 47 },
    { "epoch": 0.13, "learning_rate": 9.995981866209174e-05, "loss": 2.1507, "step": 48 },
    { "epoch": 0.13, "learning_rate": 9.995387437838026e-05, "loss": 2.0693, "step": 49 },
    { "epoch": 0.13, "learning_rate": 9.99475204046619e-05, "loss": 1.9994, "step": 50 },
    { "epoch": 0.14, "learning_rate": 9.994075679304798e-05, "loss": 2.1005, "step": 51 },
    { "epoch": 0.14, "learning_rate": 9.993358359900931e-05, "loss": 2.0013, "step": 52 },
    { "epoch": 0.14, "learning_rate": 9.99260008813759e-05, "loss": 2.0427, "step": 53 },
    { "epoch": 0.14, "learning_rate": 9.991800870233638e-05, "loss": 2.0812, "step": 54 },
    { "epoch": 0.15, "learning_rate": 9.990960712743754e-05, "loss": 1.9826, "step": 55 },
    { "epoch": 0.15, "learning_rate": 9.990079622558377e-05, "loss": 2.0451, "step": 56 },
    { "epoch": 0.15, "learning_rate": 9.989157606903649e-05, "loss": 2.0134, "step": 57 },
    { "epoch": 0.15, "learning_rate": 9.988194673341362e-05, "loss": 2.0029, "step": 58 },
    { "epoch": 0.16, "learning_rate": 9.987190829768882e-05, "loss": 1.9063, "step": 59 },
    { "epoch": 0.16, "learning_rate": 9.986146084419099e-05, "loss": 1.9806, "step": 60 },
    { "epoch": 0.16, "learning_rate": 9.985060445860352e-05, "loss": 1.9337, "step": 61 },
    { "epoch": 0.16, "learning_rate": 9.983933922996361e-05, "loss": 1.9956, "step": 62 },
    { "epoch": 0.17, "learning_rate": 9.982766525066152e-05, "loss": 2.0342, "step": 63 },
    { "epoch": 0.17, "learning_rate": 9.981558261643981e-05, "loss": 2.0178, "step": 64 },
    { "epoch": 0.17, "learning_rate": 9.980309142639261e-05, "loss": 1.9531, "step": 65 },
    { "epoch": 0.18, "learning_rate": 9.979019178296473e-05, "loss": 2.0035, "step": 66 },
    { "epoch": 0.18, "learning_rate": 9.977688379195087e-05, "loss": 1.8774, "step": 67 },
    { "epoch": 0.18, "learning_rate": 9.976316756249472e-05, "loss": 2.0842, "step": 68 },
    { "epoch": 0.18, "learning_rate": 9.97490432070881e-05, "loss": 1.9861, "step": 69 },
    { "epoch": 0.19, "learning_rate": 9.973451084157006e-05, "loss": 1.8747, "step": 70 },
    { "epoch": 0.19, "learning_rate": 9.97195705851258e-05, "loss": 1.9883, "step": 71 },
    { "epoch": 0.19, "learning_rate": 9.970422256028587e-05, "loss": 1.9212, "step": 72 },
    { "epoch": 0.19, "learning_rate": 9.9688466892925e-05, "loss": 1.9101, "step": 73 },
    { "epoch": 0.2, "learning_rate": 9.96723037122612e-05, "loss": 2.0329, "step": 74 },
    { "epoch": 0.2, "learning_rate": 9.965573315085462e-05, "loss": 1.9547, "step": 75 },
    { "epoch": 0.2, "learning_rate": 9.963875534460653e-05, "loss": 1.9255, "step": 76 },
    { "epoch": 0.2, "learning_rate": 9.96213704327581e-05, "loss": 1.9336, "step": 77 },
    { "epoch": 0.21, "learning_rate": 9.960357855788938e-05, "loss": 1.8824, "step": 78 },
    { "epoch": 0.21, "learning_rate": 9.958537986591803e-05, "loss": 1.884, "step": 79 },
    { "epoch": 0.21, "learning_rate": 9.95667745060982e-05, "loss": 1.9605, "step": 80 },
    { "epoch": 0.21, "learning_rate": 9.954776263101924e-05, "loss": 1.8576, "step": 81 },
    { "epoch": 0.22, "learning_rate": 9.95283443966045e-05, "loss": 1.9995, "step": 82 },
    { "epoch": 0.22, "learning_rate": 9.950851996211004e-05, "loss": 1.944, "step": 83 },
    { "epoch": 0.22, "learning_rate": 9.948828949012327e-05, "loss": 1.9854, "step": 84 },
    { "epoch": 0.23, "learning_rate": 9.946765314656174e-05, "loss": 1.9635, "step": 85 },
    { "epoch": 0.23, "learning_rate": 9.944661110067162e-05, "loss": 2.0009, "step": 86 },
    { "epoch": 0.23, "learning_rate": 9.942516352502644e-05, "loss": 1.9422, "step": 87 },
    { "epoch": 0.23, "learning_rate": 9.940331059552563e-05, "loss": 1.8891, "step": 88 },
    { "epoch": 0.24, "learning_rate": 9.938105249139306e-05, "loss": 1.9399, "step": 89 },
    { "epoch": 0.24, "learning_rate": 9.935838939517556e-05, "loss": 1.9283, "step": 90 },
    { "epoch": 0.24, "learning_rate": 9.933532149274152e-05, "loss": 1.9132, "step": 91 },
    { "epoch": 0.24, "learning_rate": 9.931184897327922e-05, "loss": 1.8997, "step": 92 },
    { "epoch": 0.25, "learning_rate": 9.928797202929539e-05, "loss": 1.8664, "step": 93 },
    { "epoch": 0.25, "learning_rate": 9.92636908566136e-05, "loss": 1.8697, "step": 94 },
    { "epoch": 0.25, "learning_rate": 9.923900565437262e-05, "loss": 1.9275, "step": 95 },
    { "epoch": 0.25, "learning_rate": 9.921391662502483e-05, "loss": 1.9188, "step": 96 },
    { "epoch": 0.26, "learning_rate": 9.918842397433455e-05, "loss": 1.8527, "step": 97 },
    { "epoch": 0.26, "learning_rate": 9.916252791137631e-05, "loss": 1.9087, "step": 98 },
    { "epoch": 0.26, "learning_rate": 9.913622864853325e-05, "loss": 1.8689, "step": 99 },
    { "epoch": 0.27, "learning_rate": 9.91095264014952e-05, "loss": 1.9366, "step": 100 },
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 9.908242138925709e-05,
|
|
"loss": 1.8494,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 9.905491383411705e-05,
|
|
"loss": 1.8945,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 9.902700396167459e-05,
|
|
"loss": 1.914,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 9.899869200082881e-05,
|
|
"loss": 1.8494,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 9.896997818377642e-05,
|
|
"loss": 1.8909,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 9.894086274601e-05,
|
|
"loss": 1.8623,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 9.891134592631587e-05,
|
|
"loss": 1.8872,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 9.88814279667723e-05,
|
|
"loss": 1.8787,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 9.885110911274738e-05,
|
|
"loss": 1.8782,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 9.88203896128972e-05,
|
|
"loss": 1.7577,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 9.878926971916354e-05,
|
|
"loss": 1.8954,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 9.87577496867721e-05,
|
|
"loss": 1.8517,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 9.872582977423018e-05,
|
|
"loss": 1.839,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 9.869351024332467e-05,
|
|
"loss": 1.8851,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 9.866079135911986e-05,
|
|
"loss": 1.854,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 9.86276733899553e-05,
|
|
"loss": 1.8384,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 9.85941566074436e-05,
|
|
"loss": 1.8775,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 9.856024128646812e-05,
|
|
"loss": 1.8111,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 9.852592770518085e-05,
|
|
"loss": 1.8075,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 9.849121614500001e-05,
|
|
"loss": 1.7952,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 9.845610689060782e-05,
|
|
"loss": 1.8371,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 9.842060022994814e-05,
|
|
"loss": 1.8761,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 9.838469645422406e-05,
|
|
"loss": 1.8242,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 9.834839585789559e-05,
|
|
"loss": 1.8772,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 9.831169873867723e-05,
|
|
"loss": 1.7998,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 9.827460539753546e-05,
|
|
"loss": 1.8881,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 9.823711613868636e-05,
|
|
"loss": 1.8629,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 9.819923126959308e-05,
|
|
"loss": 1.8018,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 9.816095110096325e-05,
|
|
"loss": 1.8126,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 9.812227594674659e-05,
|
|
"loss": 1.8217,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 9.808320612413217e-05,
|
|
"loss": 1.8537,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 9.804374195354591e-05,
|
|
"loss": 1.8028,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 9.80038837586479e-05,
|
|
"loss": 1.8954,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 9.796363186632985e-05,
|
|
"loss": 1.8818,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 9.792298660671217e-05,
|
|
"loss": 1.8965,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 9.788194831314158e-05,
|
|
"loss": 1.8414,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 9.784051732218808e-05,
|
|
"loss": 1.8456,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 9.779869397364247e-05,
|
|
"loss": 1.8479,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 9.775647861051329e-05,
|
|
"loss": 1.8176,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 9.771387157902417e-05,
|
|
"loss": 1.7994,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 9.767087322861102e-05,
|
|
"loss": 1.8153,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 9.7627483911919e-05,
|
|
"loss": 1.8048,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 9.758370398479981e-05,
|
|
"loss": 1.8491,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 9.753953380630862e-05,
|
|
"loss": 1.82,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 9.74949737387013e-05,
|
|
"loss": 1.922,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 9.745002414743119e-05,
|
|
"loss": 1.8061,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 9.740468540114638e-05,
|
|
"loss": 1.8676,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 9.735895787168652e-05,
|
|
"loss": 1.904,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 9.73128419340798e-05,
|
|
"loss": 1.7908,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 9.726633796653994e-05,
|
|
"loss": 1.8096,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 9.721944635046297e-05,
|
|
"loss": 1.8669,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 9.717216747042419e-05,
|
|
"loss": 1.7547,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 9.712450171417502e-05,
|
|
"loss": 1.7849,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 9.707644947263976e-05,
|
|
"loss": 1.8122,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 9.702801113991243e-05,
|
|
"loss": 1.768,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 9.697918711325353e-05,
|
|
"loss": 1.8519,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 9.692997779308677e-05,
|
|
"loss": 1.7329,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 9.688038358299578e-05,
|
|
"loss": 1.7725,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 9.683040488972086e-05,
|
|
"loss": 1.7678,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 9.678004212315554e-05,
|
|
"loss": 1.7351,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 9.672929569634331e-05,
|
|
"loss": 1.8248,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 9.66781660254742e-05,
|
|
"loss": 1.7674,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 9.662665352988133e-05,
|
|
"loss": 1.7685,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 9.657475863203755e-05,
|
|
"loss": 1.8122,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 9.65224817575519e-05,
|
|
"loss": 1.8858,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 9.646982333516616e-05,
|
|
"loss": 1.8532,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 9.641678379675135e-05,
|
|
"loss": 1.8341,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 9.63633635773041e-05,
|
|
"loss": 1.6986,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 9.63095631149432e-05,
|
|
"loss": 1.7467,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 9.625538285090595e-05,
|
|
"loss": 1.769,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 9.620082322954448e-05,
|
|
"loss": 1.7554,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 9.614588469832225e-05,
|
|
"loss": 1.6883,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 9.609056770781026e-05,
|
|
"loss": 1.7908,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 9.603487271168336e-05,
|
|
"loss": 1.769,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 9.597880016671665e-05,
|
|
"loss": 1.8701,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 9.592235053278157e-05,
|
|
"loss": 1.7404,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 9.586552427284223e-05,
|
|
"loss": 1.7462,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 9.580832185295156e-05,
|
|
"loss": 1.7704,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 9.575074374224758e-05,
|
|
"loss": 1.7354,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 9.569279041294944e-05,
|
|
"loss": 1.9016,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 9.563446234035358e-05,
|
|
"loss": 1.7546,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 9.557576000282991e-05,
|
|
"loss": 1.7814,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 9.551668388181776e-05,
|
|
"loss": 1.7423,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 9.545723446182202e-05,
|
|
"loss": 1.737,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 9.539741223040915e-05,
|
|
"loss": 1.7577,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 9.533721767820317e-05,
|
|
"loss": 1.7864,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 9.527665129888161e-05,
|
|
"loss": 1.7015,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 9.521571358917153e-05,
|
|
"loss": 1.7017,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 9.51544050488454e-05,
|
|
"loss": 1.7616,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 9.509272618071699e-05,
|
|
"loss": 1.7538,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.503067749063726e-05,
|
|
"loss": 1.8012,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.496825948749024e-05,
|
|
"loss": 1.7607,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.490547268318881e-05,
|
|
"loss": 1.7575,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.484231759267054e-05,
|
|
"loss": 1.7102,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.477879473389345e-05,
|
|
"loss": 1.7801,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.471490462783175e-05,
|
|
"loss": 1.7379,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.465064779847156e-05,
|
|
"loss": 1.718,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.458602477280668e-05,
|
|
"loss": 1.6832,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.452103608083417e-05,
|
|
"loss": 1.7995,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.445568225555014e-05,
|
|
"loss": 1.7036,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.438996383294516e-05,
|
|
"loss": 1.6973,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.43238813520001e-05,
|
|
"loss": 1.757,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.425743535468156e-05,
|
|
"loss": 1.7293,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.41906263859375e-05,
|
|
"loss": 1.8156,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.412345499369271e-05,
|
|
"loss": 1.7483,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 9.405592172884437e-05,
|
|
"loss": 1.7947,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 9.39880271452575e-05,
|
|
"loss": 1.8237,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 9.391977179976043e-05,
|
|
"loss": 1.6674,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 9.385115625214022e-05,
|
|
"loss": 1.7484,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 9.378218106513812e-05,
|
|
"loss": 1.7449,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 9.371284680444483e-05,
|
|
"loss": 1.7444,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 9.364315403869606e-05,
|
|
"loss": 1.6666,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 9.357310333946763e-05,
|
|
"loss": 1.7569,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 9.3502695281271e-05,
|
|
"loss": 1.6859,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 9.343193044154843e-05,
|
|
"loss": 1.7095,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 9.336080940066826e-05,
|
|
"loss": 1.8226,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 9.328933274192015e-05,
|
|
"loss": 1.8059,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 9.32175010515104e-05,
|
|
"loss": 1.699,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 9.314531491855692e-05,
|
|
"loss": 1.7162,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 9.307277493508465e-05,
|
|
"loss": 1.7898,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 9.299988169602054e-05,
|
|
"loss": 1.8133,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 9.292663579918873e-05,
|
|
"loss": 1.81,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 9.285303784530558e-05,
|
|
"loss": 1.694,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 9.277908843797492e-05,
|
|
"loss": 1.648,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 9.270478818368287e-05,
|
|
"loss": 1.8439,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 9.263013769179298e-05,
|
|
"loss": 1.7486,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 9.25551375745413e-05,
|
|
"loss": 1.7601,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 9.247978844703122e-05,
|
|
"loss": 1.7399,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 9.240409092722852e-05,
|
|
"loss": 1.8162,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 9.232804563595626e-05,
|
|
"loss": 1.6533,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 9.22516531968897e-05,
|
|
"loss": 1.7488,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 9.217491423655123e-05,
|
|
"loss": 1.7544,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 9.209782938430509e-05,
|
|
"loss": 1.6406,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 9.202039927235241e-05,
|
|
"loss": 1.7158,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 9.194262453572586e-05,
|
|
"loss": 1.7827,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 9.186450581228454e-05,
|
|
"loss": 1.6567,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 9.178604374270867e-05,
|
|
"loss": 1.7305,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 9.170723897049439e-05,
|
|
"loss": 1.7544,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 9.162809214194851e-05,
|
|
"loss": 1.7247,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 9.154860390618313e-05,
|
|
"loss": 1.8192,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 9.146877491511035e-05,
|
|
"loss": 1.7016,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 9.138860582343696e-05,
|
|
"loss": 1.7377,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 9.130809728865901e-05,
|
|
"loss": 1.6459,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 9.122724997105647e-05,
|
|
"loss": 1.7161,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 9.114606453368779e-05,
|
|
"loss": 1.6868,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 9.106454164238442e-05,
|
|
"loss": 1.7086,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 9.098268196574546e-05,
|
|
"loss": 1.7164,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 9.090048617513207e-05,
|
|
"loss": 1.6877,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 9.081795494466201e-05,
|
|
"loss": 1.6701,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 9.073508895120411e-05,
|
|
"loss": 1.7393,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 9.065188887437273e-05,
|
|
"loss": 1.746,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 9.056835539652211e-05,
|
|
"loss": 1.79,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 9.048448920274088e-05,
|
|
"loss": 1.6791,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 9.040029098084643e-05,
|
|
"loss": 1.6771,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 9.031576142137919e-05,
|
|
"loss": 1.644,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 9.023090121759699e-05,
|
|
"loss": 1.7242,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 9.01457110654694e-05,
|
|
"loss": 1.7745,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 9.006019166367208e-05,
|
|
"loss": 1.7381,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 8.997434371358093e-05,
|
|
"loss": 1.6923,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 8.98881679192664e-05,
|
|
"loss": 1.8049,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 8.980166498748774e-05,
|
|
"loss": 1.6683,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 8.971483562768712e-05,
|
|
"loss": 1.7033,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 8.962768055198394e-05,
|
|
"loss": 1.761,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 8.954020047516884e-05,
|
|
"loss": 1.7824,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 8.945239611469796e-05,
|
|
"loss": 1.725,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 8.9364268190687e-05,
|
|
"loss": 1.6417,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 8.927581742590533e-05,
|
|
"loss": 1.7119,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 8.918704454577003e-05,
|
|
"loss": 1.7466,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 8.909795027833998e-05,
|
|
"loss": 1.6963,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 8.900853535430986e-05,
|
|
"loss": 1.7345,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 8.891880050700424e-05,
|
|
"loss": 1.6779,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 8.882874647237138e-05,
|
|
"loss": 1.6923,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 8.873837398897742e-05,
|
|
"loss": 1.6592,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 8.864768379800016e-05,
|
|
"loss": 1.6333,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 8.855667664322307e-05,
|
|
"loss": 1.7154,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 8.846535327102909e-05,
|
|
"loss": 1.7901,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 8.837371443039466e-05,
|
|
"loss": 1.6907,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 8.828176087288345e-05,
|
|
"loss": 1.7244,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 8.818949335264021e-05,
|
|
"loss": 1.7037,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 8.809691262638467e-05,
|
|
"loss": 1.6272,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 8.800401945340523e-05,
|
|
"loss": 1.6574,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 8.791081459555281e-05,
|
|
"loss": 1.6544,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 8.781729881723458e-05,
|
|
"loss": 1.6271,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 8.772347288540763e-05,
|
|
"loss": 1.7392,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.762933756957281e-05,
|
|
"loss": 1.6172,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.753489364176826e-05,
|
|
"loss": 1.7241,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.744014187656321e-05,
|
|
"loss": 1.726,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.734508305105158e-05,
|
|
"loss": 1.699,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 8.724971794484556e-05,
|
|
"loss": 1.6371,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 8.715404734006931e-05,
|
|
"loss": 1.7337,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 8.705807202135248e-05,
|
|
"loss": 1.6385,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 8.69617927758238e-05,
|
|
"loss": 1.7023,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 8.686521039310454e-05,
|
|
"loss": 1.6796,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 8.676832566530221e-05,
|
|
"loss": 1.7157,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 8.667113938700396e-05,
|
|
"loss": 1.6873,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 8.657365235526995e-05,
|
|
"loss": 1.7194,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 8.647586536962707e-05,
|
|
"loss": 1.7695,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 8.637777923206215e-05,
|
|
"loss": 1.6464,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 8.62793947470155e-05,
|
|
"loss": 1.7462,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 8.618071272137431e-05,
|
|
"loss": 1.6386,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 8.608173396446598e-05,
|
|
"loss": 1.6692,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 8.598245928805152e-05,
|
|
"loss": 1.7241,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 8.588288950631889e-05,
|
|
"loss": 1.744,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 8.578302543587631e-05,
|
|
"loss": 1.6958,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 8.568286789574557e-05,
|
|
"loss": 1.7288,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 8.558241770735531e-05,
|
|
"loss": 1.7376,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 8.548167569453429e-05,
|
|
"loss": 1.668,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 8.538064268350465e-05,
|
|
"loss": 1.6949,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 8.527931950287507e-05,
|
|
"loss": 1.645,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 8.517770698363404e-05,
|
|
"loss": 1.6848,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 8.507580595914303e-05,
|
|
"loss": 1.7163,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 8.497361726512965e-05,
|
|
"loss": 1.7366,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 8.487114173968074e-05,
|
|
"loss": 1.7858,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 8.476838022323561e-05,
|
|
"loss": 1.6975,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 8.466533355857908e-05,
|
|
"loss": 1.7549,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 8.456200259083454e-05,
|
|
"loss": 1.6796,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 8.445838816745709e-05,
|
|
"loss": 1.6895,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 8.435449113822655e-05,
|
|
"loss": 1.6524,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.425031235524046e-05,
|
|
"loss": 1.7097,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.414585267290715e-05,
|
|
"loss": 1.7021,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.404111294793873e-05,
|
|
"loss": 1.7239,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.393609403934398e-05,
|
|
"loss": 1.6201,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 8.383079680842145e-05,
|
|
"loss": 1.6921,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 8.372522211875224e-05,
|
|
"loss": 1.6285,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 8.361937083619304e-05,
|
|
"loss": 1.692,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 8.351324382886895e-05,
|
|
"loss": 1.7094,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.340684196716639e-05,
|
|
"loss": 1.661,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.330016612372599e-05,
|
|
"loss": 1.6573,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.319321717343535e-05,
|
|
"loss": 1.7666,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 8.308599599342202e-05,
|
|
"loss": 1.6458,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 8.297850346304608e-05,
|
|
"loss": 1.6689,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 8.287074046389312e-05,
|
|
"loss": 1.6694,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 8.276270787976696e-05,
|
|
"loss": 1.7342,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 8.265440659668236e-05,
|
|
"loss": 1.7041,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 8.254583750285776e-05,
|
|
"loss": 1.707,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 8.243700148870805e-05,
|
|
"loss": 1.6359,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 8.232789944683723e-05,
|
|
"loss": 1.6944,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 8.221853227203106e-05,
|
|
"loss": 1.6221,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 8.210890086124977e-05,
|
|
"loss": 1.6485,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 8.199900611362068e-05,
|
|
"loss": 1.6927,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 8.188884893043083e-05,
|
|
"loss": 1.71,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.177843021511962e-05,
|
|
"loss": 1.6721,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.166775087327133e-05,
|
|
"loss": 1.7052,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 8.155681181260777e-05,
|
|
"loss": 1.679,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.144561394298075e-05,
|
|
"loss": 1.6976,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.133415817636471e-05,
|
|
"loss": 1.591,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.12224454268492e-05,
|
|
"loss": 1.7302,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 8.111047661063136e-05,
|
|
"loss": 1.649,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 8.099825264600842e-05,
|
|
"loss": 1.7271,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 8.08857744533702e-05,
|
|
"loss": 1.7033,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 8.077304295519151e-05,
|
|
"loss": 1.6853,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 8.066005907602465e-05,
|
|
"loss": 1.6198,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 8.054682374249174e-05,
|
|
"loss": 1.5788,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 8.04333378832772e-05,
|
|
"loss": 1.6358,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 8.031960242912011e-05,
|
|
"loss": 1.6205,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 8.020561831280654e-05,
|
|
"loss": 1.6251,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 8.009138646916196e-05,
|
|
"loss": 1.6325,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 7.997690783504353e-05,
|
|
"loss": 1.5752,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 7.986218334933241e-05,
|
|
"loss": 1.702,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 7.97472139529261e-05,
|
|
"loss": 1.6434,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 7.963200058873072e-05,
|
|
"loss": 1.6503,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 7.951654420165323e-05,
|
|
"loss": 1.6811,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 7.940084573859369e-05,
|
|
"loss": 1.6883,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 7.928490614843757e-05,
|
|
"loss": 1.6747,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 7.916872638204788e-05,
|
|
"loss": 1.6585,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 7.90523073922574e-05,
|
|
"loss": 1.6598,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 7.893565013386087e-05,
|
|
"loss": 1.6732,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 7.881875556360717e-05,
|
|
"loss": 1.6139,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 7.870162464019144e-05,
|
|
"loss": 1.7143,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 7.858425832424728e-05,
|
|
"loss": 1.6749,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 7.846665757833878e-05,
|
|
"loss": 1.7282,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.83488233669527e-05,
|
|
"loss": 1.6329,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.823075665649056e-05,
|
|
"loss": 1.6273,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.811245841526063e-05,
|
|
"loss": 1.6262,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.79939296134701e-05,
|
|
"loss": 1.6977,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 7.787517122321706e-05,
|
|
"loss": 1.735,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 7.775618421848252e-05,
|
|
"loss": 1.6294,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 7.763696957512246e-05,
|
|
"loss": 1.5115,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 7.75175282708598e-05,
|
|
"loss": 1.5511,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 7.739786128527643e-05,
|
|
"loss": 1.6208,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 7.727796959980504e-05,
|
|
"loss": 1.5682,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 7.715785419772126e-05,
|
|
"loss": 1.5706,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 7.703751606413542e-05,
|
|
"loss": 1.6126,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 7.691695618598467e-05,
|
|
"loss": 1.6065,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 7.679617555202463e-05,
|
|
"loss": 1.5688,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 7.667517515282152e-05,
|
|
"loss": 1.5788,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 7.655395598074389e-05,
|
|
"loss": 1.513,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 7.643251902995452e-05,
|
|
"loss": 1.5044,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 7.63108652964023e-05,
|
|
"loss": 1.5667,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 7.618899577781404e-05,
|
|
"loss": 1.5765,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 7.606691147368627e-05,
|
|
"loss": 1.5661,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 7.594461338527701e-05,
|
|
"loss": 1.5763,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 7.582210251559769e-05,
|
|
"loss": 1.5253,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 7.569937986940477e-05,
|
|
"loss": 1.5982,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 7.557644645319158e-05,
|
|
"loss": 1.5583,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 7.545330327518007e-05,
|
|
"loss": 1.488,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 7.532995134531251e-05,
|
|
"loss": 1.5368,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 7.520639167524322e-05,
|
|
"loss": 1.5863,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 7.508262527833029e-05,
|
|
"loss": 1.6736,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 7.495865316962723e-05,
|
|
"loss": 1.5957,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 7.483447636587467e-05,
|
|
"loss": 1.5553,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 7.471009588549205e-05,
|
|
"loss": 1.5217,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 7.458551274856918e-05,
|
|
"loss": 1.5806,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 7.4460727976858e-05,
|
|
"loss": 1.6075,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 7.433574259376407e-05,
|
|
"loss": 1.5302,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 7.421055762433826e-05,
|
|
"loss": 1.4965,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 7.408517409526835e-05,
|
|
"loss": 1.6272,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 7.39595930348705e-05,
|
|
"loss": 1.5668,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 7.3833815473081e-05,
|
|
"loss": 1.5652,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 7.370784244144762e-05,
|
|
"loss": 1.5885,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 7.358167497312134e-05,
|
|
"loss": 1.5324,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 7.345531410284774e-05,
|
|
"loss": 1.6304,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 7.332876086695855e-05,
|
|
"loss": 1.5931,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 7.320201630336318e-05,
|
|
"loss": 1.5992,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 7.307508145154019e-05,
|
|
"loss": 1.5467,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 7.294795735252875e-05,
|
|
"loss": 1.5775,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 7.282064504892015e-05,
|
|
"loss": 1.5119,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 7.269314558484914e-05,
|
|
"loss": 1.5829,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 7.256546000598551e-05,
|
|
"loss": 1.6211,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 7.243758935952547e-05,
|
|
"loss": 1.5241,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 7.230953469418292e-05,
|
|
"loss": 1.5521,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 7.218129706018108e-05,
|
|
"loss": 1.5349,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 7.205287750924372e-05,
|
|
"loss": 1.5815,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 7.192427709458656e-05,
|
|
"loss": 1.5188,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 7.179549687090867e-05,
|
|
"loss": 1.5987,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 7.166653789438382e-05,
|
|
"loss": 1.5643,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 7.153740122265176e-05,
|
|
"loss": 1.5052,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 7.140808791480959e-05,
|
|
"loss": 1.6092,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 7.127859903140311e-05,
|
|
"loss": 1.5671,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 7.114893563441802e-05,
|
|
"loss": 1.5004,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 7.101909878727128e-05,
|
|
"loss": 1.5558,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 7.088908955480244e-05,
|
|
"loss": 1.5113,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 7.075890900326475e-05,
|
|
"loss": 1.6546,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 7.062855820031659e-05,
|
|
"loss": 1.5282,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 7.049803821501259e-05,
|
|
"loss": 1.5285,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 7.036735011779492e-05,
|
|
"loss": 1.5854,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 7.023649498048451e-05,
|
|
"loss": 1.6048,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 7.01054738762722e-05,
|
|
"loss": 1.5618,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 6.997428787971005e-05,
|
|
"loss": 1.6191,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 6.984293806670244e-05,
|
|
"loss": 1.5588,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 6.971142551449725e-05,
|
|
"loss": 1.6202,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 6.957975130167705e-05,
|
|
"loss": 1.607,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 6.944791650815023e-05,
|
|
"loss": 1.554,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 6.931592221514222e-05,
|
|
"loss": 1.6057,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 6.91837695051865e-05,
|
|
"loss": 1.5725,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 6.905145946211583e-05,
|
|
"loss": 1.5788,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 6.891899317105329e-05,
|
|
"loss": 1.5324,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 6.878637171840343e-05,
|
|
"loss": 1.5962,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 6.865359619184331e-05,
|
|
"loss": 1.5458,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 6.85206676803136e-05,
|
|
"loss": 1.6023,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 6.83875872740097e-05,
|
|
"loss": 1.5291,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 6.825435606437273e-05,
|
|
"loss": 1.5929,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 6.81209751440806e-05,
|
|
"loss": 1.5424,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 6.798744560703905e-05,
|
|
"loss": 1.5881,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 6.785376854837268e-05,
|
|
"loss": 1.4747,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 6.771994506441597e-05,
|
|
"loss": 1.5215,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 6.758597625270433e-05,
|
|
"loss": 1.465,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 6.745186321196495e-05,
|
|
"loss": 1.5071,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 6.731760704210802e-05,
|
|
"loss": 1.4882,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 6.718320884421751e-05,
|
|
"loss": 1.5905,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 6.704866972054223e-05,
|
|
"loss": 1.5922,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 6.691399077448677e-05,
|
|
"loss": 1.5448,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 6.677917311060246e-05,
|
|
"loss": 1.5675,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 6.66442178345783e-05,
|
|
"loss": 1.6005,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 6.650912605323194e-05,
|
|
"loss": 1.6179,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 6.637389887450045e-05,
|
|
"loss": 1.5711,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 6.623853740743146e-05,
|
|
"loss": 1.6179,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 6.610304276217392e-05,
|
|
"loss": 1.6407,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 6.596741604996897e-05,
|
|
"loss": 1.6296,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 6.583165838314095e-05,
|
|
"loss": 1.6393,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 6.569577087508814e-05,
|
|
"loss": 1.5851,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 6.555975464027375e-05,
|
|
"loss": 1.5772,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 6.542361079421669e-05,
|
|
"loss": 1.5792,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 6.528734045348248e-05,
|
|
"loss": 1.5866,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 6.515094473567407e-05,
|
|
"loss": 1.5141,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 6.501442475942265e-05,
|
|
"loss": 1.5783,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 6.48777816443785e-05,
|
|
"loss": 1.5052,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 6.474101651120184e-05,
|
|
"loss": 1.5681,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 6.460413048155355e-05,
|
|
"loss": 1.6441,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 6.446712467808608e-05,
|
|
"loss": 1.5737,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 6.433000022443419e-05,
|
|
"loss": 1.5541,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 6.419275824520568e-05,
|
|
"loss": 1.5573,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 6.405539986597225e-05,
|
|
"loss": 1.5178,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 6.391792621326027e-05,
|
|
"loss": 1.5345,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 6.378033841454147e-05,
|
|
"loss": 1.6092,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 6.364263759822371e-05,
|
|
"loss": 1.5439,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 6.350482489364186e-05,
|
|
"loss": 1.547,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 6.336690143104827e-05,
|
|
"loss": 1.5803,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 6.322886834160378e-05,
|
|
"loss": 1.584,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 6.309072675736827e-05,
|
|
"loss": 1.46,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 6.29524778112914e-05,
|
|
"loss": 1.5754,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 6.281412263720344e-05,
|
|
"loss": 1.5056,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 6.267566236980574e-05,
|
|
"loss": 1.5539,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 6.253709814466168e-05,
|
|
"loss": 1.5229,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 6.239843109818716e-05,
|
|
"loss": 1.4894,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 6.22596623676414e-05,
|
|
"loss": 1.5337,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 6.212079309111753e-05,
|
|
"loss": 1.592,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 6.19818244075333e-05,
|
|
"loss": 1.4937,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 6.18427574566218e-05,
|
|
"loss": 1.5862,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 6.170359337892194e-05,
|
|
"loss": 1.5252,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 6.156433331576927e-05,
|
|
"loss": 1.5639,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 6.142497840928656e-05,
|
|
"loss": 1.5306,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 6.128552980237437e-05,
|
|
"loss": 1.6537,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 6.114598863870177e-05,
|
|
"loss": 1.4589,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 6.100635606269694e-05,
|
|
"loss": 1.5472,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 6.0866633219537694e-05,
|
|
"loss": 1.5372,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 6.0726821255142255e-05,
|
|
"loss": 1.4852,
|
|
"step": 507
|
|
},
|
|
{
"epoch": 1.35,
"learning_rate": 6.058692131615968e-05,
"loss": 1.509,
"step": 508
},
{
"epoch": 1.35,
"learning_rate": 6.04469345499606e-05,
"loss": 1.5736,
"step": 509
},
{
"epoch": 1.35,
"learning_rate": 6.0306862104627705e-05,
"loss": 1.5348,
"step": 510
},
{
"epoch": 1.36,
"learning_rate": 6.0166705128946375e-05,
"loss": 1.5519,
"step": 511
},
{
"epoch": 1.36,
"learning_rate": 6.00264647723953e-05,
"loss": 1.5526,
"step": 512
},
{
"epoch": 1.36,
"learning_rate": 5.988614218513693e-05,
"loss": 1.5908,
"step": 513
},
{
"epoch": 1.36,
"learning_rate": 5.974573851800818e-05,
"loss": 1.5455,
"step": 514
},
{
"epoch": 1.37,
"learning_rate": 5.9605254922510926e-05,
"loss": 1.5317,
"step": 515
},
{
"epoch": 1.37,
"learning_rate": 5.946469255080251e-05,
"loss": 1.5962,
"step": 516
},
{
"epoch": 1.37,
"learning_rate": 5.9324052555686436e-05,
"loss": 1.6437,
"step": 517
},
{
"epoch": 1.37,
"learning_rate": 5.918333609060276e-05,
"loss": 1.5306,
"step": 518
},
{
"epoch": 1.38,
"learning_rate": 5.9042544309618694e-05,
"loss": 1.5289,
"step": 519
},
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 5.890167836741919e-05,
|
|
"loss": 1.5338,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 5.8760739419297384e-05,
|
|
"loss": 1.6154,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 5.861972862114518e-05,
|
|
"loss": 1.5108,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 5.847864712944373e-05,
|
|
"loss": 1.5818,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 5.833749610125402e-05,
|
|
"loss": 1.6317,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 5.819627669420724e-05,
|
|
"loss": 1.5724,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 5.805499006649547e-05,
|
|
"loss": 1.5023,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 5.791363737686205e-05,
|
|
"loss": 1.5374,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 5.7772219784592105e-05,
|
|
"loss": 1.5141,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 5.76307384495031e-05,
|
|
"loss": 1.6443,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 5.748919453193521e-05,
|
|
"loss": 1.5954,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 5.734758919274192e-05,
|
|
"loss": 1.6019,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 5.720592359328047e-05,
|
|
"loss": 1.6241,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 5.706419889540225e-05,
|
|
"loss": 1.5813,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 5.69224162614434e-05,
|
|
"loss": 1.518,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 5.6780576854215195e-05,
|
|
"loss": 1.5473,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 5.6638681836994535e-05,
|
|
"loss": 1.6277,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 5.649673237351436e-05,
|
|
"loss": 1.6213,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 5.6354729627954195e-05,
|
|
"loss": 1.5182,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 5.621267476493053e-05,
|
|
"loss": 1.6186,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 5.607056894948728e-05,
|
|
"loss": 1.5195,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 5.592841334708624e-05,
|
|
"loss": 1.5293,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 5.578620912359758e-05,
|
|
"loss": 1.6225,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 5.564395744529012e-05,
|
|
"loss": 1.5548,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 5.5501659478821964e-05,
|
|
"loss": 1.556,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 5.535931639123083e-05,
|
|
"loss": 1.4946,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 5.521692934992447e-05,
|
|
"loss": 1.5343,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 5.5074499522671106e-05,
|
|
"loss": 1.5353,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 5.493202807758992e-05,
|
|
"loss": 1.5644,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 5.478951618314133e-05,
|
|
"loss": 1.4671,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 5.464696500811757e-05,
|
|
"loss": 1.553,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 5.450437572163298e-05,
|
|
"loss": 1.5658,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 5.4361749493114514e-05,
|
|
"loss": 1.5448,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 5.4219087492292054e-05,
|
|
"loss": 1.5305,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 5.407639088918888e-05,
|
|
"loss": 1.5567,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 5.3933660854112075e-05,
|
|
"loss": 1.5312,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 5.37908985576429e-05,
|
|
"loss": 1.4669,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 5.364810517062717e-05,
|
|
"loss": 1.6714,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 5.350528186416573e-05,
|
|
"loss": 1.5867,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 5.3362429809604806e-05,
|
|
"loss": 1.5232,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 5.321955017852637e-05,
|
|
"loss": 1.5636,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 5.307664414273855e-05,
|
|
"loss": 1.4686,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 5.2933712874266084e-05,
|
|
"loss": 1.5301,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 5.2790757545340586e-05,
|
|
"loss": 1.5631,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 5.2647779328391045e-05,
|
|
"loss": 1.5805,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 5.2504779396034146e-05,
|
|
"loss": 1.6171,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 5.236175892106467e-05,
|
|
"loss": 1.5264,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 5.221871907644589e-05,
|
|
"loss": 1.5189,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 5.207566103529991e-05,
|
|
"loss": 1.5974,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 5.1932585970898096e-05,
|
|
"loss": 1.5221,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 5.17894950566514e-05,
|
|
"loss": 1.5471,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 5.1646389466100795e-05,
|
|
"loss": 1.521,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 5.150327037290761e-05,
|
|
"loss": 1.5258,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 5.136013895084388e-05,
|
|
"loss": 1.4685,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 5.121699637378282e-05,
|
|
"loss": 1.5678,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 5.107384381568907e-05,
|
|
"loss": 1.4684,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 5.093068245060917e-05,
|
|
"loss": 1.4688,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 5.0787513452661864e-05,
|
|
"loss": 1.566,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 5.064433799602849e-05,
|
|
"loss": 1.5323,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 5.05011572549434e-05,
|
|
"loss": 1.581,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 5.0357972403684225e-05,
|
|
"loss": 1.5065,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 5.021478461656235e-05,
|
|
"loss": 1.5708,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 5.007159506791325e-05,
|
|
"loss": 1.5121,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.992840493208676e-05,
|
|
"loss": 1.5743,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.9785215383437646e-05,
|
|
"loss": 1.5861,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.9642027596315786e-05,
|
|
"loss": 1.5671,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.949884274505661e-05,
|
|
"loss": 1.5105,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.935566200397152e-05,
|
|
"loss": 1.5658,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.921248654733814e-05,
|
|
"loss": 1.5483,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.906931754939084e-05,
|
|
"loss": 1.567,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.8926156184310946e-05,
|
|
"loss": 1.5763,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.878300362621719e-05,
|
|
"loss": 1.5044,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.8639861049156136e-05,
|
|
"loss": 1.5653,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.8496729627092405e-05,
|
|
"loss": 1.5588,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.835361053389922e-05,
|
|
"loss": 1.4821,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.821050494334861e-05,
|
|
"loss": 1.6273,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.806741402910193e-05,
|
|
"loss": 1.4818,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.7924338964700096e-05,
|
|
"loss": 1.4659,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 4.778128092355412e-05,
|
|
"loss": 1.5297,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 4.7638241078935325e-05,
|
|
"loss": 1.585,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 4.7495220603965866e-05,
|
|
"loss": 1.4958,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 4.735222067160896e-05,
|
|
"loss": 1.5098,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 4.720924245465943e-05,
|
|
"loss": 1.6065,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 4.706628712573394e-05,
|
|
"loss": 1.5091,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 4.6923355857261455e-05,
|
|
"loss": 1.4611,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 4.678044982147365e-05,
|
|
"loss": 1.5287,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 4.6637570190395205e-05,
|
|
"loss": 1.5573,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 4.649471813583427e-05,
|
|
"loss": 1.6371,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 4.635189482937284e-05,
|
|
"loss": 1.5336,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 4.620910144235712e-05,
|
|
"loss": 1.5559,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 4.606633914588793e-05,
|
|
"loss": 1.5399,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 4.592360911081113e-05,
|
|
"loss": 1.487,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 4.5780912507707944e-05,
|
|
"loss": 1.5583,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 4.563825050688549e-05,
|
|
"loss": 1.5271,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 4.549562427836701e-05,
|
|
"loss": 1.5934,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 4.535303499188244e-05,
|
|
"loss": 1.5261,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 4.5210483816858676e-05,
|
|
"loss": 1.6577,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 4.506797192241009e-05,
|
|
"loss": 1.4575,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 4.49255004773289e-05,
|
|
"loss": 1.4948,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 4.478307065007554e-05,
|
|
"loss": 1.4523,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 4.464068360876919e-05,
|
|
"loss": 1.6135,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 4.449834052117804e-05,
|
|
"loss": 1.5568,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 4.4356042554709905e-05,
|
|
"loss": 1.5823,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 4.421379087640244e-05,
|
|
"loss": 1.664,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 4.407158665291377e-05,
|
|
"loss": 1.5322,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 4.3929431050512727e-05,
|
|
"loss": 1.5811,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 4.3787325235069487e-05,
|
|
"loss": 1.5768,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 4.36452703720458e-05,
|
|
"loss": 1.5219,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 4.350326762648565e-05,
|
|
"loss": 1.5525,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 4.3361318163005484e-05,
|
|
"loss": 1.477,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 4.321942314578482e-05,
|
|
"loss": 1.524,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 4.307758373855661e-05,
|
|
"loss": 1.5741,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 4.293580110459776e-05,
|
|
"loss": 1.531,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 4.279407640671956e-05,
|
|
"loss": 1.5424,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 4.265241080725808e-05,
|
|
"loss": 1.471,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 4.251080546806481e-05,
|
|
"loss": 1.5149,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 4.2369261550496905e-05,
|
|
"loss": 1.5289,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 4.22277802154079e-05,
|
|
"loss": 1.455,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 4.2086362623137955e-05,
|
|
"loss": 1.5351,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 4.194500993350454e-05,
|
|
"loss": 1.5747,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 4.180372330579276e-05,
|
|
"loss": 1.5356,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 4.1662503898745994e-05,
|
|
"loss": 1.4969,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 4.1521352870556266e-05,
|
|
"loss": 1.5077,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 4.1380271378854833e-05,
|
|
"loss": 1.5598,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 4.1239260580702635e-05,
|
|
"loss": 1.5431,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 4.1098321632580824e-05,
|
|
"loss": 1.5806,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 4.095745569038133e-05,
|
|
"loss": 1.4687,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 4.0816663909397256e-05,
|
|
"loss": 1.534,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 4.067594744431358e-05,
|
|
"loss": 1.5602,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 4.053530744919749e-05,
|
|
"loss": 1.5434,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 4.03947450774891e-05,
|
|
"loss": 1.4529,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 4.0254261481991825e-05,
|
|
"loss": 1.5127,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 4.011385781486308e-05,
|
|
"loss": 1.5195,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.9973535227604714e-05,
|
|
"loss": 1.5714,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.983329487105364e-05,
|
|
"loss": 1.5864,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.96931378953723e-05,
|
|
"loss": 1.5457,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.955306545003941e-05,
|
|
"loss": 1.5544,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.941307868384034e-05,
|
|
"loss": 1.5802,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.927317874485776e-05,
|
|
"loss": 1.4793,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.9133366780462325e-05,
|
|
"loss": 1.5746,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.899364393730308e-05,
|
|
"loss": 1.5031,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.8854011361298246e-05,
|
|
"loss": 1.5029,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.871447019762564e-05,
|
|
"loss": 1.4952,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.857502159071346e-05,
|
|
"loss": 1.556,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.843566668423073e-05,
|
|
"loss": 1.5939,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.829640662107807e-05,
|
|
"loss": 1.5231,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.8157242543378205e-05,
|
|
"loss": 1.543,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.8018175592466695e-05,
|
|
"loss": 1.5051,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.787920690888248e-05,
|
|
"loss": 1.4483,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.7740337632358616e-05,
|
|
"loss": 1.5926,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.760156890181283e-05,
|
|
"loss": 1.5499,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.746290185533833e-05,
|
|
"loss": 1.6084,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.732433763019428e-05,
|
|
"loss": 1.4915,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.718587736279658e-05,
|
|
"loss": 1.5149,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.704752218870861e-05,
|
|
"loss": 1.5557,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.690927324263175e-05,
|
|
"loss": 1.4818,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.677113165839623e-05,
|
|
"loss": 1.4723,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.663309856895174e-05,
|
|
"loss": 1.4855,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.6495175106358154e-05,
|
|
"loss": 1.5185,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.6357362401776277e-05,
|
|
"loss": 1.5155,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.621966158545855e-05,
|
|
"loss": 1.5517,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.608207378673973e-05,
|
|
"loss": 1.4894,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.594460013402775e-05,
|
|
"loss": 1.4591,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.580724175479432e-05,
|
|
"loss": 1.5306,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.566999977556582e-05,
|
|
"loss": 1.4702,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.5532875321913935e-05,
|
|
"loss": 1.6138,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.5395869518446464e-05,
|
|
"loss": 1.4431,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.525898348879819e-05,
|
|
"loss": 1.6268,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.5122218355621514e-05,
|
|
"loss": 1.5443,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.4985575240577365e-05,
|
|
"loss": 1.6156,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.484905526432594e-05,
|
|
"loss": 1.5196,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.471265954651752e-05,
|
|
"loss": 1.5153,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.457638920578331e-05,
|
|
"loss": 1.5637,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.4440245359726266e-05,
|
|
"loss": 1.555,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.4304229124911856e-05,
|
|
"loss": 1.5495,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.416834161685907e-05,
|
|
"loss": 1.5596,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.403258395003102e-05,
|
|
"loss": 1.5496,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.389695723782609e-05,
|
|
"loss": 1.5649,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.376146259256855e-05,
|
|
"loss": 1.5552,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.3626101125499555e-05,
|
|
"loss": 1.5355,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.349087394676809e-05,
|
|
"loss": 1.5022,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.33557821654217e-05,
|
|
"loss": 1.527,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.322082688939755e-05,
|
|
"loss": 1.5452,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.308600922551324e-05,
|
|
"loss": 1.5208,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.295133027945778e-05,
|
|
"loss": 1.47,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.281679115578249e-05,
|
|
"loss": 1.5202,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.2682392957891985e-05,
|
|
"loss": 1.4507,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.254813678803504e-05,
|
|
"loss": 1.6117,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.241402374729569e-05,
|
|
"loss": 1.6149,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.2280054935584025e-05,
|
|
"loss": 1.5947,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.2146231451627334e-05,
|
|
"loss": 1.5165,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.2012554392960966e-05,
|
|
"loss": 1.4893,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.187902485591941e-05,
|
|
"loss": 1.6028,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.174564393562728e-05,
|
|
"loss": 1.5429,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.161241272599031e-05,
|
|
"loss": 1.5214,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.147933231968642e-05,
|
|
"loss": 1.5541,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.1346403808156713e-05,
|
|
"loss": 1.5747,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.121362828159659e-05,
|
|
"loss": 1.5768,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.108100682894671e-05,
|
|
"loss": 1.6119,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.094854053788418e-05,
|
|
"loss": 1.577,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.08162304948135e-05,
|
|
"loss": 1.5888,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.06840777848578e-05,
|
|
"loss": 1.5093,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.055208349184977e-05,
|
|
"loss": 1.4787,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.0420248698322973e-05,
|
|
"loss": 1.5513,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.0288574485502757e-05,
|
|
"loss": 1.594,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.015706193329757e-05,
|
|
"loss": 1.5548,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.002571212028995e-05,
|
|
"loss": 1.5783,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 2.9894526123727808e-05,
|
|
"loss": 1.5001,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 2.9763505019515525e-05,
|
|
"loss": 1.5542,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 2.9632649882205088e-05,
|
|
"loss": 1.5134,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 2.950196178498743e-05,
|
|
"loss": 1.5232,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 2.937144179968342e-05,
|
|
"loss": 1.4753,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 2.9241090996735266e-05,
|
|
"loss": 1.4371,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 2.911091044519757e-05,
|
|
"loss": 1.5026,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 2.8980901212728728e-05,
|
|
"loss": 1.5565,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 2.8851064365581982e-05,
|
|
"loss": 1.509,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 2.8721400968596903e-05,
|
|
"loss": 1.5417,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 2.8591912085190392e-05,
|
|
"loss": 1.4827,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 2.8462598777348247e-05,
|
|
"loss": 1.5347,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 2.8333462105616194e-05,
|
|
"loss": 1.5072,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 2.820450312909134e-05,
|
|
"loss": 1.4506,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 2.807572290541346e-05,
|
|
"loss": 1.5673,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 2.79471224907563e-05,
|
|
"loss": 1.5108,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 2.781870293981893e-05,
|
|
"loss": 1.4845,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 2.7690465305817088e-05,
|
|
"loss": 1.5846,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 2.756241064047456e-05,
|
|
"loss": 1.5504,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 2.7434539994014475e-05,
|
|
"loss": 1.5451,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 2.730685441515088e-05,
|
|
"loss": 1.4817,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 2.7179354951079856e-05,
|
|
"loss": 1.4819,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 2.7052042647471252e-05,
|
|
"loss": 1.5487,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 2.69249185484598e-05,
|
|
"loss": 1.4851,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 2.679798369663683e-05,
|
|
"loss": 1.5208,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 2.667123913304146e-05,
|
|
"loss": 1.536,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 2.6544685897152272e-05,
|
|
"loss": 1.5505,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 2.6418325026878665e-05,
|
|
"loss": 1.6026,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 2.629215755855239e-05,
|
|
"loss": 1.4181,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 2.6166184526919047e-05,
|
|
"loss": 1.4751,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 2.6040406965129515e-05,
|
|
"loss": 1.4894,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 2.5914825904731686e-05,
|
|
"loss": 1.5007,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 2.5789442375661744e-05,
|
|
"loss": 1.372,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 2.5664257406235955e-05,
|
|
"loss": 1.4389,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 2.5539272023141995e-05,
|
|
"loss": 1.4259,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 2.541448725143083e-05,
|
|
"loss": 1.4355,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 2.5289904114507946e-05,
|
|
"loss": 1.4497,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 2.516552363412534e-05,
|
|
"loss": 1.4206,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 2.504134683037278e-05,
|
|
"loss": 1.481,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 2.491737472166972e-05,
|
|
"loss": 1.4599,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 2.479360832475679e-05,
|
|
"loss": 1.5219,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 2.46700486546875e-05,
|
|
"loss": 1.4852,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 2.4546696724819963e-05,
|
|
"loss": 1.4385,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 2.4423553546808427e-05,
|
|
"loss": 1.4962,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 2.430062013059526e-05,
|
|
"loss": 1.441,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 2.4177897484402306e-05,
|
|
"loss": 1.4178,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 2.4055386614722996e-05,
|
|
"loss": 1.499,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 2.393308852631373e-05,
|
|
"loss": 1.4574,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 2.381100422218596e-05,
|
|
"loss": 1.4838,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 2.3689134703597706e-05,
|
|
"loss": 1.479,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 2.3567480970045492e-05,
|
|
"loss": 1.5401,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 2.344604401925613e-05,
|
|
"loss": 1.4839,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 2.3324824847178494e-05,
|
|
"loss": 1.4536,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 2.3203824447975392e-05,
|
|
"loss": 1.3847,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 2.308304381401534e-05,
|
|
"loss": 1.4686,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 2.296248393586459e-05,
|
|
"loss": 1.4785,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 2.284214580227875e-05,
|
|
"loss": 1.4651,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 2.2722030400194976e-05,
|
|
"loss": 1.4577,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 2.2602138714723574e-05,
|
|
"loss": 1.4656,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 2.24824717291402e-05,
|
|
"loss": 1.4736,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 2.2363030424877535e-05,
|
|
"loss": 1.4946,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 2.2243815781517496e-05,
|
|
"loss": 1.4902,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 2.2124828776782957e-05,
|
|
"loss": 1.3805,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 2.2006070386529913e-05,
|
|
"loss": 1.4926,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 2.1887541584739385e-05,
|
|
"loss": 1.4136,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 2.1769243343509454e-05,
|
|
"loss": 1.4177,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 2.165117663304732e-05,
|
|
"loss": 1.4555,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 2.153334242166123e-05,
|
|
"loss": 1.4362,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 2.1415741675752742e-05,
|
|
"loss": 1.4483,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 2.129837535980856e-05,
|
|
"loss": 1.3899,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 2.1181244436392855e-05,
|
|
"loss": 1.521,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 2.1064349866139132e-05,
|
|
"loss": 1.4221,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 2.094769260774262e-05,
|
|
"loss": 1.4968,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 2.0831273617952136e-05,
|
|
"loss": 1.4631,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 2.071509385156244e-05,
|
|
"loss": 1.4571,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 2.0599154261406316e-05,
|
|
"loss": 1.4922,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 2.0483455798346786e-05,
|
|
"loss": 1.4316,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 2.0367999411269285e-05,
|
|
"loss": 1.4226,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 2.0252786047073895e-05,
|
|
"loss": 1.4586,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 2.0137816650667612e-05,
|
|
"loss": 1.4131,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 2.0023092164956474e-05,
|
|
"loss": 1.4782,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 1.9908613530838055e-05,
|
|
"loss": 1.4648,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 1.979438168719346e-05,
|
|
"loss": 1.4328,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 1.968039757087991e-05,
|
|
"loss": 1.4804,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 1.9566662116722793e-05,
|
|
"loss": 1.5185,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 1.9453176257508275e-05,
|
|
"loss": 1.418,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 1.9339940923975364e-05,
|
|
"loss": 1.5342,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 1.9226957044808497e-05,
|
|
"loss": 1.4951,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 1.911422554662981e-05,
|
|
"loss": 1.5001,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 1.9001747353991582e-05,
|
|
"loss": 1.4289,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 1.888952338936864e-05,
|
|
"loss": 1.4779,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 1.8777554573150795e-05,
|
|
"loss": 1.4541,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 1.8665841823635284e-05,
|
|
"loss": 1.3708,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 1.855438605701925e-05,
|
|
"loss": 1.4434,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 1.8443188187392257e-05,
|
|
"loss": 1.4388,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 1.8332249126728666e-05,
|
|
"loss": 1.543,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 1.8221569784880397e-05,
|
|
"loss": 1.4487,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 1.811115106956918e-05,
|
|
"loss": 1.4323,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 1.8000993886379342e-05,
|
|
"loss": 1.4424,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 1.789109913875025e-05,
|
|
"loss": 1.3609,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 1.7781467727968953e-05,
|
|
"loss": 1.4008,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 1.7672100553162774e-05,
|
|
"loss": 1.4308,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 1.7562998511291946e-05,
|
|
"loss": 1.5258,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 1.745416249714224e-05,
|
|
"loss": 1.4535,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 1.734559340331765e-05,
|
|
"loss": 1.4607,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 1.7237292120233044e-05,
|
|
"loss": 1.3692,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 1.7129259536106885e-05,
|
|
"loss": 1.4383,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 1.702149653695395e-05,
|
|
"loss": 1.3952,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 1.691400400657799e-05,
|
|
"loss": 1.3934,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 1.6806782826564654e-05,
|
|
"loss": 1.4273,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 1.6699833876274028e-05,
|
|
"loss": 1.4847,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 1.6593158032833624e-05,
|
|
"loss": 1.4369,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 1.6486756171131063e-05,
|
|
"loss": 1.4289,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 1.638062916380697e-05,
|
|
"loss": 1.4509,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 1.627477788124776e-05,
|
|
"loss": 1.4375,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 1.6169203191578557e-05,
|
|
"loss": 1.5603,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 1.606390596065602e-05,
|
|
"loss": 1.5002,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 1.5958887052061283e-05,
|
|
"loss": 1.4442,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 1.5854147327092855e-05,
|
|
"loss": 1.4966,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 1.5749687644759552e-05,
|
|
"loss": 1.4576,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 1.564550886177348e-05,
|
|
"loss": 1.3977,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 1.5541611832542925e-05,
|
|
"loss": 1.5316,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 1.5437997409165478e-05,
|
|
"loss": 1.434,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 1.533466644142095e-05,
|
|
"loss": 1.449,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 1.523161977676441e-05,
|
|
"loss": 1.4726,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 1.5128858260319285e-05,
|
|
"loss": 1.4609,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 1.5026382734870376e-05,
|
|
"loss": 1.3527,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 1.4924194040856975e-05,
|
|
"loss": 1.4362,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 1.4822293016365962e-05,
|
|
"loss": 1.4483,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 1.4720680497124934e-05,
|
|
"loss": 1.3976,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 1.4619357316495352e-05,
|
|
"loss": 1.4461,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 1.4518324305465702e-05,
|
|
"loss": 1.4909,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 1.4417582292644694e-05,
|
|
"loss": 1.4755,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 1.4317132104254438e-05,
|
|
"loss": 1.4927,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 1.421697456412371e-05,
|
|
"loss": 1.4471,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 1.4117110493681124e-05,
|
|
"loss": 1.4859,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 1.401754071194849e-05,
|
|
"loss": 1.471,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 1.3918266035534027e-05,
|
|
"loss": 1.3538,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 1.3819287278625697e-05,
|
|
"loss": 1.4906,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 1.3720605252984503e-05,
|
|
"loss": 1.5514,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 1.362222076793786e-05,
|
|
"loss": 1.45,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 1.3524134630372937e-05,
|
|
"loss": 1.4378,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 1.3426347644730047e-05,
|
|
"loss": 1.4655,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 1.3328860612996053e-05,
|
|
"loss": 1.4872,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 1.3231674334697774e-05,
|
|
"loss": 1.5583,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 1.3134789606895476e-05,
|
|
"loss": 1.3942,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 1.3038207224176213e-05,
|
|
"loss": 1.4931,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 1.2941927978647528e-05,
|
|
"loss": 1.4525,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 1.2845952659930693e-05,
|
|
"loss": 1.5043,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 1.275028205515445e-05,
|
|
"loss": 1.3989,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 1.2654916948948436e-05,
|
|
"loss": 1.4537,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 1.2559858123436802e-05,
|
|
"loss": 1.4277,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 1.2465106358231753e-05,
|
|
"loss": 1.441,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 1.23706624304272e-05,
|
|
"loss": 1.4923,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 1.2276527114592367e-05,
|
|
"loss": 1.4097,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 1.2182701182765426e-05,
|
|
"loss": 1.4913,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 1.208918540444719e-05,
|
|
"loss": 1.421,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 1.1995980546594776e-05,
|
|
"loss": 1.4794,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 1.1903087373615351e-05,
|
|
"loss": 1.4501,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 1.1810506647359793e-05,
|
|
"loss": 1.5201,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 1.171823912711657e-05,
|
|
"loss": 1.4111,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 1.1626285569605344e-05,
|
|
"loss": 1.4176,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 1.153464672897091e-05,
|
|
"loss": 1.5283,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 1.144332335677694e-05,
|
|
"loss": 1.4544,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 1.1352316201999841e-05,
|
|
"loss": 1.4489,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 1.1261626011022586e-05,
|
|
"loss": 1.4405,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 1.1171253527628628e-05,
|
|
"loss": 1.4862,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 1.1081199492995781e-05,
|
|
"loss": 1.4785,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 1.0991464645690142e-05,
|
|
"loss": 1.4659,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 1.0902049721660046e-05,
|
|
"loss": 1.438,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 1.0812955454229978e-05,
|
|
"loss": 1.4393,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 1.0724182574094682e-05,
|
|
"loss": 1.4228,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 1.0635731809312993e-05,
|
|
"loss": 1.4681,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 1.0547603885302049e-05,
|
|
"loss": 1.4792,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 1.045979952483117e-05,
|
|
"loss": 1.4224,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 1.037231944801607e-05,
|
|
"loss": 1.4102,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 1.0285164372312884e-05,
|
|
"loss": 1.4084,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 1.0198335012512272e-05,
|
|
"loss": 1.493,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 1.0111832080733601e-05,
|
|
"loss": 1.4612,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 1.0025656286419078e-05,
|
|
"loss": 1.4915,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 9.939808336327921e-06,
|
|
"loss": 1.4335,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 9.854288934530604e-06,
|
|
"loss": 1.4599,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 9.769098782403041e-06,
|
|
"loss": 1.3922,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 9.684238578620814e-06,
|
|
"loss": 1.4095,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 9.599709019153568e-06,
|
|
"loss": 1.4492,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 9.515510797259102e-06,
|
|
"loss": 1.4238,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 9.431644603477907e-06,
|
|
"loss": 1.4694,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 9.34811112562728e-06,
|
|
"loss": 1.5137,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 9.264911048795893e-06,
|
|
"loss": 1.3667,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 9.182045055337995e-06,
|
|
"loss": 1.4456,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 9.099513824867939e-06,
|
|
"loss": 1.4658,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 9.017318034254546e-06,
|
|
"loss": 1.4212,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 8.935458357615584e-06,
|
|
"loss": 1.4453,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 8.853935466312225e-06,
|
|
"loss": 1.4445,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 8.772750028943527e-06,
|
|
"loss": 1.3734,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 8.691902711341e-06,
|
|
"loss": 1.4893,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 8.611394176563038e-06,
|
|
"loss": 1.5218,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 8.531225084889654e-06,
|
|
"loss": 1.4519,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 8.451396093816872e-06,
|
|
"loss": 1.4522,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 8.371907858051497e-06,
|
|
"loss": 1.4729,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 8.292761029505603e-06,
|
|
"loss": 1.4275,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 8.21395625729135e-06,
|
|
"loss": 1.4604,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 8.135494187715475e-06,
|
|
"loss": 1.4039,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 8.057375464274142e-06,
|
|
"loss": 1.4912,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 7.979600727647596e-06,
|
|
"loss": 1.4187,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 7.902170615694915e-06,
|
|
"loss": 1.4225,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 7.825085763448798e-06,
|
|
"loss": 1.4011,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 7.748346803110295e-06,
|
|
"loss": 1.3841,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 7.671954364043754e-06,
|
|
"loss": 1.474,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 7.595909072771485e-06,
|
|
"loss": 1.4427,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 7.520211552968792e-06,
|
|
"loss": 1.5081,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 7.444862425458699e-06,
|
|
"loss": 1.4544,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 7.369862308207026e-06,
|
|
"loss": 1.5018,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 7.295211816317149e-06,
|
|
"loss": 1.4605,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 7.220911562025085e-06,
|
|
"loss": 1.4122,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 7.146962154694409e-06,
|
|
"loss": 1.4672,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 7.0733642008112836e-06,
|
|
"loss": 1.5257,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 7.000118303979464e-06,
|
|
"loss": 1.4355,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 6.927225064915349e-06,
|
|
"loss": 1.4799,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 6.854685081443097e-06,
|
|
"loss": 1.5002,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 6.782498948489613e-06,
|
|
"loss": 1.478,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 6.71066725807985e-06,
|
|
"loss": 1.4507,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 6.639190599331746e-06,
|
|
"loss": 1.482,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 6.5680695584515725e-06,
|
|
"loss": 1.4785,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 6.497304718728986e-06,
|
|
"loss": 1.4368,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 6.4268966605323725e-06,
|
|
"loss": 1.4422,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 6.3568459613039536e-06,
|
|
"loss": 1.4643,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 6.287153195555174e-06,
|
|
"loss": 1.4136,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 6.217818934861896e-06,
|
|
"loss": 1.4759,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 6.148843747859778e-06,
|
|
"loss": 1.5521,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 6.080228200239585e-06,
|
|
"loss": 1.4326,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 6.011972854742503e-06,
|
|
"loss": 1.4925,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 5.94407827115564e-06,
|
|
"loss": 1.5337,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 5.876545006307288e-06,
|
|
"loss": 1.4804,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 5.809373614062508e-06,
|
|
"loss": 1.4134,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 5.742564645318432e-06,
|
|
"loss": 1.4892,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 5.6761186479999115e-06,
|
|
"loss": 1.5565,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 5.610036167054839e-06,
|
|
"loss": 1.4506,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 5.544317744449873e-06,
|
|
"loss": 1.3972,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 5.478963919165819e-06,
|
|
"loss": 1.5298,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 5.4139752271933295e-06,
|
|
"loss": 1.507,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 5.349352201528446e-06,
|
|
"loss": 1.5116,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 5.285095372168264e-06,
|
|
"loss": 1.4707,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 5.2212052661065656e-06,
|
|
"loss": 1.4136,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 5.157682407329456e-06,
|
|
"loss": 1.5139,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 5.094527316811204e-06,
|
|
"loss": 1.4348,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 5.031740512509769e-06,
|
|
"loss": 1.5051,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 4.969322509362762e-06,
|
|
"loss": 1.4504,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 4.9072738192830255e-06,
|
|
"loss": 1.3567,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 4.845594951154614e-06,
|
|
"loss": 1.5088,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 4.784286410828481e-06,
|
|
"loss": 1.4605,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 4.723348701118407e-06,
|
|
"loss": 1.4053,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 4.662782321796849e-06,
|
|
"loss": 1.3966,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 4.60258776959086e-06,
|
|
"loss": 1.4311,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 4.54276553817799e-06,
|
|
"loss": 1.4258,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 4.483316118182251e-06,
|
|
"loss": 1.436,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 4.424239997170105e-06,
|
|
"loss": 1.4556,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 4.365537659646418e-06,
|
|
"loss": 1.399,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 2.61,
"learning_rate": 4.307209587050576e-06,
"loss": 1.5502,
"step": 985
},
{
"epoch": 2.62,
"learning_rate": 4.249256257752421e-06,
"loss": 1.4414,
"step": 986
},
{
"epoch": 2.62,
"learning_rate": 4.191678147048445e-06,
"loss": 1.5388,
"step": 987
},
{
"epoch": 2.62,
"learning_rate": 4.134475727157777e-06,
"loss": 1.419,
"step": 988
},
{
"epoch": 2.62,
"learning_rate": 4.077649467218436e-06,
"loss": 1.471,
"step": 989
},
{
"epoch": 2.63,
"learning_rate": 4.0211998332833514e-06,
"loss": 1.4475,
"step": 990
},
{
"epoch": 2.63,
"learning_rate": 3.965127288316634e-06,
"loss": 1.4398,
"step": 991
},
{
"epoch": 2.63,
"learning_rate": 3.9094322921897574e-06,
"loss": 1.3985,
"step": 992
},
{
"epoch": 2.63,
"learning_rate": 3.854115301677757e-06,
"loss": 1.3773,
"step": 993
},
{
"epoch": 2.64,
"learning_rate": 3.799176770455526e-06,
"loss": 1.4409,
"step": 994
},
{
"epoch": 2.64,
"learning_rate": 3.7446171490940706e-06,
"loss": 1.4241,
"step": 995
},
{
"epoch": 2.64,
"learning_rate": 3.690436885056808e-06,
"loss": 1.4467,
"step": 996
},
{
"epoch": 2.64,
"learning_rate": 3.6366364226959047e-06,
"loss": 1.433,
"step": 997
},
{
"epoch": 2.65,
"learning_rate": 3.5832162032486684e-06,
"loss": 1.4797,
"step": 998
},
{
"epoch": 2.65,
"learning_rate": 3.530176664833834e-06,
"loss": 1.3771,
"step": 999
},
{
"epoch": 2.65,
"learning_rate": 3.4775182424481135e-06,
"loss": 1.4442,
"step": 1000
},
{
"epoch": 2.66,
"learning_rate": 3.4252413679624616e-06,
"loss": 1.4842,
"step": 1001
},
{
"epoch": 2.66,
"learning_rate": 3.373346470118682e-06,
"loss": 1.4969,
"step": 1002
},
{
"epoch": 2.66,
"learning_rate": 3.321833974525812e-06,
"loss": 1.4653,
"step": 1003
},
{
"epoch": 2.66,
"learning_rate": 3.2707043036566965e-06,
"loss": 1.4349,
"step": 1004
},
{
"epoch": 2.67,
"learning_rate": 3.219957876844465e-06,
"loss": 1.4189,
"step": 1005
},
{
"epoch": 2.67,
"learning_rate": 3.16959511027915e-06,
"loss": 1.4463,
"step": 1006
},
{
"epoch": 2.67,
"learning_rate": 3.119616417004223e-06,
"loss": 1.4771,
"step": 1007
},
{
"epoch": 2.67,
"learning_rate": 3.0700222069132422e-06,
"loss": 1.4248,
"step": 1008
},
{
"epoch": 2.68,
"learning_rate": 3.020812886746477e-06,
"loss": 1.4778,
"step": 1009
},
{
"epoch": 2.68,
"learning_rate": 2.9719888600875713e-06,
"loss": 1.4952,
"step": 1010
},
{
"epoch": 2.68,
"learning_rate": 2.923550527360247e-06,
"loss": 1.4907,
"step": 1011
},
{
"epoch": 2.68,
"learning_rate": 2.875498285824979e-06,
"loss": 1.4447,
"step": 1012
},
{
"epoch": 2.69,
"learning_rate": 2.8278325295758134e-06,
"loss": 1.4653,
"step": 1013
},
{
"epoch": 2.69,
"learning_rate": 2.7805536495370375e-06,
"loss": 1.4342,
"step": 1014
},
{
"epoch": 2.69,
"learning_rate": 2.7336620334600604e-06,
"loss": 1.4871,
"step": 1015
},
{
"epoch": 2.69,
"learning_rate": 2.687158065920192e-06,
"loss": 1.4212,
"step": 1016
},
{
"epoch": 2.7,
"learning_rate": 2.6410421283134866e-06,
"loss": 1.4416,
"step": 1017
},
{
"epoch": 2.7,
"learning_rate": 2.595314598853632e-06,
"loss": 1.5976,
"step": 1018
},
{
"epoch": 2.7,
"learning_rate": 2.54997585256882e-06,
"loss": 1.4335,
"step": 1019
},
{
"epoch": 2.71,
"learning_rate": 2.5050262612987206e-06,
"loss": 1.4337,
"step": 1020
},
{
"epoch": 2.71,
"learning_rate": 2.4604661936913687e-06,
"loss": 1.4384,
"step": 1021
},
{
"epoch": 2.71,
"learning_rate": 2.416296015200198e-06,
"loss": 1.4824,
"step": 1022
},
{
"epoch": 2.71,
"learning_rate": 2.372516088081006e-06,
"loss": 1.4162,
"step": 1023
},
{
"epoch": 2.72,
"learning_rate": 2.3291267713889953e-06,
"loss": 1.4193,
"step": 1024
},
{
"epoch": 2.72,
"learning_rate": 2.286128420975836e-06,
"loss": 1.442,
"step": 1025
},
{
"epoch": 2.72,
"learning_rate": 2.2435213894867303e-06,
"loss": 1.4323,
"step": 1026
},
{
"epoch": 2.72,
"learning_rate": 2.2013060263575415e-06,
"loss": 1.4348,
"step": 1027
},
{
"epoch": 2.73,
"learning_rate": 2.159482677811919e-06,
"loss": 1.4858,
"step": 1028
},
{
"epoch": 2.73,
"learning_rate": 2.1180516868584467e-06,
"loss": 1.3906,
"step": 1029
},
{
"epoch": 2.73,
"learning_rate": 2.0770133932878412e-06,
"loss": 1.374,
"step": 1030
},
{
"epoch": 2.73,
"learning_rate": 2.0363681336701746e-06,
"loss": 1.4733,
"step": 1031
},
{
"epoch": 2.74,
"learning_rate": 1.996116241352092e-06,
"loss": 1.4614,
"step": 1032
},
{
"epoch": 2.74,
"learning_rate": 1.9562580464541014e-06,
"loss": 1.5179,
"step": 1033
},
{
"epoch": 2.74,
"learning_rate": 1.9167938758678394e-06,
"loss": 1.3801,
"step": 1034
},
{
"epoch": 2.75,
"learning_rate": 1.8777240532534212e-06,
"loss": 1.4825,
"step": 1035
},
{
"epoch": 2.75,
"learning_rate": 1.8390488990367493e-06,
"loss": 1.3541,
"step": 1036
},
{
"epoch": 2.75,
"learning_rate": 1.8007687304069375e-06,
"loss": 1.4733,
"step": 1037
},
{
"epoch": 2.75,
"learning_rate": 1.7628838613136412e-06,
"loss": 1.4835,
"step": 1038
},
{
"epoch": 2.76,
"learning_rate": 1.7253946024645473e-06,
"loss": 1.4677,
"step": 1039
},
{
"epoch": 2.76,
"learning_rate": 1.6883012613227778e-06,
"loss": 1.446,
"step": 1040
},
{
"epoch": 2.76,
"learning_rate": 1.6516041421044127e-06,
"loss": 1.461,
"step": 1041
},
{
"epoch": 2.76,
"learning_rate": 1.6153035457759536e-06,
"loss": 1.4779,
"step": 1042
},
{
"epoch": 2.77,
"learning_rate": 1.579399770051876e-06,
"loss": 1.4892,
"step": 1043
},
{
"epoch": 2.77,
"learning_rate": 1.5438931093921805e-06,
"loss": 1.4253,
"step": 1044
},
{
"epoch": 2.77,
"learning_rate": 1.5087838549999956e-06,
"loss": 1.4644,
"step": 1045
},
{
"epoch": 2.77,
"learning_rate": 1.474072294819162e-06,
"loss": 1.4157,
"step": 1046
},
{
"epoch": 2.78,
"learning_rate": 1.4397587135318857e-06,
"loss": 1.4122,
"step": 1047
},
{
"epoch": 2.78,
"learning_rate": 1.4058433925564107e-06,
"loss": 1.5177,
"step": 1048
},
{
"epoch": 2.78,
"learning_rate": 1.3723266100447053e-06,
"loss": 1.4533,
"step": 1049
},
{
"epoch": 2.79,
"learning_rate": 1.3392086408801518e-06,
"loss": 1.4155,
"step": 1050
},
{
"epoch": 2.79,
"learning_rate": 1.3064897566753442e-06,
"loss": 1.4637,
"step": 1051
},
{
"epoch": 2.79,
"learning_rate": 1.2741702257698273e-06,
"loss": 1.395,
"step": 1052
},
{
"epoch": 2.79,
"learning_rate": 1.242250313227905e-06,
"loss": 1.4807,
"step": 1053
},
{
"epoch": 2.8,
"learning_rate": 1.210730280836464e-06,
"loss": 1.4223,
"step": 1054
},
{
"epoch": 2.8,
"learning_rate": 1.1796103871028196e-06,
"loss": 1.461,
"step": 1055
},
{
"epoch": 2.8,
"learning_rate": 1.1488908872526183e-06,
"loss": 1.4161,
"step": 1056
},
{
"epoch": 2.8,
"learning_rate": 1.1185720332277162e-06,
"loss": 1.4039,
"step": 1057
},
{
"epoch": 2.81,
"learning_rate": 1.0886540736841311e-06,
"loss": 1.4827,
"step": 1058
},
{
"epoch": 2.81,
"learning_rate": 1.0591372539900058e-06,
"loss": 1.4112,
"step": 1059
},
{
"epoch": 2.81,
"learning_rate": 1.0300218162235752e-06,
"loss": 1.3737,
"step": 1060
},
{
"epoch": 2.81,
"learning_rate": 1.0013079991711972e-06,
"loss": 1.4186,
"step": 1061
},
{
"epoch": 2.82,
"learning_rate": 9.729960383254134e-07,
"loss": 1.4583,
"step": 1062
},
{
"epoch": 2.82,
"learning_rate": 9.450861658829469e-07,
"loss": 1.4222,
"step": 1063
},
{
"epoch": 2.82,
"learning_rate": 9.175786107429085e-07,
"loss": 1.4342,
"step": 1064
},
{
"epoch": 2.82,
"learning_rate": 8.90473598504804e-07,
"loss": 1.4549,
"step": 1065
},
{
"epoch": 2.83,
"learning_rate": 8.637713514667634e-07,
"loss": 1.4385,
"step": 1066
},
{
"epoch": 2.83,
"learning_rate": 8.37472088623692e-07,
"loss": 1.447,
"step": 1067
},
{
"epoch": 2.83,
"learning_rate": 8.115760256654669e-07,
"loss": 1.5006,
"step": 1068
},
{
"epoch": 2.84,
"learning_rate": 7.860833749751773e-07,
"loss": 1.4861,
"step": 1069
},
{
"epoch": 2.84,
"learning_rate": 7.60994345627386e-07,
"loss": 1.4061,
"step": 1070
},
{
"epoch": 2.84,
"learning_rate": 7.363091433864044e-07,
"loss": 1.4612,
"step": 1071
},
{
"epoch": 2.84,
"learning_rate": 7.120279707046096e-07,
"loss": 1.432,
"step": 1072
},
{
"epoch": 2.85,
"learning_rate": 6.881510267207846e-07,
"loss": 1.4702,
"step": 1073
},
{
"epoch": 2.85,
"learning_rate": 6.646785072584872e-07,
"loss": 1.5084,
"step": 1074
},
{
"epoch": 2.85,
"learning_rate": 6.416106048244386e-07,
"loss": 1.4661,
"step": 1075
},
{
"epoch": 2.85,
"learning_rate": 6.189475086069485e-07,
"loss": 1.3731,
"step": 1076
},
{
"epoch": 2.86,
"learning_rate": 5.966894044743709e-07,
"loss": 1.455,
"step": 1077
},
{
"epoch": 2.86,
"learning_rate": 5.748364749735613e-07,
"loss": 1.4169,
"step": 1078
},
{
"epoch": 2.86,
"learning_rate": 5.533888993283831e-07,
"loss": 1.3907,
"step": 1079
},
{
"epoch": 2.86,
"learning_rate": 5.323468534382703e-07,
"loss": 1.4668,
"step": 1080
},
{
"epoch": 2.87,
"learning_rate": 5.117105098767283e-07,
"loss": 1.4628,
"step": 1081
},
{
"epoch": 2.87,
"learning_rate": 4.914800378899687e-07,
"loss": 1.3697,
"step": 1082
},
{
"epoch": 2.87,
"learning_rate": 4.7165560339549886e-07,
"loss": 1.5115,
"step": 1083
},
{
"epoch": 2.88,
"learning_rate": 4.522373689807624e-07,
"loss": 1.4415,
"step": 1084
},
{
"epoch": 2.88,
"learning_rate": 4.33225493901801e-07,
"loss": 1.4368,
"step": 1085
},
{
"epoch": 2.88,
"learning_rate": 4.1462013408196664e-07,
"loss": 1.4338,
"step": 1086
},
{
"epoch": 2.88,
"learning_rate": 3.9642144211061714e-07,
"loss": 1.3896,
"step": 1087
},
{
"epoch": 2.89,
"learning_rate": 3.7862956724190045e-07,
"loss": 1.4796,
"step": 1088
},
{
"epoch": 2.89,
"learning_rate": 3.612446553934723e-07,
"loss": 1.4081,
"step": 1089
},
{
"epoch": 2.89,
"learning_rate": 3.4426684914538045e-07,
"loss": 1.3993,
"step": 1090
},
{
"epoch": 2.89,
"learning_rate": 3.276962877388157e-07,
"loss": 1.4188,
"step": 1091
},
{
"epoch": 2.9,
"learning_rate": 3.115331070750127e-07,
"loss": 1.4747,
"step": 1092
},
{
"epoch": 2.9,
"learning_rate": 2.957774397141455e-07,
"loss": 1.4073,
"step": 1093
},
{
"epoch": 2.9,
"learning_rate": 2.8042941487419483e-07,
"loss": 1.4816,
"step": 1094
},
{
"epoch": 2.9,
"learning_rate": 2.6548915842993793e-07,
"loss": 1.4085,
"step": 1095
},
{
"epoch": 2.91,
"learning_rate": 2.5095679291188833e-07,
"loss": 1.3901,
"step": 1096
},
{
"epoch": 2.91,
"learning_rate": 2.368324375052855e-07,
"loss": 1.4146,
"step": 1097
},
{
"epoch": 2.91,
"learning_rate": 2.2311620804914002e-07,
"loss": 1.4465,
"step": 1098
},
{
"epoch": 2.92,
"learning_rate": 2.0980821703527886e-07,
"loss": 1.4298,
"step": 1099
},
{
"epoch": 2.92,
"learning_rate": 1.9690857360739612e-07,
"loss": 1.5303,
"step": 1100
},
{
"epoch": 2.92,
"learning_rate": 1.8441738356019256e-07,
"loss": 1.5039,
"step": 1101
},
{
"epoch": 2.92,
"learning_rate": 1.7233474933849303e-07,
"loss": 1.4276,
"step": 1102
},
{
"epoch": 2.93,
"learning_rate": 1.6066077003639714e-07,
"loss": 1.4591,
"step": 1103
},
{
"epoch": 2.93,
"learning_rate": 1.4939554139648537e-07,
"loss": 1.4353,
"step": 1104
},
{
"epoch": 2.93,
"learning_rate": 1.3853915580901988e-07,
"loss": 1.3791,
"step": 1105
},
{
"epoch": 2.93,
"learning_rate": 1.2809170231118938e-07,
"loss": 1.4862,
"step": 1106
},
{
"epoch": 2.94,
"learning_rate": 1.1805326658639316e-07,
"loss": 1.3861,
"step": 1107
},
{
"epoch": 2.94,
"learning_rate": 1.0842393096350823e-07,
"loss": 1.4563,
"step": 1108
},
{
"epoch": 2.94,
"learning_rate": 9.920377441623996e-08,
"loss": 1.4102,
"step": 1109
},
{
"epoch": 2.94,
"learning_rate": 9.039287256247253e-08,
"loss": 1.4573,
"step": 1110
},
{
"epoch": 2.95,
"learning_rate": 8.199129766363056e-08,
"loss": 1.4369,
"step": 1111
},
{
"epoch": 2.95,
"learning_rate": 7.399911862410735e-08,
"loss": 1.5305,
"step": 1112
},
{
"epoch": 2.95,
"learning_rate": 6.641640099068758e-08,
"loss": 1.4532,
"step": 1113
},
{
"epoch": 2.95,
"learning_rate": 5.9243206952019904e-08,
"loss": 1.473,
"step": 1114
},
{
"epoch": 2.96,
"learning_rate": 5.247959533808966e-08,
"loss": 1.3916,
"step": 1115
},
{
"epoch": 2.96,
"learning_rate": 4.612562161974698e-08,
"loss": 1.3834,
"step": 1116
},
{
"epoch": 2.96,
"learning_rate": 4.018133790826273e-08,
"loss": 1.4524,
"step": 1117
},
{
"epoch": 2.97,
"learning_rate": 3.464679295487328e-08,
"loss": 1.428,
"step": 1118
},
{
"epoch": 2.97,
"learning_rate": 2.952203215041971e-08,
"loss": 1.4697,
"step": 1119
},
{
"epoch": 2.97,
"learning_rate": 2.480709752493704e-08,
"loss": 1.3832,
"step": 1120
},
{
"epoch": 2.97,
"learning_rate": 2.050202774732668e-08,
"loss": 1.5032,
"step": 1121
},
{
"epoch": 2.98,
"learning_rate": 1.6606858125040038e-08,
"loss": 1.4712,
"step": 1122
},
{
"epoch": 2.98,
"learning_rate": 1.3121620603795404e-08,
"loss": 1.4285,
"step": 1123
},
{
"epoch": 2.98,
"learning_rate": 1.0046343767294852e-08,
"loss": 1.4409,
"step": 1124
},
{
"epoch": 2.98,
"learning_rate": 7.381052837013291e-09,
"loss": 1.4037,
"step": 1125
},
{
"epoch": 2.99,
"learning_rate": 5.125769671976421e-09,
"loss": 1.4707,
"step": 1126
},
{
"epoch": 2.99,
"learning_rate": 3.2805127685886504e-09,
"loss": 1.4793,
"step": 1127
},
{
"epoch": 2.99,
"learning_rate": 1.845297260472112e-09,
"loss": 1.4402,
"step": 1128
},
{
"epoch": 2.99,
"learning_rate": 8.201349183611928e-10,
"loss": 1.4417,
"step": 1129
},
{
"epoch": 3.0,
"learning_rate": 2.0503414998040982e-10,
"loss": 1.4528,
"step": 1130
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 1.4566,
"step": 1131
},
{
"epoch": 3.0,
"step": 1131,
"total_flos": 2.4511080029906534e+17,
"train_loss": 1.623349694105295,
"train_runtime": 8800.864,
"train_samples_per_second": 16.443,
"train_steps_per_second": 0.129
}
],
"max_steps": 1131,
"num_train_epochs": 3,
"total_flos": 2.4511080029906534e+17,
"trial_name": null,
"trial_params": null
}