From e6849c30173effb9d224313b5505f8b9b0817624 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 6 May 2026 05:27:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: eqhylxx/vicuna-160m Source: Original Platform --- .gitattributes | 35 + config.json | 26 + generation_config.json | 7 + pytorch_model.bin | 3 + special_tokens_map.json | 24 + tokenizer.model | 3 + tokenizer_config.json | 35 + trainer_state.json | 6811 +++++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + 9 files changed, 6947 insertions(+) create mode 100644 .gitattributes create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 pytorch_model.bin create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000..6eb4fdd --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "/workspace/llama-160m", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 0, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "num_key_value_heads": 12, + "pad_token_id": 1, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..0c095b1 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "pad_token_id": 1, + "transformers_version": "4.31.0" +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..bb1d4a1 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e66bf6fe78f5c5b4df4dd8b033319296e4c139402a28aaf3bddb77ecaf86499 +size 649710457 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..f928b24 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..6d77222 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,35 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": true, + "model_max_length": 2048, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..99fe3ce --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,6811 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 1131, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.9411764705882355e-06, + "loss": 3.5459, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5.882352941176471e-06, + "loss": 3.4515, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 8.823529411764707e-06, + "loss": 3.5459, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.1764705882352942e-05, + "loss": 3.3973, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.4705882352941177e-05, + "loss": 3.3427, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 1.7647058823529414e-05, + "loss": 3.1525, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 2.058823529411765e-05, + "loss": 3.1169, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 2.3529411764705884e-05, + "loss": 3.0744, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 2.647058823529412e-05, + "loss": 2.9719, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 2.9411764705882354e-05, + "loss": 2.8901, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 3.235294117647059e-05, + "loss": 2.9138, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 3.529411764705883e-05, + "loss": 2.7916, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 3.8235294117647055e-05, + "loss": 2.7394, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 4.11764705882353e-05, + "loss": 2.6911, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 4.411764705882353e-05, + "loss": 2.6669, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 4.705882352941177e-05, + "loss": 2.7453, + "step": 16 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 2.5887, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 5.294117647058824e-05, + "loss": 2.5516, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 5.588235294117647e-05, + "loss": 2.6737, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 5.882352941176471e-05, + "loss": 2.4736, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 6.176470588235295e-05, + "loss": 2.397, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 6.470588235294118e-05, + "loss": 2.4433, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 6.764705882352942e-05, + "loss": 2.354, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 7.058823529411765e-05, + "loss": 2.4111, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 7.352941176470589e-05, + "loss": 2.3523, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 7.647058823529411e-05, + "loss": 2.3626, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 7.941176470588235e-05, + "loss": 2.2676, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 8.23529411764706e-05, + "loss": 2.403, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 8.529411764705883e-05, + "loss": 2.2134, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 8.823529411764706e-05, + "loss": 2.2442, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 9.11764705882353e-05, + "loss": 2.2206, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 9.411764705882353e-05, + "loss": 2.1881, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 9.705882352941177e-05, + "loss": 2.1922, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001, + "loss": 2.1636, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 9.999979496585003e-05, + "loss": 2.1973, + "step": 35 + }, + { + "epoch": 0.1, + "learning_rate": 9.999917986508165e-05, + "loss": 2.1669, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 9.999815470273954e-05, + "loss": 2.1175, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 9.999671948723141e-05, + "loss": 2.2195, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 9.999487423032803e-05, + "loss": 2.0737, + "step": 39 + }, + { + "epoch": 0.11, + "learning_rate": 9.999261894716299e-05, + "loss": 2.1123, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 9.998995365623271e-05, + "loss": 2.1978, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 9.998687837939621e-05, + "loss": 2.0849, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 9.998339314187497e-05, + "loss": 2.0458, + "step": 43 + }, + { + "epoch": 0.12, + "learning_rate": 9.997949797225268e-05, + "loss": 1.9736, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 9.997519290247507e-05, + "loss": 2.1129, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 9.997047796784959e-05, + "loss": 2.0466, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 9.996535320704514e-05, + "loss": 2.0619, + "step": 47 + }, + { + "epoch": 0.13, + "learning_rate": 9.995981866209174e-05, + "loss": 2.1507, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 9.995387437838026e-05, + "loss": 2.0693, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 9.99475204046619e-05, + "loss": 1.9994, + "step": 50 + }, + { + "epoch": 0.14, + "learning_rate": 9.994075679304798e-05, + "loss": 2.1005, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 9.993358359900931e-05, + "loss": 2.0013, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 9.99260008813759e-05, + "loss": 2.0427, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 9.991800870233638e-05, + "loss": 2.0812, + "step": 54 + }, + { + "epoch": 0.15, + "learning_rate": 9.990960712743754e-05, + "loss": 1.9826, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 9.990079622558377e-05, + "loss": 2.0451, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 9.989157606903649e-05, + "loss": 2.0134, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 9.988194673341362e-05, + "loss": 2.0029, + "step": 58 + }, + { + "epoch": 0.16, + "learning_rate": 9.987190829768882e-05, + "loss": 1.9063, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 9.986146084419099e-05, + "loss": 1.9806, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 9.985060445860352e-05, + "loss": 1.9337, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 9.983933922996361e-05, + "loss": 1.9956, + "step": 62 + }, + { + "epoch": 0.17, + "learning_rate": 9.982766525066152e-05, + "loss": 2.0342, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 9.981558261643981e-05, + "loss": 2.0178, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 9.980309142639261e-05, + "loss": 1.9531, + "step": 65 + }, + { + "epoch": 0.18, + "learning_rate": 9.979019178296473e-05, + "loss": 2.0035, + "step": 66 + }, + { + "epoch": 0.18, + "learning_rate": 9.977688379195087e-05, + "loss": 1.8774, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 9.976316756249472e-05, + "loss": 2.0842, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 9.97490432070881e-05, + "loss": 1.9861, + "step": 69 + }, + { + "epoch": 0.19, + "learning_rate": 9.973451084157006e-05, + "loss": 1.8747, + "step": 70 + }, + { + "epoch": 0.19, + "learning_rate": 9.97195705851258e-05, + "loss": 1.9883, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 9.970422256028587e-05, + "loss": 1.9212, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 9.9688466892925e-05, + "loss": 1.9101, + "step": 73 + }, + { + "epoch": 0.2, + "learning_rate": 9.96723037122612e-05, + "loss": 2.0329, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 9.965573315085462e-05, + "loss": 1.9547, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 9.963875534460653e-05, + "loss": 1.9255, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 9.96213704327581e-05, + "loss": 1.9336, + "step": 77 + }, + { + "epoch": 0.21, + "learning_rate": 9.960357855788938e-05, + "loss": 1.8824, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 9.958537986591803e-05, + "loss": 1.884, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 9.95667745060982e-05, + "loss": 1.9605, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 9.954776263101924e-05, + "loss": 1.8576, + "step": 81 + }, + { + "epoch": 0.22, + "learning_rate": 9.95283443966045e-05, + "loss": 1.9995, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 9.950851996211004e-05, + "loss": 1.944, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 9.948828949012327e-05, + "loss": 1.9854, + "step": 84 + }, + { + "epoch": 0.23, + "learning_rate": 9.946765314656174e-05, + "loss": 1.9635, + "step": 85 + }, + { + "epoch": 0.23, + "learning_rate": 9.944661110067162e-05, + "loss": 2.0009, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 9.942516352502644e-05, + "loss": 1.9422, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 9.940331059552563e-05, + "loss": 1.8891, + "step": 88 + }, + { + "epoch": 0.24, + "learning_rate": 9.938105249139306e-05, + "loss": 1.9399, + "step": 89 + }, + { + "epoch": 0.24, + "learning_rate": 9.935838939517556e-05, + "loss": 1.9283, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 9.933532149274152e-05, + "loss": 1.9132, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 9.931184897327922e-05, + "loss": 1.8997, + "step": 92 + }, + { + "epoch": 0.25, + "learning_rate": 9.928797202929539e-05, + "loss": 1.8664, + "step": 93 + }, + { + "epoch": 0.25, + "learning_rate": 9.92636908566136e-05, + "loss": 1.8697, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 9.923900565437262e-05, + "loss": 1.9275, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 9.921391662502483e-05, + "loss": 1.9188, + "step": 96 + }, + { + "epoch": 0.26, + "learning_rate": 9.918842397433455e-05, + "loss": 1.8527, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 9.916252791137631e-05, + "loss": 1.9087, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 9.913622864853325e-05, + "loss": 1.8689, + "step": 99 + }, + { + "epoch": 0.27, + "learning_rate": 9.91095264014952e-05, + "loss": 1.9366, + "step": 100 + }, + { + "epoch": 0.27, + "learning_rate": 9.908242138925709e-05, + "loss": 1.8494, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 9.905491383411705e-05, + "loss": 1.8945, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 9.902700396167459e-05, + "loss": 1.914, + "step": 103 + }, + { + "epoch": 0.28, + "learning_rate": 9.899869200082881e-05, + "loss": 1.8494, + "step": 104 + }, + { + "epoch": 0.28, + "learning_rate": 9.896997818377642e-05, + "loss": 1.8909, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 9.894086274601e-05, + "loss": 1.8623, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 9.891134592631587e-05, + "loss": 1.8872, + "step": 107 + }, + { + "epoch": 0.29, + "learning_rate": 9.88814279667723e-05, + "loss": 1.8787, + "step": 108 + }, + { + "epoch": 0.29, + "learning_rate": 9.885110911274738e-05, + "loss": 1.8782, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 9.88203896128972e-05, + "loss": 1.7577, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 9.878926971916354e-05, + "loss": 1.8954, + "step": 111 + }, + { + "epoch": 0.3, + "learning_rate": 9.87577496867721e-05, + "loss": 1.8517, + "step": 112 + }, + { + "epoch": 0.3, + "learning_rate": 9.872582977423018e-05, + "loss": 1.839, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 9.869351024332467e-05, + "loss": 1.8851, + "step": 114 + }, + { + "epoch": 0.31, + "learning_rate": 9.866079135911986e-05, + "loss": 1.854, + "step": 115 + }, + { + "epoch": 0.31, + "learning_rate": 9.86276733899553e-05, + "loss": 1.8384, + "step": 116 + }, + { + "epoch": 0.31, + "learning_rate": 9.85941566074436e-05, + "loss": 1.8775, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 9.856024128646812e-05, + "loss": 1.8111, + "step": 118 + }, + { + "epoch": 0.32, + "learning_rate": 9.852592770518085e-05, + "loss": 1.8075, + "step": 119 + }, + { + "epoch": 0.32, + "learning_rate": 9.849121614500001e-05, + "loss": 1.7952, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 9.845610689060782e-05, + "loss": 1.8371, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 9.842060022994814e-05, + "loss": 1.8761, + "step": 122 + }, + { + "epoch": 0.33, + "learning_rate": 9.838469645422406e-05, + "loss": 1.8242, + "step": 123 + }, + { + "epoch": 0.33, + "learning_rate": 9.834839585789559e-05, + "loss": 1.8772, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 9.831169873867723e-05, + "loss": 1.7998, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 9.827460539753546e-05, + "loss": 1.8881, + "step": 126 + }, + { + "epoch": 0.34, + "learning_rate": 9.823711613868636e-05, + "loss": 1.8629, + "step": 127 + }, + { + "epoch": 0.34, + "learning_rate": 9.819923126959308e-05, + "loss": 1.8018, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 9.816095110096325e-05, + "loss": 1.8126, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 9.812227594674659e-05, + "loss": 1.8217, + "step": 130 + }, + { + "epoch": 0.35, + "learning_rate": 9.808320612413217e-05, + "loss": 1.8537, + "step": 131 + }, + { + "epoch": 0.35, + "learning_rate": 9.804374195354591e-05, + "loss": 1.8028, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 9.80038837586479e-05, + "loss": 1.8954, + "step": 133 + }, + { + "epoch": 0.36, + "learning_rate": 9.796363186632985e-05, + "loss": 1.8818, + "step": 134 + }, + { + "epoch": 0.36, + "learning_rate": 9.792298660671217e-05, + "loss": 1.8965, + "step": 135 + }, + { + "epoch": 0.36, + "learning_rate": 9.788194831314158e-05, + "loss": 1.8414, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 9.784051732218808e-05, + "loss": 1.8456, + "step": 137 + }, + { + "epoch": 0.37, + "learning_rate": 9.779869397364247e-05, + "loss": 1.8479, + "step": 138 + }, + { + "epoch": 0.37, + "learning_rate": 9.775647861051329e-05, + "loss": 1.8176, + "step": 139 + }, + { + "epoch": 0.37, + "learning_rate": 9.771387157902417e-05, + "loss": 1.7994, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 9.767087322861102e-05, + "loss": 1.8153, + "step": 141 + }, + { + "epoch": 0.38, + "learning_rate": 9.7627483911919e-05, + "loss": 1.8048, + "step": 142 + }, + { + "epoch": 0.38, + "learning_rate": 9.758370398479981e-05, + "loss": 1.8491, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 9.753953380630862e-05, + "loss": 1.82, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 9.74949737387013e-05, + "loss": 1.922, + "step": 145 + }, + { + "epoch": 0.39, + "learning_rate": 9.745002414743119e-05, + "loss": 1.8061, + "step": 146 + }, + { + "epoch": 0.39, + "learning_rate": 9.740468540114638e-05, + "loss": 1.8676, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 9.735895787168652e-05, + "loss": 1.904, + "step": 148 + }, + { + "epoch": 0.4, + "learning_rate": 9.73128419340798e-05, + "loss": 1.7908, + "step": 149 + }, + { + "epoch": 0.4, + "learning_rate": 9.726633796653994e-05, + "loss": 1.8096, + "step": 150 + }, + { + "epoch": 0.4, + "learning_rate": 9.721944635046297e-05, + "loss": 1.8669, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 9.717216747042419e-05, + "loss": 1.7547, + "step": 152 + }, + { + "epoch": 0.41, + "learning_rate": 9.712450171417502e-05, + "loss": 1.7849, + "step": 153 + }, + { + "epoch": 0.41, + "learning_rate": 9.707644947263976e-05, + "loss": 1.8122, + "step": 154 + }, + { + "epoch": 0.41, + "learning_rate": 9.702801113991243e-05, + "loss": 1.768, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 9.697918711325353e-05, + "loss": 1.8519, + "step": 156 + }, + { + "epoch": 0.42, + "learning_rate": 9.692997779308677e-05, + "loss": 1.7329, + "step": 157 + }, + { + "epoch": 0.42, + "learning_rate": 9.688038358299578e-05, + "loss": 1.7725, + "step": 158 + }, + { + "epoch": 0.42, + "learning_rate": 9.683040488972086e-05, + "loss": 1.7678, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 9.678004212315554e-05, + "loss": 1.7351, + "step": 160 + }, + { + "epoch": 0.43, + "learning_rate": 9.672929569634331e-05, + "loss": 1.8248, + "step": 161 + }, + { + "epoch": 0.43, + "learning_rate": 9.66781660254742e-05, + "loss": 1.7674, + "step": 162 + }, + { + "epoch": 0.43, + "learning_rate": 9.662665352988133e-05, + "loss": 1.7685, + "step": 163 + }, + { + "epoch": 0.44, + "learning_rate": 9.657475863203755e-05, + "loss": 1.8122, + "step": 164 + }, + { + "epoch": 0.44, + "learning_rate": 9.65224817575519e-05, + "loss": 1.8858, + "step": 165 + }, + { + "epoch": 0.44, + "learning_rate": 9.646982333516616e-05, + "loss": 1.8532, + "step": 166 + }, + { + "epoch": 0.44, + "learning_rate": 9.641678379675135e-05, + "loss": 1.8341, + "step": 167 + }, + { + "epoch": 0.45, + "learning_rate": 9.63633635773041e-05, + "loss": 1.6986, + "step": 168 + }, + { + "epoch": 0.45, + "learning_rate": 9.63095631149432e-05, + "loss": 1.7467, + "step": 169 + }, + { + "epoch": 0.45, + "learning_rate": 9.625538285090595e-05, + "loss": 1.769, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 9.620082322954448e-05, + "loss": 1.7554, + "step": 171 + }, + { + "epoch": 0.46, + "learning_rate": 9.614588469832225e-05, + "loss": 1.6883, + "step": 172 + }, + { + "epoch": 0.46, + "learning_rate": 9.609056770781026e-05, + "loss": 1.7908, + "step": 173 + }, + { + "epoch": 0.46, + "learning_rate": 9.603487271168336e-05, + "loss": 1.769, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 9.597880016671665e-05, + "loss": 1.8701, + "step": 175 + }, + { + "epoch": 0.47, + "learning_rate": 9.592235053278157e-05, + "loss": 1.7404, + "step": 176 + }, + { + "epoch": 0.47, + "learning_rate": 9.586552427284223e-05, + "loss": 1.7462, + "step": 177 + }, + { + "epoch": 0.47, + "learning_rate": 9.580832185295156e-05, + "loss": 1.7704, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 9.575074374224758e-05, + "loss": 1.7354, + "step": 179 + }, + { + "epoch": 0.48, + "learning_rate": 9.569279041294944e-05, + "loss": 1.9016, + "step": 180 + }, + { + "epoch": 0.48, + "learning_rate": 9.563446234035358e-05, + "loss": 1.7546, + "step": 181 + }, + { + "epoch": 0.48, + "learning_rate": 9.557576000282991e-05, + "loss": 1.7814, + "step": 182 + }, + { + "epoch": 0.49, + "learning_rate": 9.551668388181776e-05, + "loss": 1.7423, + "step": 183 + }, + { + "epoch": 0.49, + "learning_rate": 9.545723446182202e-05, + "loss": 1.737, + "step": 184 + }, + { + "epoch": 0.49, + "learning_rate": 9.539741223040915e-05, + "loss": 1.7577, + "step": 185 + }, + { + "epoch": 0.49, + "learning_rate": 9.533721767820317e-05, + "loss": 1.7864, + "step": 186 + }, + { + "epoch": 0.5, + "learning_rate": 9.527665129888161e-05, + "loss": 1.7015, + "step": 187 + }, + { + "epoch": 0.5, + "learning_rate": 9.521571358917153e-05, + "loss": 1.7017, + "step": 188 + }, + { + "epoch": 0.5, + "learning_rate": 9.51544050488454e-05, + "loss": 1.7616, + "step": 189 + }, + { + "epoch": 0.5, + "learning_rate": 9.509272618071699e-05, + "loss": 1.7538, + "step": 190 + }, + { + "epoch": 0.51, + "learning_rate": 9.503067749063726e-05, + "loss": 1.8012, + "step": 191 + }, + { + "epoch": 0.51, + "learning_rate": 9.496825948749024e-05, + "loss": 1.7607, + "step": 192 + }, + { + "epoch": 0.51, + "learning_rate": 9.490547268318881e-05, + "loss": 1.7575, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 9.484231759267054e-05, + "loss": 1.7102, + "step": 194 + }, + { + "epoch": 0.52, + "learning_rate": 9.477879473389345e-05, + "loss": 1.7801, + "step": 195 + }, + { + "epoch": 0.52, + "learning_rate": 9.471490462783175e-05, + "loss": 1.7379, + "step": 196 + }, + { + "epoch": 0.52, + "learning_rate": 9.465064779847156e-05, + "loss": 1.718, + "step": 197 + }, + { + "epoch": 0.53, + "learning_rate": 9.458602477280668e-05, + "loss": 1.6832, + "step": 198 + }, + { + "epoch": 0.53, + "learning_rate": 9.452103608083417e-05, + "loss": 1.7995, + "step": 199 + }, + { + "epoch": 0.53, + "learning_rate": 9.445568225555014e-05, + "loss": 1.7036, + "step": 200 + }, + { + "epoch": 0.53, + "learning_rate": 9.438996383294516e-05, + "loss": 1.6973, + "step": 201 + }, + { + "epoch": 0.54, + "learning_rate": 9.43238813520001e-05, + "loss": 1.757, + "step": 202 + }, + { + "epoch": 0.54, + "learning_rate": 9.425743535468156e-05, + "loss": 1.7293, + "step": 203 + }, + { + "epoch": 0.54, + "learning_rate": 9.41906263859375e-05, + "loss": 1.8156, + "step": 204 + }, + { + "epoch": 0.54, + "learning_rate": 9.412345499369271e-05, + "loss": 1.7483, + "step": 205 + }, + { + "epoch": 0.55, + "learning_rate": 9.405592172884437e-05, + "loss": 1.7947, + "step": 206 + }, + { + "epoch": 0.55, + "learning_rate": 9.39880271452575e-05, + "loss": 1.8237, + "step": 207 + }, + { + "epoch": 0.55, + "learning_rate": 9.391977179976043e-05, + "loss": 1.6674, + "step": 208 + }, + { + "epoch": 0.55, + "learning_rate": 9.385115625214022e-05, + "loss": 1.7484, + "step": 209 + }, + { + "epoch": 0.56, + "learning_rate": 9.378218106513812e-05, + "loss": 1.7449, + "step": 210 + }, + { + "epoch": 0.56, + "learning_rate": 9.371284680444483e-05, + "loss": 1.7444, + "step": 211 + }, + { + "epoch": 0.56, + "learning_rate": 9.364315403869606e-05, + "loss": 1.6666, + "step": 212 + }, + { + "epoch": 0.56, + "learning_rate": 9.357310333946763e-05, + "loss": 1.7569, + "step": 213 + }, + { + "epoch": 0.57, + "learning_rate": 9.3502695281271e-05, + "loss": 1.6859, + "step": 214 + }, + { + "epoch": 0.57, + "learning_rate": 9.343193044154843e-05, + "loss": 1.7095, + "step": 215 + }, + { + "epoch": 0.57, + "learning_rate": 9.336080940066826e-05, + "loss": 1.8226, + "step": 216 + }, + { + "epoch": 0.58, + "learning_rate": 9.328933274192015e-05, + "loss": 1.8059, + "step": 217 + }, + { + "epoch": 0.58, + "learning_rate": 9.32175010515104e-05, + "loss": 1.699, + "step": 218 + }, + { + "epoch": 0.58, + "learning_rate": 9.314531491855692e-05, + "loss": 1.7162, + "step": 219 + }, + { + "epoch": 0.58, + "learning_rate": 9.307277493508465e-05, + "loss": 1.7898, + "step": 220 + }, + { + "epoch": 0.59, + "learning_rate": 9.299988169602054e-05, + "loss": 1.8133, + "step": 221 + }, + { + "epoch": 0.59, + "learning_rate": 9.292663579918873e-05, + "loss": 1.81, + "step": 222 + }, + { + "epoch": 0.59, + "learning_rate": 9.285303784530558e-05, + "loss": 1.694, + "step": 223 + }, + { + "epoch": 0.59, + "learning_rate": 9.277908843797492e-05, + "loss": 1.648, + "step": 224 + }, + { + "epoch": 0.6, + "learning_rate": 9.270478818368287e-05, + "loss": 1.8439, + "step": 225 + }, + { + "epoch": 0.6, + "learning_rate": 9.263013769179298e-05, + "loss": 1.7486, + "step": 226 + }, + { + "epoch": 0.6, + "learning_rate": 9.25551375745413e-05, + "loss": 1.7601, + "step": 227 + }, + { + "epoch": 0.6, + "learning_rate": 9.247978844703122e-05, + "loss": 1.7399, + "step": 228 + }, + { + "epoch": 0.61, + "learning_rate": 9.240409092722852e-05, + "loss": 1.8162, + "step": 229 + }, + { + "epoch": 0.61, + "learning_rate": 9.232804563595626e-05, + "loss": 1.6533, + "step": 230 + }, + { + "epoch": 0.61, + "learning_rate": 9.22516531968897e-05, + "loss": 1.7488, + "step": 231 + }, + { + "epoch": 0.62, + "learning_rate": 9.217491423655123e-05, + "loss": 1.7544, + "step": 232 + }, + { + "epoch": 0.62, + "learning_rate": 9.209782938430509e-05, + "loss": 1.6406, + "step": 233 + }, + { + "epoch": 0.62, + "learning_rate": 9.202039927235241e-05, + "loss": 1.7158, + "step": 234 + }, + { + "epoch": 0.62, + "learning_rate": 9.194262453572586e-05, + "loss": 1.7827, + "step": 235 + }, + { + "epoch": 0.63, + "learning_rate": 9.186450581228454e-05, + "loss": 1.6567, + "step": 236 + }, + { + "epoch": 0.63, + "learning_rate": 9.178604374270867e-05, + "loss": 1.7305, + "step": 237 + }, + { + "epoch": 0.63, + "learning_rate": 9.170723897049439e-05, + "loss": 1.7544, + "step": 238 + }, + { + "epoch": 0.63, + "learning_rate": 9.162809214194851e-05, + "loss": 1.7247, + "step": 239 + }, + { + "epoch": 0.64, + "learning_rate": 9.154860390618313e-05, + "loss": 1.8192, + "step": 240 + }, + { + "epoch": 0.64, + "learning_rate": 9.146877491511035e-05, + "loss": 1.7016, + "step": 241 + }, + { + "epoch": 0.64, + "learning_rate": 9.138860582343696e-05, + "loss": 1.7377, + "step": 242 + }, + { + "epoch": 0.64, + "learning_rate": 9.130809728865901e-05, + "loss": 1.6459, + "step": 243 + }, + { + "epoch": 0.65, + "learning_rate": 9.122724997105647e-05, + "loss": 1.7161, + "step": 244 + }, + { + "epoch": 0.65, + "learning_rate": 9.114606453368779e-05, + "loss": 1.6868, + "step": 245 + }, + { + "epoch": 0.65, + "learning_rate": 9.106454164238442e-05, + "loss": 1.7086, + "step": 246 + }, + { + "epoch": 0.66, + "learning_rate": 9.098268196574546e-05, + "loss": 1.7164, + "step": 247 + }, + { + "epoch": 0.66, + "learning_rate": 9.090048617513207e-05, + "loss": 1.6877, + "step": 248 + }, + { + "epoch": 0.66, + "learning_rate": 9.081795494466201e-05, + "loss": 1.6701, + "step": 249 + }, + { + "epoch": 0.66, + "learning_rate": 9.073508895120411e-05, + "loss": 1.7393, + "step": 250 + }, + { + "epoch": 0.67, + "learning_rate": 9.065188887437273e-05, + "loss": 1.746, + "step": 251 + }, + { + "epoch": 0.67, + "learning_rate": 9.056835539652211e-05, + "loss": 1.79, + "step": 252 + }, + { + "epoch": 0.67, + "learning_rate": 9.048448920274088e-05, + "loss": 1.6791, + "step": 253 + }, + { + "epoch": 0.67, + "learning_rate": 9.040029098084643e-05, + "loss": 1.6771, + "step": 254 + }, + { + "epoch": 0.68, + "learning_rate": 9.031576142137919e-05, + "loss": 1.644, + "step": 255 + }, + { + "epoch": 0.68, + "learning_rate": 9.023090121759699e-05, + "loss": 1.7242, + "step": 256 + }, + { + "epoch": 0.68, + "learning_rate": 9.01457110654694e-05, + "loss": 1.7745, + "step": 257 + }, + { + "epoch": 0.68, + "learning_rate": 9.006019166367208e-05, + "loss": 1.7381, + "step": 258 + }, + { + "epoch": 0.69, + "learning_rate": 8.997434371358093e-05, + "loss": 1.6923, + "step": 259 + }, + { + "epoch": 0.69, + "learning_rate": 8.98881679192664e-05, + "loss": 1.8049, + "step": 260 + }, + { + "epoch": 0.69, + "learning_rate": 8.980166498748774e-05, + "loss": 1.6683, + "step": 261 + }, + { + "epoch": 0.69, + "learning_rate": 8.971483562768712e-05, + "loss": 1.7033, + "step": 262 + }, + { + "epoch": 0.7, + "learning_rate": 8.962768055198394e-05, + "loss": 1.761, + "step": 263 + }, + { + "epoch": 0.7, + "learning_rate": 8.954020047516884e-05, + "loss": 1.7824, + "step": 264 + }, + { + "epoch": 0.7, + "learning_rate": 8.945239611469796e-05, + "loss": 1.725, + "step": 265 + }, + { + "epoch": 0.71, + "learning_rate": 8.9364268190687e-05, + "loss": 1.6417, + "step": 266 + }, + { + "epoch": 0.71, + "learning_rate": 8.927581742590533e-05, + "loss": 1.7119, + "step": 267 + }, + { + "epoch": 0.71, + "learning_rate": 8.918704454577003e-05, + "loss": 1.7466, + "step": 268 + }, + { + "epoch": 0.71, + "learning_rate": 8.909795027833998e-05, + "loss": 1.6963, + "step": 269 + }, + { + "epoch": 0.72, + "learning_rate": 8.900853535430986e-05, + "loss": 1.7345, + "step": 270 + }, + { + "epoch": 0.72, + "learning_rate": 8.891880050700424e-05, + "loss": 1.6779, + "step": 271 + }, + { + "epoch": 0.72, + "learning_rate": 8.882874647237138e-05, + "loss": 1.6923, + "step": 272 + }, + { + "epoch": 0.72, + "learning_rate": 8.873837398897742e-05, + "loss": 1.6592, + "step": 273 + }, + { + "epoch": 0.73, + "learning_rate": 8.864768379800016e-05, + "loss": 1.6333, + "step": 274 + }, + { + "epoch": 0.73, + "learning_rate": 8.855667664322307e-05, + "loss": 1.7154, + "step": 275 + }, + { + "epoch": 0.73, + "learning_rate": 8.846535327102909e-05, + "loss": 1.7901, + "step": 276 + }, + { + "epoch": 0.73, + "learning_rate": 8.837371443039466e-05, + "loss": 1.6907, + "step": 277 + }, + { + "epoch": 0.74, + "learning_rate": 8.828176087288345e-05, + "loss": 1.7244, + "step": 278 + }, + { + "epoch": 0.74, + "learning_rate": 8.818949335264021e-05, + "loss": 1.7037, + "step": 279 + }, + { + "epoch": 0.74, + "learning_rate": 8.809691262638467e-05, + "loss": 1.6272, + "step": 280 + }, + { + "epoch": 0.75, + "learning_rate": 8.800401945340523e-05, + "loss": 1.6574, + "step": 281 + }, + { + "epoch": 0.75, + "learning_rate": 8.791081459555281e-05, + "loss": 1.6544, + "step": 282 + }, + { + "epoch": 0.75, + "learning_rate": 8.781729881723458e-05, + "loss": 1.6271, + "step": 283 + }, + { + "epoch": 0.75, + "learning_rate": 8.772347288540763e-05, + "loss": 1.7392, + "step": 284 + }, + { + "epoch": 0.76, + "learning_rate": 8.762933756957281e-05, + "loss": 1.6172, + "step": 285 + }, + { + "epoch": 0.76, + "learning_rate": 8.753489364176826e-05, + "loss": 1.7241, + "step": 286 + }, + { + "epoch": 0.76, + "learning_rate": 8.744014187656321e-05, + "loss": 1.726, + "step": 287 + }, + { + "epoch": 0.76, + "learning_rate": 8.734508305105158e-05, + "loss": 1.699, + "step": 288 + }, + { + "epoch": 0.77, + "learning_rate": 8.724971794484556e-05, + "loss": 1.6371, + "step": 289 + }, + { + "epoch": 0.77, + "learning_rate": 8.715404734006931e-05, + "loss": 1.7337, + "step": 290 + }, + { + "epoch": 0.77, + "learning_rate": 8.705807202135248e-05, + "loss": 1.6385, + "step": 291 + }, + { + "epoch": 0.77, + "learning_rate": 8.69617927758238e-05, + "loss": 1.7023, + "step": 292 + }, + { + "epoch": 0.78, + "learning_rate": 8.686521039310454e-05, + "loss": 1.6796, + "step": 293 + }, + { + "epoch": 0.78, + "learning_rate": 8.676832566530221e-05, + "loss": 1.7157, + "step": 294 + }, + { + "epoch": 0.78, + "learning_rate": 8.667113938700396e-05, + "loss": 1.6873, + "step": 295 + }, + { + "epoch": 0.79, + "learning_rate": 8.657365235526995e-05, + "loss": 1.7194, + "step": 296 + }, + { + "epoch": 0.79, + "learning_rate": 8.647586536962707e-05, + "loss": 1.7695, + "step": 297 + }, + { + "epoch": 0.79, + "learning_rate": 8.637777923206215e-05, + "loss": 1.6464, + "step": 298 + }, + { + "epoch": 0.79, + "learning_rate": 8.62793947470155e-05, + "loss": 1.7462, + "step": 299 + }, + { + "epoch": 0.8, + "learning_rate": 8.618071272137431e-05, + "loss": 1.6386, + "step": 300 + }, + { + "epoch": 0.8, + "learning_rate": 8.608173396446598e-05, + "loss": 1.6692, + "step": 301 + }, + { + "epoch": 0.8, + "learning_rate": 8.598245928805152e-05, + "loss": 1.7241, + "step": 302 + }, + { + "epoch": 0.8, + "learning_rate": 8.588288950631889e-05, + "loss": 1.744, + "step": 303 + }, + { + "epoch": 0.81, + "learning_rate": 8.578302543587631e-05, + "loss": 1.6958, + "step": 304 + }, + { + "epoch": 0.81, + "learning_rate": 8.568286789574557e-05, + "loss": 1.7288, + "step": 305 + }, + { + "epoch": 0.81, + "learning_rate": 8.558241770735531e-05, + "loss": 1.7376, + "step": 306 + }, + { + "epoch": 0.81, + "learning_rate": 8.548167569453429e-05, + "loss": 1.668, + "step": 307 + }, + { + "epoch": 0.82, + "learning_rate": 8.538064268350465e-05, + "loss": 1.6949, + "step": 308 + }, + { + "epoch": 0.82, + "learning_rate": 8.527931950287507e-05, + "loss": 1.645, + "step": 309 + }, + { + "epoch": 0.82, + "learning_rate": 8.517770698363404e-05, + "loss": 1.6848, + "step": 310 + }, + { + "epoch": 0.82, + "learning_rate": 8.507580595914303e-05, + "loss": 1.7163, + "step": 311 + }, + { + "epoch": 0.83, + "learning_rate": 8.497361726512965e-05, + "loss": 1.7366, + "step": 312 + }, + { + "epoch": 0.83, + "learning_rate": 8.487114173968074e-05, + "loss": 1.7858, + "step": 313 + }, + { + "epoch": 0.83, + "learning_rate": 8.476838022323561e-05, + "loss": 1.6975, + "step": 314 + }, + { + "epoch": 0.84, + "learning_rate": 8.466533355857908e-05, + "loss": 1.7549, + "step": 315 + }, + { + "epoch": 0.84, + "learning_rate": 8.456200259083454e-05, + "loss": 1.6796, + "step": 316 + }, + { + "epoch": 0.84, + "learning_rate": 8.445838816745709e-05, + "loss": 1.6895, + "step": 317 + }, + { + "epoch": 0.84, + "learning_rate": 8.435449113822655e-05, + "loss": 1.6524, + "step": 318 + }, + { + "epoch": 0.85, + "learning_rate": 8.425031235524046e-05, + "loss": 1.7097, + "step": 319 + }, + { + "epoch": 0.85, + "learning_rate": 8.414585267290715e-05, + "loss": 1.7021, + "step": 320 + }, + { + "epoch": 0.85, + "learning_rate": 8.404111294793873e-05, + "loss": 1.7239, + "step": 321 + }, + { + "epoch": 0.85, + "learning_rate": 8.393609403934398e-05, + "loss": 1.6201, + "step": 322 + }, + { + "epoch": 0.86, + "learning_rate": 8.383079680842145e-05, + "loss": 1.6921, + "step": 323 + }, + { + "epoch": 0.86, + "learning_rate": 8.372522211875224e-05, + "loss": 1.6285, + "step": 324 + }, + { + "epoch": 0.86, + "learning_rate": 8.361937083619304e-05, + "loss": 1.692, + "step": 325 + }, + { + "epoch": 0.86, + "learning_rate": 8.351324382886895e-05, + "loss": 1.7094, + "step": 326 + }, + { + "epoch": 0.87, + "learning_rate": 8.340684196716639e-05, + "loss": 1.661, + "step": 327 + }, + { + "epoch": 0.87, + "learning_rate": 8.330016612372599e-05, + "loss": 1.6573, + "step": 328 + }, + { + "epoch": 0.87, + "learning_rate": 8.319321717343535e-05, + "loss": 1.7666, + "step": 329 + }, + { + "epoch": 0.88, + "learning_rate": 8.308599599342202e-05, + "loss": 1.6458, + "step": 330 + }, + { + "epoch": 0.88, + "learning_rate": 8.297850346304608e-05, + "loss": 1.6689, + "step": 331 + }, + { + "epoch": 0.88, + "learning_rate": 8.287074046389312e-05, + "loss": 1.6694, + "step": 332 + }, + { + "epoch": 0.88, + "learning_rate": 8.276270787976696e-05, + "loss": 1.7342, + "step": 333 + }, + { + "epoch": 0.89, + "learning_rate": 8.265440659668236e-05, + "loss": 1.7041, + "step": 334 + }, + { + "epoch": 0.89, + "learning_rate": 8.254583750285776e-05, + "loss": 1.707, + "step": 335 + }, + { + "epoch": 0.89, + "learning_rate": 8.243700148870805e-05, + "loss": 1.6359, + "step": 336 + }, + { + "epoch": 0.89, + "learning_rate": 8.232789944683723e-05, + "loss": 1.6944, + "step": 337 + }, + { + "epoch": 0.9, + "learning_rate": 8.221853227203106e-05, + "loss": 1.6221, + "step": 338 + }, + { + "epoch": 0.9, + "learning_rate": 8.210890086124977e-05, + "loss": 1.6485, + "step": 339 + }, + { + "epoch": 0.9, + "learning_rate": 8.199900611362068e-05, + "loss": 1.6927, + "step": 340 + }, + { + "epoch": 0.9, + "learning_rate": 8.188884893043083e-05, + "loss": 1.71, + "step": 341 + }, + { + "epoch": 0.91, + "learning_rate": 8.177843021511962e-05, + "loss": 1.6721, + "step": 342 + }, + { + "epoch": 0.91, + "learning_rate": 8.166775087327133e-05, + "loss": 1.7052, + "step": 343 + }, + { + "epoch": 0.91, + "learning_rate": 8.155681181260777e-05, + "loss": 1.679, + "step": 344 + }, + { + "epoch": 0.92, + "learning_rate": 8.144561394298075e-05, + "loss": 1.6976, + "step": 345 + }, + { + "epoch": 0.92, + "learning_rate": 8.133415817636471e-05, + "loss": 1.591, + "step": 346 + }, + { + "epoch": 0.92, + "learning_rate": 8.12224454268492e-05, + "loss": 1.7302, + "step": 347 + }, + { + "epoch": 0.92, + "learning_rate": 8.111047661063136e-05, + "loss": 1.649, + "step": 348 + }, + { + "epoch": 0.93, + "learning_rate": 8.099825264600842e-05, + "loss": 1.7271, + "step": 349 + }, + { + "epoch": 0.93, + "learning_rate": 8.08857744533702e-05, + "loss": 1.7033, + "step": 350 + }, + { + "epoch": 0.93, + "learning_rate": 8.077304295519151e-05, + "loss": 1.6853, + "step": 351 + }, + { + "epoch": 0.93, + "learning_rate": 8.066005907602465e-05, + "loss": 1.6198, + "step": 352 + }, + { + "epoch": 0.94, + "learning_rate": 8.054682374249174e-05, + "loss": 1.5788, + "step": 353 + }, + { + "epoch": 0.94, + "learning_rate": 8.04333378832772e-05, + "loss": 1.6358, + "step": 354 + }, + { + "epoch": 0.94, + "learning_rate": 8.031960242912011e-05, + "loss": 1.6205, + "step": 355 + }, + { + "epoch": 0.94, + "learning_rate": 8.020561831280654e-05, + "loss": 1.6251, + "step": 356 + }, + { + "epoch": 0.95, + "learning_rate": 8.009138646916196e-05, + "loss": 1.6325, + "step": 357 + }, + { + "epoch": 0.95, + "learning_rate": 7.997690783504353e-05, + "loss": 1.5752, + "step": 358 + }, + { + "epoch": 0.95, + "learning_rate": 7.986218334933241e-05, + "loss": 1.702, + "step": 359 + }, + { + "epoch": 0.95, + "learning_rate": 7.97472139529261e-05, + "loss": 1.6434, + "step": 360 + }, + { + "epoch": 0.96, + "learning_rate": 7.963200058873072e-05, + "loss": 1.6503, + "step": 361 + }, + { + "epoch": 0.96, + "learning_rate": 7.951654420165323e-05, + "loss": 1.6811, + "step": 362 + }, + { + "epoch": 0.96, + "learning_rate": 7.940084573859369e-05, + "loss": 1.6883, + "step": 363 + }, + { + "epoch": 0.97, + "learning_rate": 7.928490614843757e-05, + "loss": 1.6747, + "step": 364 + }, + { + "epoch": 0.97, + "learning_rate": 7.916872638204788e-05, + "loss": 1.6585, + "step": 365 + }, + { + "epoch": 0.97, + "learning_rate": 7.90523073922574e-05, + "loss": 1.6598, + "step": 366 + }, + { + "epoch": 0.97, + "learning_rate": 7.893565013386087e-05, + "loss": 1.6732, + "step": 367 + }, + { + "epoch": 0.98, + "learning_rate": 7.881875556360717e-05, + "loss": 1.6139, + "step": 368 + }, + { + "epoch": 0.98, + "learning_rate": 7.870162464019144e-05, + "loss": 1.7143, + "step": 369 + }, + { + "epoch": 0.98, + "learning_rate": 7.858425832424728e-05, + "loss": 1.6749, + "step": 370 + }, + { + "epoch": 0.98, + "learning_rate": 7.846665757833878e-05, + "loss": 1.7282, + "step": 371 + }, + { + "epoch": 0.99, + "learning_rate": 7.83488233669527e-05, + "loss": 1.6329, + "step": 372 + }, + { + "epoch": 0.99, + "learning_rate": 7.823075665649056e-05, + "loss": 1.6273, + "step": 373 + }, + { + "epoch": 0.99, + "learning_rate": 7.811245841526063e-05, + "loss": 1.6262, + "step": 374 + }, + { + "epoch": 0.99, + "learning_rate": 7.79939296134701e-05, + "loss": 1.6977, + "step": 375 + }, + { + "epoch": 1.0, + "learning_rate": 7.787517122321706e-05, + "loss": 1.735, + "step": 376 + }, + { + "epoch": 1.0, + "learning_rate": 7.775618421848252e-05, + "loss": 1.6294, + "step": 377 + }, + { + "epoch": 1.0, + "learning_rate": 7.763696957512246e-05, + "loss": 1.5115, + "step": 378 + }, + { + "epoch": 1.01, + "learning_rate": 7.75175282708598e-05, + "loss": 1.5511, + "step": 379 + }, + { + "epoch": 1.01, + "learning_rate": 7.739786128527643e-05, + "loss": 1.6208, + "step": 380 + }, + { + "epoch": 1.01, + "learning_rate": 7.727796959980504e-05, + "loss": 1.5682, + "step": 381 + }, + { + "epoch": 1.01, + "learning_rate": 7.715785419772126e-05, + "loss": 1.5706, + "step": 382 + }, + { + "epoch": 1.02, + "learning_rate": 7.703751606413542e-05, + "loss": 1.6126, + "step": 383 + }, + { + "epoch": 1.02, + "learning_rate": 7.691695618598467e-05, + "loss": 1.6065, + "step": 384 + }, + { + "epoch": 1.02, + "learning_rate": 7.679617555202463e-05, + "loss": 1.5688, + "step": 385 + }, + { + "epoch": 1.02, + "learning_rate": 7.667517515282152e-05, + "loss": 1.5788, + "step": 386 + }, + { + "epoch": 1.03, + "learning_rate": 7.655395598074389e-05, + "loss": 1.513, + "step": 387 + }, + { + "epoch": 1.03, + "learning_rate": 7.643251902995452e-05, + "loss": 1.5044, + "step": 388 + }, + { + "epoch": 1.03, + "learning_rate": 7.63108652964023e-05, + "loss": 1.5667, + "step": 389 + }, + { + "epoch": 1.03, + "learning_rate": 7.618899577781404e-05, + "loss": 1.5765, + "step": 390 + }, + { + "epoch": 1.04, + "learning_rate": 7.606691147368627e-05, + "loss": 1.5661, + "step": 391 + }, + { + "epoch": 1.04, + "learning_rate": 7.594461338527701e-05, + "loss": 1.5763, + "step": 392 + }, + { + "epoch": 1.04, + "learning_rate": 7.582210251559769e-05, + "loss": 1.5253, + "step": 393 + }, + { + "epoch": 1.05, + "learning_rate": 7.569937986940477e-05, + "loss": 1.5982, + "step": 394 + }, + { + "epoch": 1.05, + "learning_rate": 7.557644645319158e-05, + "loss": 1.5583, + "step": 395 + }, + { + "epoch": 1.05, + "learning_rate": 7.545330327518007e-05, + "loss": 1.488, + "step": 396 + }, + { + "epoch": 1.05, + "learning_rate": 7.532995134531251e-05, + "loss": 1.5368, + "step": 397 + }, + { + "epoch": 1.06, + "learning_rate": 7.520639167524322e-05, + "loss": 1.5863, + "step": 398 + }, + { + "epoch": 1.06, + "learning_rate": 7.508262527833029e-05, + "loss": 1.6736, + "step": 399 + }, + { + "epoch": 1.06, + "learning_rate": 7.495865316962723e-05, + "loss": 1.5957, + "step": 400 + }, + { + "epoch": 1.06, + "learning_rate": 7.483447636587467e-05, + "loss": 1.5553, + "step": 401 + }, + { + "epoch": 1.07, + "learning_rate": 7.471009588549205e-05, + "loss": 1.5217, + "step": 402 + }, + { + "epoch": 1.07, + "learning_rate": 7.458551274856918e-05, + "loss": 1.5806, + "step": 403 + }, + { + "epoch": 1.07, + "learning_rate": 7.4460727976858e-05, + "loss": 1.6075, + "step": 404 + }, + { + "epoch": 1.07, + "learning_rate": 7.433574259376407e-05, + "loss": 1.5302, + "step": 405 + }, + { + "epoch": 1.08, + "learning_rate": 7.421055762433826e-05, + "loss": 1.4965, + "step": 406 + }, + { + "epoch": 1.08, + "learning_rate": 7.408517409526835e-05, + "loss": 1.6272, + "step": 407 + }, + { + "epoch": 1.08, + "learning_rate": 7.39595930348705e-05, + "loss": 1.5668, + "step": 408 + }, + { + "epoch": 1.08, + "learning_rate": 7.3833815473081e-05, + "loss": 1.5652, + "step": 409 + }, + { + "epoch": 1.09, + "learning_rate": 7.370784244144762e-05, + "loss": 1.5885, + "step": 410 + }, + { + "epoch": 1.09, + "learning_rate": 7.358167497312134e-05, + "loss": 1.5324, + "step": 411 + }, + { + "epoch": 1.09, + "learning_rate": 7.345531410284774e-05, + "loss": 1.6304, + "step": 412 + }, + { + "epoch": 1.1, + "learning_rate": 7.332876086695855e-05, + "loss": 1.5931, + "step": 413 + }, + { + "epoch": 1.1, + "learning_rate": 7.320201630336318e-05, + "loss": 1.5992, + "step": 414 + }, + { + "epoch": 1.1, + "learning_rate": 7.307508145154019e-05, + "loss": 1.5467, + "step": 415 + }, + { + "epoch": 1.1, + "learning_rate": 7.294795735252875e-05, + "loss": 1.5775, + "step": 416 + }, + { + "epoch": 1.11, + "learning_rate": 7.282064504892015e-05, + "loss": 1.5119, + "step": 417 + }, + { + "epoch": 1.11, + "learning_rate": 7.269314558484914e-05, + "loss": 1.5829, + "step": 418 + }, + { + "epoch": 1.11, + "learning_rate": 7.256546000598551e-05, + "loss": 1.6211, + "step": 419 + }, + { + "epoch": 1.11, + "learning_rate": 7.243758935952547e-05, + "loss": 1.5241, + "step": 420 + }, + { + "epoch": 1.12, + "learning_rate": 7.230953469418292e-05, + "loss": 1.5521, + "step": 421 + }, + { + "epoch": 1.12, + "learning_rate": 7.218129706018108e-05, + "loss": 1.5349, + "step": 422 + }, + { + "epoch": 1.12, + "learning_rate": 7.205287750924372e-05, + "loss": 1.5815, + "step": 423 + }, + { + "epoch": 1.12, + "learning_rate": 7.192427709458656e-05, + "loss": 1.5188, + "step": 424 + }, + { + "epoch": 1.13, + "learning_rate": 7.179549687090867e-05, + "loss": 1.5987, + "step": 425 + }, + { + "epoch": 1.13, + "learning_rate": 7.166653789438382e-05, + "loss": 1.5643, + "step": 426 + }, + { + "epoch": 1.13, + "learning_rate": 7.153740122265176e-05, + "loss": 1.5052, + "step": 427 + }, + { + "epoch": 1.14, + "learning_rate": 7.140808791480959e-05, + "loss": 1.6092, + "step": 428 + }, + { + "epoch": 1.14, + "learning_rate": 7.127859903140311e-05, + "loss": 1.5671, + "step": 429 + }, + { + "epoch": 1.14, + "learning_rate": 7.114893563441802e-05, + "loss": 1.5004, + "step": 430 + }, + { + "epoch": 1.14, + "learning_rate": 7.101909878727128e-05, + "loss": 1.5558, + "step": 431 + }, + { + "epoch": 1.15, + "learning_rate": 7.088908955480244e-05, + "loss": 1.5113, + "step": 432 + }, + { + "epoch": 1.15, + "learning_rate": 7.075890900326475e-05, + "loss": 1.6546, + "step": 433 + }, + { + "epoch": 1.15, + "learning_rate": 7.062855820031659e-05, + "loss": 1.5282, + "step": 434 + }, + { + "epoch": 1.15, + "learning_rate": 7.049803821501259e-05, + "loss": 1.5285, + "step": 435 + }, + { + "epoch": 1.16, + "learning_rate": 7.036735011779492e-05, + "loss": 1.5854, + "step": 436 + }, + { + "epoch": 1.16, + "learning_rate": 7.023649498048451e-05, + "loss": 1.6048, + "step": 437 + }, + { + "epoch": 1.16, + "learning_rate": 7.01054738762722e-05, + "loss": 1.5618, + "step": 438 + }, + { + "epoch": 1.16, + "learning_rate": 6.997428787971005e-05, + "loss": 1.6191, + "step": 439 + }, + { + "epoch": 1.17, + "learning_rate": 6.984293806670244e-05, + "loss": 1.5588, + "step": 440 + }, + { + "epoch": 1.17, + "learning_rate": 6.971142551449725e-05, + "loss": 1.6202, + "step": 441 + }, + { + "epoch": 1.17, + "learning_rate": 6.957975130167705e-05, + "loss": 1.607, + "step": 442 + }, + { + "epoch": 1.18, + "learning_rate": 6.944791650815023e-05, + "loss": 1.554, + "step": 443 + }, + { + "epoch": 1.18, + "learning_rate": 6.931592221514222e-05, + "loss": 1.6057, + "step": 444 + }, + { + "epoch": 1.18, + "learning_rate": 6.91837695051865e-05, + "loss": 1.5725, + "step": 445 + }, + { + "epoch": 1.18, + "learning_rate": 6.905145946211583e-05, + "loss": 1.5788, + "step": 446 + }, + { + "epoch": 1.19, + "learning_rate": 6.891899317105329e-05, + "loss": 1.5324, + "step": 447 + }, + { + "epoch": 1.19, + "learning_rate": 6.878637171840343e-05, + "loss": 1.5962, + "step": 448 + }, + { + "epoch": 1.19, + "learning_rate": 6.865359619184331e-05, + "loss": 1.5458, + "step": 449 + }, + { + "epoch": 1.19, + "learning_rate": 6.85206676803136e-05, + "loss": 1.6023, + "step": 450 + }, + { + "epoch": 1.2, + "learning_rate": 6.83875872740097e-05, + "loss": 1.5291, + "step": 451 + }, + { + "epoch": 1.2, + "learning_rate": 6.825435606437273e-05, + "loss": 1.5929, + "step": 452 + }, + { + "epoch": 1.2, + "learning_rate": 6.81209751440806e-05, + "loss": 1.5424, + "step": 453 + }, + { + "epoch": 1.2, + "learning_rate": 6.798744560703905e-05, + "loss": 1.5881, + "step": 454 + }, + { + "epoch": 1.21, + "learning_rate": 6.785376854837268e-05, + "loss": 1.4747, + "step": 455 + }, + { + "epoch": 1.21, + "learning_rate": 6.771994506441597e-05, + "loss": 1.5215, + "step": 456 + }, + { + "epoch": 1.21, + "learning_rate": 6.758597625270433e-05, + "loss": 1.465, + "step": 457 + }, + { + "epoch": 1.21, + "learning_rate": 6.745186321196495e-05, + "loss": 1.5071, + "step": 458 + }, + { + "epoch": 1.22, + "learning_rate": 6.731760704210802e-05, + "loss": 1.4882, + "step": 459 + }, + { + "epoch": 1.22, + "learning_rate": 6.718320884421751e-05, + "loss": 1.5905, + "step": 460 + }, + { + "epoch": 1.22, + "learning_rate": 6.704866972054223e-05, + "loss": 1.5922, + "step": 461 + }, + { + "epoch": 1.23, + "learning_rate": 6.691399077448677e-05, + "loss": 1.5448, + "step": 462 + }, + { + "epoch": 1.23, + "learning_rate": 6.677917311060246e-05, + "loss": 1.5675, + "step": 463 + }, + { + "epoch": 1.23, + "learning_rate": 6.66442178345783e-05, + "loss": 1.6005, + "step": 464 + }, + { + "epoch": 1.23, + "learning_rate": 6.650912605323194e-05, + "loss": 1.6179, + "step": 465 + }, + { + "epoch": 1.24, + "learning_rate": 6.637389887450045e-05, + "loss": 1.5711, + "step": 466 + }, + { + "epoch": 1.24, + "learning_rate": 6.623853740743146e-05, + "loss": 1.6179, + "step": 467 + }, + { + "epoch": 1.24, + "learning_rate": 6.610304276217392e-05, + "loss": 1.6407, + "step": 468 + }, + { + "epoch": 1.24, + "learning_rate": 6.596741604996897e-05, + "loss": 1.6296, + "step": 469 + }, + { + "epoch": 1.25, + "learning_rate": 6.583165838314095e-05, + "loss": 1.6393, + "step": 470 + }, + { + "epoch": 1.25, + "learning_rate": 6.569577087508814e-05, + "loss": 1.5851, + "step": 471 + }, + { + "epoch": 1.25, + "learning_rate": 6.555975464027375e-05, + "loss": 1.5772, + "step": 472 + }, + { + "epoch": 1.25, + "learning_rate": 6.542361079421669e-05, + "loss": 1.5792, + "step": 473 + }, + { + "epoch": 1.26, + "learning_rate": 6.528734045348248e-05, + "loss": 1.5866, + "step": 474 + }, + { + "epoch": 1.26, + "learning_rate": 6.515094473567407e-05, + "loss": 1.5141, + "step": 475 + }, + { + "epoch": 1.26, + "learning_rate": 6.501442475942265e-05, + "loss": 1.5783, + "step": 476 + }, + { + "epoch": 1.27, + "learning_rate": 6.48777816443785e-05, + "loss": 1.5052, + "step": 477 + }, + { + "epoch": 1.27, + "learning_rate": 6.474101651120184e-05, + "loss": 1.5681, + "step": 478 + }, + { + "epoch": 1.27, + "learning_rate": 6.460413048155355e-05, + "loss": 1.6441, + "step": 479 + }, + { + "epoch": 1.27, + "learning_rate": 6.446712467808608e-05, + "loss": 1.5737, + "step": 480 + }, + { + "epoch": 1.28, + "learning_rate": 6.433000022443419e-05, + "loss": 1.5541, + "step": 481 + }, + { + "epoch": 1.28, + "learning_rate": 6.419275824520568e-05, + "loss": 1.5573, + "step": 482 + }, + { + "epoch": 1.28, + "learning_rate": 6.405539986597225e-05, + "loss": 1.5178, + "step": 483 + }, + { + "epoch": 1.28, + "learning_rate": 6.391792621326027e-05, + "loss": 1.5345, + "step": 484 + }, + { + "epoch": 1.29, + "learning_rate": 6.378033841454147e-05, + "loss": 1.6092, + "step": 485 + }, + { + "epoch": 1.29, + "learning_rate": 6.364263759822371e-05, + "loss": 1.5439, + "step": 486 + }, + { + "epoch": 1.29, + "learning_rate": 6.350482489364186e-05, + "loss": 1.547, + "step": 487 + }, + { + "epoch": 1.29, + "learning_rate": 6.336690143104827e-05, + "loss": 1.5803, + "step": 488 + }, + { + "epoch": 1.3, + "learning_rate": 6.322886834160378e-05, + "loss": 1.584, + "step": 489 + }, + { + "epoch": 1.3, + "learning_rate": 6.309072675736827e-05, + "loss": 1.46, + "step": 490 + }, + { + "epoch": 1.3, + "learning_rate": 6.29524778112914e-05, + "loss": 1.5754, + "step": 491 + }, + { + "epoch": 1.31, + "learning_rate": 6.281412263720344e-05, + "loss": 1.5056, + "step": 492 + }, + { + "epoch": 1.31, + "learning_rate": 6.267566236980574e-05, + "loss": 1.5539, + "step": 493 + }, + { + "epoch": 1.31, + "learning_rate": 6.253709814466168e-05, + "loss": 1.5229, + "step": 494 + }, + { + "epoch": 1.31, + "learning_rate": 6.239843109818716e-05, + "loss": 1.4894, + "step": 495 + }, + { + "epoch": 1.32, + "learning_rate": 6.22596623676414e-05, + "loss": 1.5337, + "step": 496 + }, + { + "epoch": 1.32, + "learning_rate": 6.212079309111753e-05, + "loss": 1.592, + "step": 497 + }, + { + "epoch": 1.32, + "learning_rate": 6.19818244075333e-05, + "loss": 1.4937, + "step": 498 + }, + { + "epoch": 1.32, + "learning_rate": 6.18427574566218e-05, + "loss": 1.5862, + "step": 499 + }, + { + "epoch": 1.33, + "learning_rate": 6.170359337892194e-05, + "loss": 1.5252, + "step": 500 + }, + { + "epoch": 1.33, + "learning_rate": 6.156433331576927e-05, + "loss": 1.5639, + "step": 501 + }, + { + "epoch": 1.33, + "learning_rate": 6.142497840928656e-05, + "loss": 1.5306, + "step": 502 + }, + { + "epoch": 1.33, + "learning_rate": 6.128552980237437e-05, + "loss": 1.6537, + "step": 503 + }, + { + "epoch": 1.34, + "learning_rate": 6.114598863870177e-05, + "loss": 1.4589, + "step": 504 + }, + { + "epoch": 1.34, + "learning_rate": 6.100635606269694e-05, + "loss": 1.5472, + "step": 505 + }, + { + "epoch": 1.34, + "learning_rate": 6.0866633219537694e-05, + "loss": 1.5372, + "step": 506 + }, + { + "epoch": 1.34, + "learning_rate": 6.0726821255142255e-05, + "loss": 1.4852, + "step": 507 + }, + { + "epoch": 1.35, + "learning_rate": 6.058692131615968e-05, + "loss": 1.509, + "step": 508 + }, + { + "epoch": 1.35, + "learning_rate": 6.04469345499606e-05, + "loss": 1.5736, + "step": 509 + }, + { + "epoch": 1.35, + "learning_rate": 6.0306862104627705e-05, + "loss": 1.5348, + "step": 510 + }, + { + "epoch": 1.36, + "learning_rate": 6.0166705128946375e-05, + "loss": 1.5519, + "step": 511 + }, + { + "epoch": 1.36, + "learning_rate": 6.00264647723953e-05, + "loss": 1.5526, + "step": 512 + }, + { + "epoch": 1.36, + "learning_rate": 5.988614218513693e-05, + "loss": 1.5908, + "step": 513 + }, + { + "epoch": 1.36, + "learning_rate": 5.974573851800818e-05, + "loss": 1.5455, + "step": 514 + }, + { + "epoch": 1.37, + "learning_rate": 5.9605254922510926e-05, + "loss": 1.5317, + "step": 515 + }, + { + "epoch": 1.37, + "learning_rate": 5.946469255080251e-05, + "loss": 1.5962, + "step": 516 + }, + { + "epoch": 1.37, + "learning_rate": 5.9324052555686436e-05, + "loss": 1.6437, + "step": 517 + }, + { + "epoch": 1.37, + "learning_rate": 5.918333609060276e-05, + "loss": 1.5306, + "step": 518 + }, + { + "epoch": 1.38, + "learning_rate": 5.9042544309618694e-05, + "loss": 1.5289, + "step": 519 + }, + { + "epoch": 1.38, + "learning_rate": 5.890167836741919e-05, + "loss": 1.5338, + "step": 520 + }, + { + "epoch": 1.38, + "learning_rate": 5.8760739419297384e-05, + "loss": 1.6154, + "step": 521 + }, + { + "epoch": 1.38, + "learning_rate": 5.861972862114518e-05, + "loss": 1.5108, + "step": 522 + }, + { + "epoch": 1.39, + "learning_rate": 5.847864712944373e-05, + "loss": 1.5818, + "step": 523 + }, + { + "epoch": 1.39, + "learning_rate": 5.833749610125402e-05, + "loss": 1.6317, + "step": 524 + }, + { + "epoch": 1.39, + "learning_rate": 5.819627669420724e-05, + "loss": 1.5724, + "step": 525 + }, + { + "epoch": 1.4, + "learning_rate": 5.805499006649547e-05, + "loss": 1.5023, + "step": 526 + }, + { + "epoch": 1.4, + "learning_rate": 5.791363737686205e-05, + "loss": 1.5374, + "step": 527 + }, + { + "epoch": 1.4, + "learning_rate": 5.7772219784592105e-05, + "loss": 1.5141, + "step": 528 + }, + { + "epoch": 1.4, + "learning_rate": 5.76307384495031e-05, + "loss": 1.6443, + "step": 529 + }, + { + "epoch": 1.41, + "learning_rate": 5.748919453193521e-05, + "loss": 1.5954, + "step": 530 + }, + { + "epoch": 1.41, + "learning_rate": 5.734758919274192e-05, + "loss": 1.6019, + "step": 531 + }, + { + "epoch": 1.41, + "learning_rate": 5.720592359328047e-05, + "loss": 1.6241, + "step": 532 + }, + { + "epoch": 1.41, + "learning_rate": 5.706419889540225e-05, + "loss": 1.5813, + "step": 533 + }, + { + "epoch": 1.42, + "learning_rate": 5.69224162614434e-05, + "loss": 1.518, + "step": 534 + }, + { + "epoch": 1.42, + "learning_rate": 5.6780576854215195e-05, + "loss": 1.5473, + "step": 535 + }, + { + "epoch": 1.42, + "learning_rate": 5.6638681836994535e-05, + "loss": 1.6277, + "step": 536 + }, + { + "epoch": 1.42, + "learning_rate": 5.649673237351436e-05, + "loss": 1.6213, + "step": 537 + }, + { + "epoch": 1.43, + "learning_rate": 5.6354729627954195e-05, + "loss": 1.5182, + "step": 538 + }, + { + "epoch": 1.43, + "learning_rate": 5.621267476493053e-05, + "loss": 1.6186, + "step": 539 + }, + { + "epoch": 1.43, + "learning_rate": 5.607056894948728e-05, + "loss": 1.5195, + "step": 540 + }, + { + "epoch": 1.44, + "learning_rate": 5.592841334708624e-05, + "loss": 1.5293, + "step": 541 + }, + { + "epoch": 1.44, + "learning_rate": 5.578620912359758e-05, + "loss": 1.6225, + "step": 542 + }, + { + "epoch": 1.44, + "learning_rate": 5.564395744529012e-05, + "loss": 1.5548, + "step": 543 + }, + { + "epoch": 1.44, + "learning_rate": 5.5501659478821964e-05, + "loss": 1.556, + "step": 544 + }, + { + "epoch": 1.45, + "learning_rate": 5.535931639123083e-05, + "loss": 1.4946, + "step": 545 + }, + { + "epoch": 1.45, + "learning_rate": 5.521692934992447e-05, + "loss": 1.5343, + "step": 546 + }, + { + "epoch": 1.45, + "learning_rate": 5.5074499522671106e-05, + "loss": 1.5353, + "step": 547 + }, + { + "epoch": 1.45, + "learning_rate": 5.493202807758992e-05, + "loss": 1.5644, + "step": 548 + }, + { + "epoch": 1.46, + "learning_rate": 5.478951618314133e-05, + "loss": 1.4671, + "step": 549 + }, + { + "epoch": 1.46, + "learning_rate": 5.464696500811757e-05, + "loss": 1.553, + "step": 550 + }, + { + "epoch": 1.46, + "learning_rate": 5.450437572163298e-05, + "loss": 1.5658, + "step": 551 + }, + { + "epoch": 1.46, + "learning_rate": 5.4361749493114514e-05, + "loss": 1.5448, + "step": 552 + }, + { + "epoch": 1.47, + "learning_rate": 5.4219087492292054e-05, + "loss": 1.5305, + "step": 553 + }, + { + "epoch": 1.47, + "learning_rate": 5.407639088918888e-05, + "loss": 1.5567, + "step": 554 + }, + { + "epoch": 1.47, + "learning_rate": 5.3933660854112075e-05, + "loss": 1.5312, + "step": 555 + }, + { + "epoch": 1.47, + "learning_rate": 5.37908985576429e-05, + "loss": 1.4669, + "step": 556 + }, + { + "epoch": 1.48, + "learning_rate": 5.364810517062717e-05, + "loss": 1.6714, + "step": 557 + }, + { + "epoch": 1.48, + "learning_rate": 5.350528186416573e-05, + "loss": 1.5867, + "step": 558 + }, + { + "epoch": 1.48, + "learning_rate": 5.3362429809604806e-05, + "loss": 1.5232, + "step": 559 + }, + { + "epoch": 1.49, + "learning_rate": 5.321955017852637e-05, + "loss": 1.5636, + "step": 560 + }, + { + "epoch": 1.49, + "learning_rate": 5.307664414273855e-05, + "loss": 1.4686, + "step": 561 + }, + { + "epoch": 1.49, + "learning_rate": 5.2933712874266084e-05, + "loss": 1.5301, + "step": 562 + }, + { + "epoch": 1.49, + "learning_rate": 5.2790757545340586e-05, + "loss": 1.5631, + "step": 563 + }, + { + "epoch": 1.5, + "learning_rate": 5.2647779328391045e-05, + "loss": 1.5805, + "step": 564 + }, + { + "epoch": 1.5, + "learning_rate": 5.2504779396034146e-05, + "loss": 1.6171, + "step": 565 + }, + { + "epoch": 1.5, + "learning_rate": 5.236175892106467e-05, + "loss": 1.5264, + "step": 566 + }, + { + "epoch": 1.5, + "learning_rate": 5.221871907644589e-05, + "loss": 1.5189, + "step": 567 + }, + { + "epoch": 1.51, + "learning_rate": 5.207566103529991e-05, + "loss": 1.5974, + "step": 568 + }, + { + "epoch": 1.51, + "learning_rate": 5.1932585970898096e-05, + "loss": 1.5221, + "step": 569 + }, + { + "epoch": 1.51, + "learning_rate": 5.17894950566514e-05, + "loss": 1.5471, + "step": 570 + }, + { + "epoch": 1.51, + "learning_rate": 5.1646389466100795e-05, + "loss": 1.521, + "step": 571 + }, + { + "epoch": 1.52, + "learning_rate": 5.150327037290761e-05, + "loss": 1.5258, + "step": 572 + }, + { + "epoch": 1.52, + "learning_rate": 5.136013895084388e-05, + "loss": 1.4685, + "step": 573 + }, + { + "epoch": 1.52, + "learning_rate": 5.121699637378282e-05, + "loss": 1.5678, + "step": 574 + }, + { + "epoch": 1.53, + "learning_rate": 5.107384381568907e-05, + "loss": 1.4684, + "step": 575 + }, + { + "epoch": 1.53, + "learning_rate": 5.093068245060917e-05, + "loss": 1.4688, + "step": 576 + }, + { + "epoch": 1.53, + "learning_rate": 5.0787513452661864e-05, + "loss": 1.566, + "step": 577 + }, + { + "epoch": 1.53, + "learning_rate": 5.064433799602849e-05, + "loss": 1.5323, + "step": 578 + }, + { + "epoch": 1.54, + "learning_rate": 5.05011572549434e-05, + "loss": 1.581, + "step": 579 + }, + { + "epoch": 1.54, + "learning_rate": 5.0357972403684225e-05, + "loss": 1.5065, + "step": 580 + }, + { + "epoch": 1.54, + "learning_rate": 5.021478461656235e-05, + "loss": 1.5708, + "step": 581 + }, + { + "epoch": 1.54, + "learning_rate": 5.007159506791325e-05, + "loss": 1.5121, + "step": 582 + }, + { + "epoch": 1.55, + "learning_rate": 4.992840493208676e-05, + "loss": 1.5743, + "step": 583 + }, + { + "epoch": 1.55, + "learning_rate": 4.9785215383437646e-05, + "loss": 1.5861, + "step": 584 + }, + { + "epoch": 1.55, + "learning_rate": 4.9642027596315786e-05, + "loss": 1.5671, + "step": 585 + }, + { + "epoch": 1.55, + "learning_rate": 4.949884274505661e-05, + "loss": 1.5105, + "step": 586 + }, + { + "epoch": 1.56, + "learning_rate": 4.935566200397152e-05, + "loss": 1.5658, + "step": 587 + }, + { + "epoch": 1.56, + "learning_rate": 4.921248654733814e-05, + "loss": 1.5483, + "step": 588 + }, + { + "epoch": 1.56, + "learning_rate": 4.906931754939084e-05, + "loss": 1.567, + "step": 589 + }, + { + "epoch": 1.56, + "learning_rate": 4.8926156184310946e-05, + "loss": 1.5763, + "step": 590 + }, + { + "epoch": 1.57, + "learning_rate": 4.878300362621719e-05, + "loss": 1.5044, + "step": 591 + }, + { + "epoch": 1.57, + "learning_rate": 4.8639861049156136e-05, + "loss": 1.5653, + "step": 592 + }, + { + "epoch": 1.57, + "learning_rate": 4.8496729627092405e-05, + "loss": 1.5588, + "step": 593 + }, + { + "epoch": 1.58, + "learning_rate": 4.835361053389922e-05, + "loss": 1.4821, + "step": 594 + }, + { + "epoch": 1.58, + "learning_rate": 4.821050494334861e-05, + "loss": 1.6273, + "step": 595 + }, + { + "epoch": 1.58, + "learning_rate": 4.806741402910193e-05, + "loss": 1.4818, + "step": 596 + }, + { + "epoch": 1.58, + "learning_rate": 4.7924338964700096e-05, + "loss": 1.4659, + "step": 597 + }, + { + "epoch": 1.59, + "learning_rate": 4.778128092355412e-05, + "loss": 1.5297, + "step": 598 + }, + { + "epoch": 1.59, + "learning_rate": 4.7638241078935325e-05, + "loss": 1.585, + "step": 599 + }, + { + "epoch": 1.59, + "learning_rate": 4.7495220603965866e-05, + "loss": 1.4958, + "step": 600 + }, + { + "epoch": 1.59, + "learning_rate": 4.735222067160896e-05, + "loss": 1.5098, + "step": 601 + }, + { + "epoch": 1.6, + "learning_rate": 4.720924245465943e-05, + "loss": 1.6065, + "step": 602 + }, + { + "epoch": 1.6, + "learning_rate": 4.706628712573394e-05, + "loss": 1.5091, + "step": 603 + }, + { + "epoch": 1.6, + "learning_rate": 4.6923355857261455e-05, + "loss": 1.4611, + "step": 604 + }, + { + "epoch": 1.6, + "learning_rate": 4.678044982147365e-05, + "loss": 1.5287, + "step": 605 + }, + { + "epoch": 1.61, + "learning_rate": 4.6637570190395205e-05, + "loss": 1.5573, + "step": 606 + }, + { + "epoch": 1.61, + "learning_rate": 4.649471813583427e-05, + "loss": 1.6371, + "step": 607 + }, + { + "epoch": 1.61, + "learning_rate": 4.635189482937284e-05, + "loss": 1.5336, + "step": 608 + }, + { + "epoch": 1.62, + "learning_rate": 4.620910144235712e-05, + "loss": 1.5559, + "step": 609 + }, + { + "epoch": 1.62, + "learning_rate": 4.606633914588793e-05, + "loss": 1.5399, + "step": 610 + }, + { + "epoch": 1.62, + "learning_rate": 4.592360911081113e-05, + "loss": 1.487, + "step": 611 + }, + { + "epoch": 1.62, + "learning_rate": 4.5780912507707944e-05, + "loss": 1.5583, + "step": 612 + }, + { + "epoch": 1.63, + "learning_rate": 4.563825050688549e-05, + "loss": 1.5271, + "step": 613 + }, + { + "epoch": 1.63, + "learning_rate": 4.549562427836701e-05, + "loss": 1.5934, + "step": 614 + }, + { + "epoch": 1.63, + "learning_rate": 4.535303499188244e-05, + "loss": 1.5261, + "step": 615 + }, + { + "epoch": 1.63, + "learning_rate": 4.5210483816858676e-05, + "loss": 1.6577, + "step": 616 + }, + { + "epoch": 1.64, + "learning_rate": 4.506797192241009e-05, + "loss": 1.4575, + "step": 617 + }, + { + "epoch": 1.64, + "learning_rate": 4.49255004773289e-05, + "loss": 1.4948, + "step": 618 + }, + { + "epoch": 1.64, + "learning_rate": 4.478307065007554e-05, + "loss": 1.4523, + "step": 619 + }, + { + "epoch": 1.64, + "learning_rate": 4.464068360876919e-05, + "loss": 1.6135, + "step": 620 + }, + { + "epoch": 1.65, + "learning_rate": 4.449834052117804e-05, + "loss": 1.5568, + "step": 621 + }, + { + "epoch": 1.65, + "learning_rate": 4.4356042554709905e-05, + "loss": 1.5823, + "step": 622 + }, + { + "epoch": 1.65, + "learning_rate": 4.421379087640244e-05, + "loss": 1.664, + "step": 623 + }, + { + "epoch": 1.66, + "learning_rate": 4.407158665291377e-05, + "loss": 1.5322, + "step": 624 + }, + { + "epoch": 1.66, + "learning_rate": 4.3929431050512727e-05, + "loss": 1.5811, + "step": 625 + }, + { + "epoch": 1.66, + "learning_rate": 4.3787325235069487e-05, + "loss": 1.5768, + "step": 626 + }, + { + "epoch": 1.66, + "learning_rate": 4.36452703720458e-05, + "loss": 1.5219, + "step": 627 + }, + { + "epoch": 1.67, + "learning_rate": 4.350326762648565e-05, + "loss": 1.5525, + "step": 628 + }, + { + "epoch": 1.67, + "learning_rate": 4.3361318163005484e-05, + "loss": 1.477, + "step": 629 + }, + { + "epoch": 1.67, + "learning_rate": 4.321942314578482e-05, + "loss": 1.524, + "step": 630 + }, + { + "epoch": 1.67, + "learning_rate": 4.307758373855661e-05, + "loss": 1.5741, + "step": 631 + }, + { + "epoch": 1.68, + "learning_rate": 4.293580110459776e-05, + "loss": 1.531, + "step": 632 + }, + { + "epoch": 1.68, + "learning_rate": 4.279407640671956e-05, + "loss": 1.5424, + "step": 633 + }, + { + "epoch": 1.68, + "learning_rate": 4.265241080725808e-05, + "loss": 1.471, + "step": 634 + }, + { + "epoch": 1.68, + "learning_rate": 4.251080546806481e-05, + "loss": 1.5149, + "step": 635 + }, + { + "epoch": 1.69, + "learning_rate": 4.2369261550496905e-05, + "loss": 1.5289, + "step": 636 + }, + { + "epoch": 1.69, + "learning_rate": 4.22277802154079e-05, + "loss": 1.455, + "step": 637 + }, + { + "epoch": 1.69, + "learning_rate": 4.2086362623137955e-05, + "loss": 1.5351, + "step": 638 + }, + { + "epoch": 1.69, + "learning_rate": 4.194500993350454e-05, + "loss": 1.5747, + "step": 639 + }, + { + "epoch": 1.7, + "learning_rate": 4.180372330579276e-05, + "loss": 1.5356, + "step": 640 + }, + { + "epoch": 1.7, + "learning_rate": 4.1662503898745994e-05, + "loss": 1.4969, + "step": 641 + }, + { + "epoch": 1.7, + "learning_rate": 4.1521352870556266e-05, + "loss": 1.5077, + "step": 642 + }, + { + "epoch": 1.71, + "learning_rate": 4.1380271378854833e-05, + "loss": 1.5598, + "step": 643 + }, + { + "epoch": 1.71, + "learning_rate": 4.1239260580702635e-05, + "loss": 1.5431, + "step": 644 + }, + { + "epoch": 1.71, + "learning_rate": 4.1098321632580824e-05, + "loss": 1.5806, + "step": 645 + }, + { + "epoch": 1.71, + "learning_rate": 4.095745569038133e-05, + "loss": 1.4687, + "step": 646 + }, + { + "epoch": 1.72, + "learning_rate": 4.0816663909397256e-05, + "loss": 1.534, + "step": 647 + }, + { + "epoch": 1.72, + "learning_rate": 4.067594744431358e-05, + "loss": 1.5602, + "step": 648 + }, + { + "epoch": 1.72, + "learning_rate": 4.053530744919749e-05, + "loss": 1.5434, + "step": 649 + }, + { + "epoch": 1.72, + "learning_rate": 4.03947450774891e-05, + "loss": 1.4529, + "step": 650 + }, + { + "epoch": 1.73, + "learning_rate": 4.0254261481991825e-05, + "loss": 1.5127, + "step": 651 + }, + { + "epoch": 1.73, + "learning_rate": 4.011385781486308e-05, + "loss": 1.5195, + "step": 652 + }, + { + "epoch": 1.73, + "learning_rate": 3.9973535227604714e-05, + "loss": 1.5714, + "step": 653 + }, + { + "epoch": 1.73, + "learning_rate": 3.983329487105364e-05, + "loss": 1.5864, + "step": 654 + }, + { + "epoch": 1.74, + "learning_rate": 3.96931378953723e-05, + "loss": 1.5457, + "step": 655 + }, + { + "epoch": 1.74, + "learning_rate": 3.955306545003941e-05, + "loss": 1.5544, + "step": 656 + }, + { + "epoch": 1.74, + "learning_rate": 3.941307868384034e-05, + "loss": 1.5802, + "step": 657 + }, + { + "epoch": 1.75, + "learning_rate": 3.927317874485776e-05, + "loss": 1.4793, + "step": 658 + }, + { + "epoch": 1.75, + "learning_rate": 3.9133366780462325e-05, + "loss": 1.5746, + "step": 659 + }, + { + "epoch": 1.75, + "learning_rate": 3.899364393730308e-05, + "loss": 1.5031, + "step": 660 + }, + { + "epoch": 1.75, + "learning_rate": 3.8854011361298246e-05, + "loss": 1.5029, + "step": 661 + }, + { + "epoch": 1.76, + "learning_rate": 3.871447019762564e-05, + "loss": 1.4952, + "step": 662 + }, + { + "epoch": 1.76, + "learning_rate": 3.857502159071346e-05, + "loss": 1.556, + "step": 663 + }, + { + "epoch": 1.76, + "learning_rate": 3.843566668423073e-05, + "loss": 1.5939, + "step": 664 + }, + { + "epoch": 1.76, + "learning_rate": 3.829640662107807e-05, + "loss": 1.5231, + "step": 665 + }, + { + "epoch": 1.77, + "learning_rate": 3.8157242543378205e-05, + "loss": 1.543, + "step": 666 + }, + { + "epoch": 1.77, + "learning_rate": 3.8018175592466695e-05, + "loss": 1.5051, + "step": 667 + }, + { + "epoch": 1.77, + "learning_rate": 3.787920690888248e-05, + "loss": 1.4483, + "step": 668 + }, + { + "epoch": 1.77, + "learning_rate": 3.7740337632358616e-05, + "loss": 1.5926, + "step": 669 + }, + { + "epoch": 1.78, + "learning_rate": 3.760156890181283e-05, + "loss": 1.5499, + "step": 670 + }, + { + "epoch": 1.78, + "learning_rate": 3.746290185533833e-05, + "loss": 1.6084, + "step": 671 + }, + { + "epoch": 1.78, + "learning_rate": 3.732433763019428e-05, + "loss": 1.4915, + "step": 672 + }, + { + "epoch": 1.79, + "learning_rate": 3.718587736279658e-05, + "loss": 1.5149, + "step": 673 + }, + { + "epoch": 1.79, + "learning_rate": 3.704752218870861e-05, + "loss": 1.5557, + "step": 674 + }, + { + "epoch": 1.79, + "learning_rate": 3.690927324263175e-05, + "loss": 1.4818, + "step": 675 + }, + { + "epoch": 1.79, + "learning_rate": 3.677113165839623e-05, + "loss": 1.4723, + "step": 676 + }, + { + "epoch": 1.8, + "learning_rate": 3.663309856895174e-05, + "loss": 1.4855, + "step": 677 + }, + { + "epoch": 1.8, + "learning_rate": 3.6495175106358154e-05, + "loss": 1.5185, + "step": 678 + }, + { + "epoch": 1.8, + "learning_rate": 3.6357362401776277e-05, + "loss": 1.5155, + "step": 679 + }, + { + "epoch": 1.8, + "learning_rate": 3.621966158545855e-05, + "loss": 1.5517, + "step": 680 + }, + { + "epoch": 1.81, + "learning_rate": 3.608207378673973e-05, + "loss": 1.4894, + "step": 681 + }, + { + "epoch": 1.81, + "learning_rate": 3.594460013402775e-05, + "loss": 1.4591, + "step": 682 + }, + { + "epoch": 1.81, + "learning_rate": 3.580724175479432e-05, + "loss": 1.5306, + "step": 683 + }, + { + "epoch": 1.81, + "learning_rate": 3.566999977556582e-05, + "loss": 1.4702, + "step": 684 + }, + { + "epoch": 1.82, + "learning_rate": 3.5532875321913935e-05, + "loss": 1.6138, + "step": 685 + }, + { + "epoch": 1.82, + "learning_rate": 3.5395869518446464e-05, + "loss": 1.4431, + "step": 686 + }, + { + "epoch": 1.82, + "learning_rate": 3.525898348879819e-05, + "loss": 1.6268, + "step": 687 + }, + { + "epoch": 1.82, + "learning_rate": 3.5122218355621514e-05, + "loss": 1.5443, + "step": 688 + }, + { + "epoch": 1.83, + "learning_rate": 3.4985575240577365e-05, + "loss": 1.6156, + "step": 689 + }, + { + "epoch": 1.83, + "learning_rate": 3.484905526432594e-05, + "loss": 1.5196, + "step": 690 + }, + { + "epoch": 1.83, + "learning_rate": 3.471265954651752e-05, + "loss": 1.5153, + "step": 691 + }, + { + "epoch": 1.84, + "learning_rate": 3.457638920578331e-05, + "loss": 1.5637, + "step": 692 + }, + { + "epoch": 1.84, + "learning_rate": 3.4440245359726266e-05, + "loss": 1.555, + "step": 693 + }, + { + "epoch": 1.84, + "learning_rate": 3.4304229124911856e-05, + "loss": 1.5495, + "step": 694 + }, + { + "epoch": 1.84, + "learning_rate": 3.416834161685907e-05, + "loss": 1.5596, + "step": 695 + }, + { + "epoch": 1.85, + "learning_rate": 3.403258395003102e-05, + "loss": 1.5496, + "step": 696 + }, + { + "epoch": 1.85, + "learning_rate": 3.389695723782609e-05, + "loss": 1.5649, + "step": 697 + }, + { + "epoch": 1.85, + "learning_rate": 3.376146259256855e-05, + "loss": 1.5552, + "step": 698 + }, + { + "epoch": 1.85, + "learning_rate": 3.3626101125499555e-05, + "loss": 1.5355, + "step": 699 + }, + { + "epoch": 1.86, + "learning_rate": 3.349087394676809e-05, + "loss": 1.5022, + "step": 700 + }, + { + "epoch": 1.86, + "learning_rate": 3.33557821654217e-05, + "loss": 1.527, + "step": 701 + }, + { + "epoch": 1.86, + "learning_rate": 3.322082688939755e-05, + "loss": 1.5452, + "step": 702 + }, + { + "epoch": 1.86, + "learning_rate": 3.308600922551324e-05, + "loss": 1.5208, + "step": 703 + }, + { + "epoch": 1.87, + "learning_rate": 3.295133027945778e-05, + "loss": 1.47, + "step": 704 + }, + { + "epoch": 1.87, + "learning_rate": 3.281679115578249e-05, + "loss": 1.5202, + "step": 705 + }, + { + "epoch": 1.87, + "learning_rate": 3.2682392957891985e-05, + "loss": 1.4507, + "step": 706 + }, + { + "epoch": 1.88, + "learning_rate": 3.254813678803504e-05, + "loss": 1.6117, + "step": 707 + }, + { + "epoch": 1.88, + "learning_rate": 3.241402374729569e-05, + "loss": 1.6149, + "step": 708 + }, + { + "epoch": 1.88, + "learning_rate": 3.2280054935584025e-05, + "loss": 1.5947, + "step": 709 + }, + { + "epoch": 1.88, + "learning_rate": 3.2146231451627334e-05, + "loss": 1.5165, + "step": 710 + }, + { + "epoch": 1.89, + "learning_rate": 3.2012554392960966e-05, + "loss": 1.4893, + "step": 711 + }, + { + "epoch": 1.89, + "learning_rate": 3.187902485591941e-05, + "loss": 1.6028, + "step": 712 + }, + { + "epoch": 1.89, + "learning_rate": 3.174564393562728e-05, + "loss": 1.5429, + "step": 713 + }, + { + "epoch": 1.89, + "learning_rate": 3.161241272599031e-05, + "loss": 1.5214, + "step": 714 + }, + { + "epoch": 1.9, + "learning_rate": 3.147933231968642e-05, + "loss": 1.5541, + "step": 715 + }, + { + "epoch": 1.9, + "learning_rate": 3.1346403808156713e-05, + "loss": 1.5747, + "step": 716 + }, + { + "epoch": 1.9, + "learning_rate": 3.121362828159659e-05, + "loss": 1.5768, + "step": 717 + }, + { + "epoch": 1.9, + "learning_rate": 3.108100682894671e-05, + "loss": 1.6119, + "step": 718 + }, + { + "epoch": 1.91, + "learning_rate": 3.094854053788418e-05, + "loss": 1.577, + "step": 719 + }, + { + "epoch": 1.91, + "learning_rate": 3.08162304948135e-05, + "loss": 1.5888, + "step": 720 + }, + { + "epoch": 1.91, + "learning_rate": 3.06840777848578e-05, + "loss": 1.5093, + "step": 721 + }, + { + "epoch": 1.92, + "learning_rate": 3.055208349184977e-05, + "loss": 1.4787, + "step": 722 + }, + { + "epoch": 1.92, + "learning_rate": 3.0420248698322973e-05, + "loss": 1.5513, + "step": 723 + }, + { + "epoch": 1.92, + "learning_rate": 3.0288574485502757e-05, + "loss": 1.594, + "step": 724 + }, + { + "epoch": 1.92, + "learning_rate": 3.015706193329757e-05, + "loss": 1.5548, + "step": 725 + }, + { + "epoch": 1.93, + "learning_rate": 3.002571212028995e-05, + "loss": 1.5783, + "step": 726 + }, + { + "epoch": 1.93, + "learning_rate": 2.9894526123727808e-05, + "loss": 1.5001, + "step": 727 + }, + { + "epoch": 1.93, + "learning_rate": 2.9763505019515525e-05, + "loss": 1.5542, + "step": 728 + }, + { + "epoch": 1.93, + "learning_rate": 2.9632649882205088e-05, + "loss": 1.5134, + "step": 729 + }, + { + "epoch": 1.94, + "learning_rate": 2.950196178498743e-05, + "loss": 1.5232, + "step": 730 + }, + { + "epoch": 1.94, + "learning_rate": 2.937144179968342e-05, + "loss": 1.4753, + "step": 731 + }, + { + "epoch": 1.94, + "learning_rate": 2.9241090996735266e-05, + "loss": 1.4371, + "step": 732 + }, + { + "epoch": 1.94, + "learning_rate": 2.911091044519757e-05, + "loss": 1.5026, + "step": 733 + }, + { + "epoch": 1.95, + "learning_rate": 2.8980901212728728e-05, + "loss": 1.5565, + "step": 734 + }, + { + "epoch": 1.95, + "learning_rate": 2.8851064365581982e-05, + "loss": 1.509, + "step": 735 + }, + { + "epoch": 1.95, + "learning_rate": 2.8721400968596903e-05, + "loss": 1.5417, + "step": 736 + }, + { + "epoch": 1.95, + "learning_rate": 2.8591912085190392e-05, + "loss": 1.4827, + "step": 737 + }, + { + "epoch": 1.96, + "learning_rate": 2.8462598777348247e-05, + "loss": 1.5347, + "step": 738 + }, + { + "epoch": 1.96, + "learning_rate": 2.8333462105616194e-05, + "loss": 1.5072, + "step": 739 + }, + { + "epoch": 1.96, + "learning_rate": 2.820450312909134e-05, + "loss": 1.4506, + "step": 740 + }, + { + "epoch": 1.97, + "learning_rate": 2.807572290541346e-05, + "loss": 1.5673, + "step": 741 + }, + { + "epoch": 1.97, + "learning_rate": 2.79471224907563e-05, + "loss": 1.5108, + "step": 742 + }, + { + "epoch": 1.97, + "learning_rate": 2.781870293981893e-05, + "loss": 1.4845, + "step": 743 + }, + { + "epoch": 1.97, + "learning_rate": 2.7690465305817088e-05, + "loss": 1.5846, + "step": 744 + }, + { + "epoch": 1.98, + "learning_rate": 2.756241064047456e-05, + "loss": 1.5504, + "step": 745 + }, + { + "epoch": 1.98, + "learning_rate": 2.7434539994014475e-05, + "loss": 1.5451, + "step": 746 + }, + { + "epoch": 1.98, + "learning_rate": 2.730685441515088e-05, + "loss": 1.4817, + "step": 747 + }, + { + "epoch": 1.98, + "learning_rate": 2.7179354951079856e-05, + "loss": 1.4819, + "step": 748 + }, + { + "epoch": 1.99, + "learning_rate": 2.7052042647471252e-05, + "loss": 1.5487, + "step": 749 + }, + { + "epoch": 1.99, + "learning_rate": 2.69249185484598e-05, + "loss": 1.4851, + "step": 750 + }, + { + "epoch": 1.99, + "learning_rate": 2.679798369663683e-05, + "loss": 1.5208, + "step": 751 + }, + { + "epoch": 1.99, + "learning_rate": 2.667123913304146e-05, + "loss": 1.536, + "step": 752 + }, + { + "epoch": 2.0, + "learning_rate": 2.6544685897152272e-05, + "loss": 1.5505, + "step": 753 + }, + { + "epoch": 2.0, + "learning_rate": 2.6418325026878665e-05, + "loss": 1.6026, + "step": 754 + }, + { + "epoch": 2.0, + "learning_rate": 2.629215755855239e-05, + "loss": 1.4181, + "step": 755 + }, + { + "epoch": 2.01, + "learning_rate": 2.6166184526919047e-05, + "loss": 1.4751, + "step": 756 + }, + { + "epoch": 2.01, + "learning_rate": 2.6040406965129515e-05, + "loss": 1.4894, + "step": 757 + }, + { + "epoch": 2.01, + "learning_rate": 2.5914825904731686e-05, + "loss": 1.5007, + "step": 758 + }, + { + "epoch": 2.01, + "learning_rate": 2.5789442375661744e-05, + "loss": 1.372, + "step": 759 + }, + { + "epoch": 2.02, + "learning_rate": 2.5664257406235955e-05, + "loss": 1.4389, + "step": 760 + }, + { + "epoch": 2.02, + "learning_rate": 2.5539272023141995e-05, + "loss": 1.4259, + "step": 761 + }, + { + "epoch": 2.02, + "learning_rate": 2.541448725143083e-05, + "loss": 1.4355, + "step": 762 + }, + { + "epoch": 2.02, + "learning_rate": 2.5289904114507946e-05, + "loss": 1.4497, + "step": 763 + }, + { + "epoch": 2.03, + "learning_rate": 2.516552363412534e-05, + "loss": 1.4206, + "step": 764 + }, + { + "epoch": 2.03, + "learning_rate": 2.504134683037278e-05, + "loss": 1.481, + "step": 765 + }, + { + "epoch": 2.03, + "learning_rate": 2.491737472166972e-05, + "loss": 1.4599, + "step": 766 + }, + { + "epoch": 2.03, + "learning_rate": 2.479360832475679e-05, + "loss": 1.5219, + "step": 767 + }, + { + "epoch": 2.04, + "learning_rate": 2.46700486546875e-05, + "loss": 1.4852, + "step": 768 + }, + { + "epoch": 2.04, + "learning_rate": 2.4546696724819963e-05, + "loss": 1.4385, + "step": 769 + }, + { + "epoch": 2.04, + "learning_rate": 2.4423553546808427e-05, + "loss": 1.4962, + "step": 770 + }, + { + "epoch": 2.05, + "learning_rate": 2.430062013059526e-05, + "loss": 1.441, + "step": 771 + }, + { + "epoch": 2.05, + "learning_rate": 2.4177897484402306e-05, + "loss": 1.4178, + "step": 772 + }, + { + "epoch": 2.05, + "learning_rate": 2.4055386614722996e-05, + "loss": 1.499, + "step": 773 + }, + { + "epoch": 2.05, + "learning_rate": 2.393308852631373e-05, + "loss": 1.4574, + "step": 774 + }, + { + "epoch": 2.06, + "learning_rate": 2.381100422218596e-05, + "loss": 1.4838, + "step": 775 + }, + { + "epoch": 2.06, + "learning_rate": 2.3689134703597706e-05, + "loss": 1.479, + "step": 776 + }, + { + "epoch": 2.06, + "learning_rate": 2.3567480970045492e-05, + "loss": 1.5401, + "step": 777 + }, + { + "epoch": 2.06, + "learning_rate": 2.344604401925613e-05, + "loss": 1.4839, + "step": 778 + }, + { + "epoch": 2.07, + "learning_rate": 2.3324824847178494e-05, + "loss": 1.4536, + "step": 779 + }, + { + "epoch": 2.07, + "learning_rate": 2.3203824447975392e-05, + "loss": 1.3847, + "step": 780 + }, + { + "epoch": 2.07, + "learning_rate": 2.308304381401534e-05, + "loss": 1.4686, + "step": 781 + }, + { + "epoch": 2.07, + "learning_rate": 2.296248393586459e-05, + "loss": 1.4785, + "step": 782 + }, + { + "epoch": 2.08, + "learning_rate": 2.284214580227875e-05, + "loss": 1.4651, + "step": 783 + }, + { + "epoch": 2.08, + "learning_rate": 2.2722030400194976e-05, + "loss": 1.4577, + "step": 784 + }, + { + "epoch": 2.08, + "learning_rate": 2.2602138714723574e-05, + "loss": 1.4656, + "step": 785 + }, + { + "epoch": 2.08, + "learning_rate": 2.24824717291402e-05, + "loss": 1.4736, + "step": 786 + }, + { + "epoch": 2.09, + "learning_rate": 2.2363030424877535e-05, + "loss": 1.4946, + "step": 787 + }, + { + "epoch": 2.09, + "learning_rate": 2.2243815781517496e-05, + "loss": 1.4902, + "step": 788 + }, + { + "epoch": 2.09, + "learning_rate": 2.2124828776782957e-05, + "loss": 1.3805, + "step": 789 + }, + { + "epoch": 2.1, + "learning_rate": 2.2006070386529913e-05, + "loss": 1.4926, + "step": 790 + }, + { + "epoch": 2.1, + "learning_rate": 2.1887541584739385e-05, + "loss": 1.4136, + "step": 791 + }, + { + "epoch": 2.1, + "learning_rate": 2.1769243343509454e-05, + "loss": 1.4177, + "step": 792 + }, + { + "epoch": 2.1, + "learning_rate": 2.165117663304732e-05, + "loss": 1.4555, + "step": 793 + }, + { + "epoch": 2.11, + "learning_rate": 2.153334242166123e-05, + "loss": 1.4362, + "step": 794 + }, + { + "epoch": 2.11, + "learning_rate": 2.1415741675752742e-05, + "loss": 1.4483, + "step": 795 + }, + { + "epoch": 2.11, + "learning_rate": 2.129837535980856e-05, + "loss": 1.3899, + "step": 796 + }, + { + "epoch": 2.11, + "learning_rate": 2.1181244436392855e-05, + "loss": 1.521, + "step": 797 + }, + { + "epoch": 2.12, + "learning_rate": 2.1064349866139132e-05, + "loss": 1.4221, + "step": 798 + }, + { + "epoch": 2.12, + "learning_rate": 2.094769260774262e-05, + "loss": 1.4968, + "step": 799 + }, + { + "epoch": 2.12, + "learning_rate": 2.0831273617952136e-05, + "loss": 1.4631, + "step": 800 + }, + { + "epoch": 2.12, + "learning_rate": 2.071509385156244e-05, + "loss": 1.4571, + "step": 801 + }, + { + "epoch": 2.13, + "learning_rate": 2.0599154261406316e-05, + "loss": 1.4922, + "step": 802 + }, + { + "epoch": 2.13, + "learning_rate": 2.0483455798346786e-05, + "loss": 1.4316, + "step": 803 + }, + { + "epoch": 2.13, + "learning_rate": 2.0367999411269285e-05, + "loss": 1.4226, + "step": 804 + }, + { + "epoch": 2.14, + "learning_rate": 2.0252786047073895e-05, + "loss": 1.4586, + "step": 805 + }, + { + "epoch": 2.14, + "learning_rate": 2.0137816650667612e-05, + "loss": 1.4131, + "step": 806 + }, + { + "epoch": 2.14, + "learning_rate": 2.0023092164956474e-05, + "loss": 1.4782, + "step": 807 + }, + { + "epoch": 2.14, + "learning_rate": 1.9908613530838055e-05, + "loss": 1.4648, + "step": 808 + }, + { + "epoch": 2.15, + "learning_rate": 1.979438168719346e-05, + "loss": 1.4328, + "step": 809 + }, + { + "epoch": 2.15, + "learning_rate": 1.968039757087991e-05, + "loss": 1.4804, + "step": 810 + }, + { + "epoch": 2.15, + "learning_rate": 1.9566662116722793e-05, + "loss": 1.5185, + "step": 811 + }, + { + "epoch": 2.15, + "learning_rate": 1.9453176257508275e-05, + "loss": 1.418, + "step": 812 + }, + { + "epoch": 2.16, + "learning_rate": 1.9339940923975364e-05, + "loss": 1.5342, + "step": 813 + }, + { + "epoch": 2.16, + "learning_rate": 1.9226957044808497e-05, + "loss": 1.4951, + "step": 814 + }, + { + "epoch": 2.16, + "learning_rate": 1.911422554662981e-05, + "loss": 1.5001, + "step": 815 + }, + { + "epoch": 2.16, + "learning_rate": 1.9001747353991582e-05, + "loss": 1.4289, + "step": 816 + }, + { + "epoch": 2.17, + "learning_rate": 1.888952338936864e-05, + "loss": 1.4779, + "step": 817 + }, + { + "epoch": 2.17, + "learning_rate": 1.8777554573150795e-05, + "loss": 1.4541, + "step": 818 + }, + { + "epoch": 2.17, + "learning_rate": 1.8665841823635284e-05, + "loss": 1.3708, + "step": 819 + }, + { + "epoch": 2.18, + "learning_rate": 1.855438605701925e-05, + "loss": 1.4434, + "step": 820 + }, + { + "epoch": 2.18, + "learning_rate": 1.8443188187392257e-05, + "loss": 1.4388, + "step": 821 + }, + { + "epoch": 2.18, + "learning_rate": 1.8332249126728666e-05, + "loss": 1.543, + "step": 822 + }, + { + "epoch": 2.18, + "learning_rate": 1.8221569784880397e-05, + "loss": 1.4487, + "step": 823 + }, + { + "epoch": 2.19, + "learning_rate": 1.811115106956918e-05, + "loss": 1.4323, + "step": 824 + }, + { + "epoch": 2.19, + "learning_rate": 1.8000993886379342e-05, + "loss": 1.4424, + "step": 825 + }, + { + "epoch": 2.19, + "learning_rate": 1.789109913875025e-05, + "loss": 1.3609, + "step": 826 + }, + { + "epoch": 2.19, + "learning_rate": 1.7781467727968953e-05, + "loss": 1.4008, + "step": 827 + }, + { + "epoch": 2.2, + "learning_rate": 1.7672100553162774e-05, + "loss": 1.4308, + "step": 828 + }, + { + "epoch": 2.2, + "learning_rate": 1.7562998511291946e-05, + "loss": 1.5258, + "step": 829 + }, + { + "epoch": 2.2, + "learning_rate": 1.745416249714224e-05, + "loss": 1.4535, + "step": 830 + }, + { + "epoch": 2.2, + "learning_rate": 1.734559340331765e-05, + "loss": 1.4607, + "step": 831 + }, + { + "epoch": 2.21, + "learning_rate": 1.7237292120233044e-05, + "loss": 1.3692, + "step": 832 + }, + { + "epoch": 2.21, + "learning_rate": 1.7129259536106885e-05, + "loss": 1.4383, + "step": 833 + }, + { + "epoch": 2.21, + "learning_rate": 1.702149653695395e-05, + "loss": 1.3952, + "step": 834 + }, + { + "epoch": 2.21, + "learning_rate": 1.691400400657799e-05, + "loss": 1.3934, + "step": 835 + }, + { + "epoch": 2.22, + "learning_rate": 1.6806782826564654e-05, + "loss": 1.4273, + "step": 836 + }, + { + "epoch": 2.22, + "learning_rate": 1.6699833876274028e-05, + "loss": 1.4847, + "step": 837 + }, + { + "epoch": 2.22, + "learning_rate": 1.6593158032833624e-05, + "loss": 1.4369, + "step": 838 + }, + { + "epoch": 2.23, + "learning_rate": 1.6486756171131063e-05, + "loss": 1.4289, + "step": 839 + }, + { + "epoch": 2.23, + "learning_rate": 1.638062916380697e-05, + "loss": 1.4509, + "step": 840 + }, + { + "epoch": 2.23, + "learning_rate": 1.627477788124776e-05, + "loss": 1.4375, + "step": 841 + }, + { + "epoch": 2.23, + "learning_rate": 1.6169203191578557e-05, + "loss": 1.5603, + "step": 842 + }, + { + "epoch": 2.24, + "learning_rate": 1.606390596065602e-05, + "loss": 1.5002, + "step": 843 + }, + { + "epoch": 2.24, + "learning_rate": 1.5958887052061283e-05, + "loss": 1.4442, + "step": 844 + }, + { + "epoch": 2.24, + "learning_rate": 1.5854147327092855e-05, + "loss": 1.4966, + "step": 845 + }, + { + "epoch": 2.24, + "learning_rate": 1.5749687644759552e-05, + "loss": 1.4576, + "step": 846 + }, + { + "epoch": 2.25, + "learning_rate": 1.564550886177348e-05, + "loss": 1.3977, + "step": 847 + }, + { + "epoch": 2.25, + "learning_rate": 1.5541611832542925e-05, + "loss": 1.5316, + "step": 848 + }, + { + "epoch": 2.25, + "learning_rate": 1.5437997409165478e-05, + "loss": 1.434, + "step": 849 + }, + { + "epoch": 2.25, + "learning_rate": 1.533466644142095e-05, + "loss": 1.449, + "step": 850 + }, + { + "epoch": 2.26, + "learning_rate": 1.523161977676441e-05, + "loss": 1.4726, + "step": 851 + }, + { + "epoch": 2.26, + "learning_rate": 1.5128858260319285e-05, + "loss": 1.4609, + "step": 852 + }, + { + "epoch": 2.26, + "learning_rate": 1.5026382734870376e-05, + "loss": 1.3527, + "step": 853 + }, + { + "epoch": 2.27, + "learning_rate": 1.4924194040856975e-05, + "loss": 1.4362, + "step": 854 + }, + { + "epoch": 2.27, + "learning_rate": 1.4822293016365962e-05, + "loss": 1.4483, + "step": 855 + }, + { + "epoch": 2.27, + "learning_rate": 1.4720680497124934e-05, + "loss": 1.3976, + "step": 856 + }, + { + "epoch": 2.27, + "learning_rate": 1.4619357316495352e-05, + "loss": 1.4461, + "step": 857 + }, + { + "epoch": 2.28, + "learning_rate": 1.4518324305465702e-05, + "loss": 1.4909, + "step": 858 + }, + { + "epoch": 2.28, + "learning_rate": 1.4417582292644694e-05, + "loss": 1.4755, + "step": 859 + }, + { + "epoch": 2.28, + "learning_rate": 1.4317132104254438e-05, + "loss": 1.4927, + "step": 860 + }, + { + "epoch": 2.28, + "learning_rate": 1.421697456412371e-05, + "loss": 1.4471, + "step": 861 + }, + { + "epoch": 2.29, + "learning_rate": 1.4117110493681124e-05, + "loss": 1.4859, + "step": 862 + }, + { + "epoch": 2.29, + "learning_rate": 1.401754071194849e-05, + "loss": 1.471, + "step": 863 + }, + { + "epoch": 2.29, + "learning_rate": 1.3918266035534027e-05, + "loss": 1.3538, + "step": 864 + }, + { + "epoch": 2.29, + "learning_rate": 1.3819287278625697e-05, + "loss": 1.4906, + "step": 865 + }, + { + "epoch": 2.3, + "learning_rate": 1.3720605252984503e-05, + "loss": 1.5514, + "step": 866 + }, + { + "epoch": 2.3, + "learning_rate": 1.362222076793786e-05, + "loss": 1.45, + "step": 867 + }, + { + "epoch": 2.3, + "learning_rate": 1.3524134630372937e-05, + "loss": 1.4378, + "step": 868 + }, + { + "epoch": 2.31, + "learning_rate": 1.3426347644730047e-05, + "loss": 1.4655, + "step": 869 + }, + { + "epoch": 2.31, + "learning_rate": 1.3328860612996053e-05, + "loss": 1.4872, + "step": 870 + }, + { + "epoch": 2.31, + "learning_rate": 1.3231674334697774e-05, + "loss": 1.5583, + "step": 871 + }, + { + "epoch": 2.31, + "learning_rate": 1.3134789606895476e-05, + "loss": 1.3942, + "step": 872 + }, + { + "epoch": 2.32, + "learning_rate": 1.3038207224176213e-05, + "loss": 1.4931, + "step": 873 + }, + { + "epoch": 2.32, + "learning_rate": 1.2941927978647528e-05, + "loss": 1.4525, + "step": 874 + }, + { + "epoch": 2.32, + "learning_rate": 1.2845952659930693e-05, + "loss": 1.5043, + "step": 875 + }, + { + "epoch": 2.32, + "learning_rate": 1.275028205515445e-05, + "loss": 1.3989, + "step": 876 + }, + { + "epoch": 2.33, + "learning_rate": 1.2654916948948436e-05, + "loss": 1.4537, + "step": 877 + }, + { + "epoch": 2.33, + "learning_rate": 1.2559858123436802e-05, + "loss": 1.4277, + "step": 878 + }, + { + "epoch": 2.33, + "learning_rate": 1.2465106358231753e-05, + "loss": 1.441, + "step": 879 + }, + { + "epoch": 2.33, + "learning_rate": 1.23706624304272e-05, + "loss": 1.4923, + "step": 880 + }, + { + "epoch": 2.34, + "learning_rate": 1.2276527114592367e-05, + "loss": 1.4097, + "step": 881 + }, + { + "epoch": 2.34, + "learning_rate": 1.2182701182765426e-05, + "loss": 1.4913, + "step": 882 + }, + { + "epoch": 2.34, + "learning_rate": 1.208918540444719e-05, + "loss": 1.421, + "step": 883 + }, + { + "epoch": 2.34, + "learning_rate": 1.1995980546594776e-05, + "loss": 1.4794, + "step": 884 + }, + { + "epoch": 2.35, + "learning_rate": 1.1903087373615351e-05, + "loss": 1.4501, + "step": 885 + }, + { + "epoch": 2.35, + "learning_rate": 1.1810506647359793e-05, + "loss": 1.5201, + "step": 886 + }, + { + "epoch": 2.35, + "learning_rate": 1.171823912711657e-05, + "loss": 1.4111, + "step": 887 + }, + { + "epoch": 2.36, + "learning_rate": 1.1626285569605344e-05, + "loss": 1.4176, + "step": 888 + }, + { + "epoch": 2.36, + "learning_rate": 1.153464672897091e-05, + "loss": 1.5283, + "step": 889 + }, + { + "epoch": 2.36, + "learning_rate": 1.144332335677694e-05, + "loss": 1.4544, + "step": 890 + }, + { + "epoch": 2.36, + "learning_rate": 1.1352316201999841e-05, + "loss": 1.4489, + "step": 891 + }, + { + "epoch": 2.37, + "learning_rate": 1.1261626011022586e-05, + "loss": 1.4405, + "step": 892 + }, + { + "epoch": 2.37, + "learning_rate": 1.1171253527628628e-05, + "loss": 1.4862, + "step": 893 + }, + { + "epoch": 2.37, + "learning_rate": 1.1081199492995781e-05, + "loss": 1.4785, + "step": 894 + }, + { + "epoch": 2.37, + "learning_rate": 1.0991464645690142e-05, + "loss": 1.4659, + "step": 895 + }, + { + "epoch": 2.38, + "learning_rate": 1.0902049721660046e-05, + "loss": 1.438, + "step": 896 + }, + { + "epoch": 2.38, + "learning_rate": 1.0812955454229978e-05, + "loss": 1.4393, + "step": 897 + }, + { + "epoch": 2.38, + "learning_rate": 1.0724182574094682e-05, + "loss": 1.4228, + "step": 898 + }, + { + "epoch": 2.38, + "learning_rate": 1.0635731809312993e-05, + "loss": 1.4681, + "step": 899 + }, + { + "epoch": 2.39, + "learning_rate": 1.0547603885302049e-05, + "loss": 1.4792, + "step": 900 + }, + { + "epoch": 2.39, + "learning_rate": 1.045979952483117e-05, + "loss": 1.4224, + "step": 901 + }, + { + "epoch": 2.39, + "learning_rate": 1.037231944801607e-05, + "loss": 1.4102, + "step": 902 + }, + { + "epoch": 2.4, + "learning_rate": 1.0285164372312884e-05, + "loss": 1.4084, + "step": 903 + }, + { + "epoch": 2.4, + "learning_rate": 1.0198335012512272e-05, + "loss": 1.493, + "step": 904 + }, + { + "epoch": 2.4, + "learning_rate": 1.0111832080733601e-05, + "loss": 1.4612, + "step": 905 + }, + { + "epoch": 2.4, + "learning_rate": 1.0025656286419078e-05, + "loss": 1.4915, + "step": 906 + }, + { + "epoch": 2.41, + "learning_rate": 9.939808336327921e-06, + "loss": 1.4335, + "step": 907 + }, + { + "epoch": 2.41, + "learning_rate": 9.854288934530604e-06, + "loss": 1.4599, + "step": 908 + }, + { + "epoch": 2.41, + "learning_rate": 9.769098782403041e-06, + "loss": 1.3922, + "step": 909 + }, + { + "epoch": 2.41, + "learning_rate": 9.684238578620814e-06, + "loss": 1.4095, + "step": 910 + }, + { + "epoch": 2.42, + "learning_rate": 9.599709019153568e-06, + "loss": 1.4492, + "step": 911 + }, + { + "epoch": 2.42, + "learning_rate": 9.515510797259102e-06, + "loss": 1.4238, + "step": 912 + }, + { + "epoch": 2.42, + "learning_rate": 9.431644603477907e-06, + "loss": 1.4694, + "step": 913 + }, + { + "epoch": 2.42, + "learning_rate": 9.34811112562728e-06, + "loss": 1.5137, + "step": 914 + }, + { + "epoch": 2.43, + "learning_rate": 9.264911048795893e-06, + "loss": 1.3667, + "step": 915 + }, + { + "epoch": 2.43, + "learning_rate": 9.182045055337995e-06, + "loss": 1.4456, + "step": 916 + }, + { + "epoch": 2.43, + "learning_rate": 9.099513824867939e-06, + "loss": 1.4658, + "step": 917 + }, + { + "epoch": 2.44, + "learning_rate": 9.017318034254546e-06, + "loss": 1.4212, + "step": 918 + }, + { + "epoch": 2.44, + "learning_rate": 8.935458357615584e-06, + "loss": 1.4453, + "step": 919 + }, + { + "epoch": 2.44, + "learning_rate": 8.853935466312225e-06, + "loss": 1.4445, + "step": 920 + }, + { + "epoch": 2.44, + "learning_rate": 8.772750028943527e-06, + "loss": 1.3734, + "step": 921 + }, + { + "epoch": 2.45, + "learning_rate": 8.691902711341e-06, + "loss": 1.4893, + "step": 922 + }, + { + "epoch": 2.45, + "learning_rate": 8.611394176563038e-06, + "loss": 1.5218, + "step": 923 + }, + { + "epoch": 2.45, + "learning_rate": 8.531225084889654e-06, + "loss": 1.4519, + "step": 924 + }, + { + "epoch": 2.45, + "learning_rate": 8.451396093816872e-06, + "loss": 1.4522, + "step": 925 + }, + { + "epoch": 2.46, + "learning_rate": 8.371907858051497e-06, + "loss": 1.4729, + "step": 926 + }, + { + "epoch": 2.46, + "learning_rate": 8.292761029505603e-06, + "loss": 1.4275, + "step": 927 + }, + { + "epoch": 2.46, + "learning_rate": 8.21395625729135e-06, + "loss": 1.4604, + "step": 928 + }, + { + "epoch": 2.46, + "learning_rate": 8.135494187715475e-06, + "loss": 1.4039, + "step": 929 + }, + { + "epoch": 2.47, + "learning_rate": 8.057375464274142e-06, + "loss": 1.4912, + "step": 930 + }, + { + "epoch": 2.47, + "learning_rate": 7.979600727647596e-06, + "loss": 1.4187, + "step": 931 + }, + { + "epoch": 2.47, + "learning_rate": 7.902170615694915e-06, + "loss": 1.4225, + "step": 932 + }, + { + "epoch": 2.47, + "learning_rate": 7.825085763448798e-06, + "loss": 1.4011, + "step": 933 + }, + { + "epoch": 2.48, + "learning_rate": 7.748346803110295e-06, + "loss": 1.3841, + "step": 934 + }, + { + "epoch": 2.48, + "learning_rate": 7.671954364043754e-06, + "loss": 1.474, + "step": 935 + }, + { + "epoch": 2.48, + "learning_rate": 7.595909072771485e-06, + "loss": 1.4427, + "step": 936 + }, + { + "epoch": 2.49, + "learning_rate": 7.520211552968792e-06, + "loss": 1.5081, + "step": 937 + }, + { + "epoch": 2.49, + "learning_rate": 7.444862425458699e-06, + "loss": 1.4544, + "step": 938 + }, + { + "epoch": 2.49, + "learning_rate": 7.369862308207026e-06, + "loss": 1.5018, + "step": 939 + }, + { + "epoch": 2.49, + "learning_rate": 7.295211816317149e-06, + "loss": 1.4605, + "step": 940 + }, + { + "epoch": 2.5, + "learning_rate": 7.220911562025085e-06, + "loss": 1.4122, + "step": 941 + }, + { + "epoch": 2.5, + "learning_rate": 7.146962154694409e-06, + "loss": 1.4672, + "step": 942 + }, + { + "epoch": 2.5, + "learning_rate": 7.0733642008112836e-06, + "loss": 1.5257, + "step": 943 + }, + { + "epoch": 2.5, + "learning_rate": 7.000118303979464e-06, + "loss": 1.4355, + "step": 944 + }, + { + "epoch": 2.51, + "learning_rate": 6.927225064915349e-06, + "loss": 1.4799, + "step": 945 + }, + { + "epoch": 2.51, + "learning_rate": 6.854685081443097e-06, + "loss": 1.5002, + "step": 946 + }, + { + "epoch": 2.51, + "learning_rate": 6.782498948489613e-06, + "loss": 1.478, + "step": 947 + }, + { + "epoch": 2.51, + "learning_rate": 6.71066725807985e-06, + "loss": 1.4507, + "step": 948 + }, + { + "epoch": 2.52, + "learning_rate": 6.639190599331746e-06, + "loss": 1.482, + "step": 949 + }, + { + "epoch": 2.52, + "learning_rate": 6.5680695584515725e-06, + "loss": 1.4785, + "step": 950 + }, + { + "epoch": 2.52, + "learning_rate": 6.497304718728986e-06, + "loss": 1.4368, + "step": 951 + }, + { + "epoch": 2.53, + "learning_rate": 6.4268966605323725e-06, + "loss": 1.4422, + "step": 952 + }, + { + "epoch": 2.53, + "learning_rate": 6.3568459613039536e-06, + "loss": 1.4643, + "step": 953 + }, + { + "epoch": 2.53, + "learning_rate": 6.287153195555174e-06, + "loss": 1.4136, + "step": 954 + }, + { + "epoch": 2.53, + "learning_rate": 6.217818934861896e-06, + "loss": 1.4759, + "step": 955 + }, + { + "epoch": 2.54, + "learning_rate": 6.148843747859778e-06, + "loss": 1.5521, + "step": 956 + }, + { + "epoch": 2.54, + "learning_rate": 6.080228200239585e-06, + "loss": 1.4326, + "step": 957 + }, + { + "epoch": 2.54, + "learning_rate": 6.011972854742503e-06, + "loss": 1.4925, + "step": 958 + }, + { + "epoch": 2.54, + "learning_rate": 5.94407827115564e-06, + "loss": 1.5337, + "step": 959 + }, + { + "epoch": 2.55, + "learning_rate": 5.876545006307288e-06, + "loss": 1.4804, + "step": 960 + }, + { + "epoch": 2.55, + "learning_rate": 5.809373614062508e-06, + "loss": 1.4134, + "step": 961 + }, + { + "epoch": 2.55, + "learning_rate": 5.742564645318432e-06, + "loss": 1.4892, + "step": 962 + }, + { + "epoch": 2.55, + "learning_rate": 5.6761186479999115e-06, + "loss": 1.5565, + "step": 963 + }, + { + "epoch": 2.56, + "learning_rate": 5.610036167054839e-06, + "loss": 1.4506, + "step": 964 + }, + { + "epoch": 2.56, + "learning_rate": 5.544317744449873e-06, + "loss": 1.3972, + "step": 965 + }, + { + "epoch": 2.56, + "learning_rate": 5.478963919165819e-06, + "loss": 1.5298, + "step": 966 + }, + { + "epoch": 2.56, + "learning_rate": 5.4139752271933295e-06, + "loss": 1.507, + "step": 967 + }, + { + "epoch": 2.57, + "learning_rate": 5.349352201528446e-06, + "loss": 1.5116, + "step": 968 + }, + { + "epoch": 2.57, + "learning_rate": 5.285095372168264e-06, + "loss": 1.4707, + "step": 969 + }, + { + "epoch": 2.57, + "learning_rate": 5.2212052661065656e-06, + "loss": 1.4136, + "step": 970 + }, + { + "epoch": 2.58, + "learning_rate": 5.157682407329456e-06, + "loss": 1.5139, + "step": 971 + }, + { + "epoch": 2.58, + "learning_rate": 5.094527316811204e-06, + "loss": 1.4348, + "step": 972 + }, + { + "epoch": 2.58, + "learning_rate": 5.031740512509769e-06, + "loss": 1.5051, + "step": 973 + }, + { + "epoch": 2.58, + "learning_rate": 4.969322509362762e-06, + "loss": 1.4504, + "step": 974 + }, + { + "epoch": 2.59, + "learning_rate": 4.9072738192830255e-06, + "loss": 1.3567, + "step": 975 + }, + { + "epoch": 2.59, + "learning_rate": 4.845594951154614e-06, + "loss": 1.5088, + "step": 976 + }, + { + "epoch": 2.59, + "learning_rate": 4.784286410828481e-06, + "loss": 1.4605, + "step": 977 + }, + { + "epoch": 2.59, + "learning_rate": 4.723348701118407e-06, + "loss": 1.4053, + "step": 978 + }, + { + "epoch": 2.6, + "learning_rate": 4.662782321796849e-06, + "loss": 1.3966, + "step": 979 + }, + { + "epoch": 2.6, + "learning_rate": 4.60258776959086e-06, + "loss": 1.4311, + "step": 980 + }, + { + "epoch": 2.6, + "learning_rate": 4.54276553817799e-06, + "loss": 1.4258, + "step": 981 + }, + { + "epoch": 2.6, + "learning_rate": 4.483316118182251e-06, + "loss": 1.436, + "step": 982 + }, + { + "epoch": 2.61, + "learning_rate": 4.424239997170105e-06, + "loss": 1.4556, + "step": 983 + }, + { + "epoch": 2.61, + "learning_rate": 4.365537659646418e-06, + "loss": 1.399, + "step": 984 + }, + { + "epoch": 2.61, + "learning_rate": 4.307209587050576e-06, + "loss": 1.5502, + "step": 985 + }, + { + "epoch": 2.62, + "learning_rate": 4.249256257752421e-06, + "loss": 1.4414, + "step": 986 + }, + { + "epoch": 2.62, + "learning_rate": 4.191678147048445e-06, + "loss": 1.5388, + "step": 987 + }, + { + "epoch": 2.62, + "learning_rate": 4.134475727157777e-06, + "loss": 1.419, + "step": 988 + }, + { + "epoch": 2.62, + "learning_rate": 4.077649467218436e-06, + "loss": 1.471, + "step": 989 + }, + { + "epoch": 2.63, + "learning_rate": 4.0211998332833514e-06, + "loss": 1.4475, + "step": 990 + }, + { + "epoch": 2.63, + "learning_rate": 3.965127288316634e-06, + "loss": 1.4398, + "step": 991 + }, + { + "epoch": 2.63, + "learning_rate": 3.9094322921897574e-06, + "loss": 1.3985, + "step": 992 + }, + { + "epoch": 2.63, + "learning_rate": 3.854115301677757e-06, + "loss": 1.3773, + "step": 993 + }, + { + "epoch": 2.64, + "learning_rate": 3.799176770455526e-06, + "loss": 1.4409, + "step": 994 + }, + { + "epoch": 2.64, + "learning_rate": 3.7446171490940706e-06, + "loss": 1.4241, + "step": 995 + }, + { + "epoch": 2.64, + "learning_rate": 3.690436885056808e-06, + "loss": 1.4467, + "step": 996 + }, + { + "epoch": 2.64, + "learning_rate": 3.6366364226959047e-06, + "loss": 1.433, + "step": 997 + }, + { + "epoch": 2.65, + "learning_rate": 3.5832162032486684e-06, + "loss": 1.4797, + "step": 998 + }, + { + "epoch": 2.65, + "learning_rate": 3.530176664833834e-06, + "loss": 1.3771, + "step": 999 + }, + { + "epoch": 2.65, + "learning_rate": 3.4775182424481135e-06, + "loss": 1.4442, + "step": 1000 + }, + { + "epoch": 2.66, + "learning_rate": 3.4252413679624616e-06, + "loss": 1.4842, + "step": 1001 + }, + { + "epoch": 2.66, + "learning_rate": 3.373346470118682e-06, + "loss": 1.4969, + "step": 1002 + }, + { + "epoch": 2.66, + "learning_rate": 3.321833974525812e-06, + "loss": 1.4653, + "step": 1003 + }, + { + "epoch": 2.66, + "learning_rate": 3.2707043036566965e-06, + "loss": 1.4349, + "step": 1004 + }, + { + "epoch": 2.67, + "learning_rate": 3.219957876844465e-06, + "loss": 1.4189, + "step": 1005 + }, + { + "epoch": 2.67, + "learning_rate": 3.16959511027915e-06, + "loss": 1.4463, + "step": 1006 + }, + { + "epoch": 2.67, + "learning_rate": 3.119616417004223e-06, + "loss": 1.4771, + "step": 1007 + }, + { + "epoch": 2.67, + "learning_rate": 3.0700222069132422e-06, + "loss": 1.4248, + "step": 1008 + }, + { + "epoch": 2.68, + "learning_rate": 3.020812886746477e-06, + "loss": 1.4778, + "step": 1009 + }, + { + "epoch": 2.68, + "learning_rate": 2.9719888600875713e-06, + "loss": 1.4952, + "step": 1010 + }, + { + "epoch": 2.68, + "learning_rate": 2.923550527360247e-06, + "loss": 1.4907, + "step": 1011 + }, + { + "epoch": 2.68, + "learning_rate": 2.875498285824979e-06, + "loss": 1.4447, + "step": 1012 + }, + { + "epoch": 2.69, + "learning_rate": 2.8278325295758134e-06, + "loss": 1.4653, + "step": 1013 + }, + { + "epoch": 2.69, + "learning_rate": 2.7805536495370375e-06, + "loss": 1.4342, + "step": 1014 + }, + { + "epoch": 2.69, + "learning_rate": 2.7336620334600604e-06, + "loss": 1.4871, + "step": 1015 + }, + { + "epoch": 2.69, + "learning_rate": 2.687158065920192e-06, + "loss": 1.4212, + "step": 1016 + }, + { + "epoch": 2.7, + "learning_rate": 2.6410421283134866e-06, + "loss": 1.4416, + "step": 1017 + }, + { + "epoch": 2.7, + "learning_rate": 2.595314598853632e-06, + "loss": 1.5976, + "step": 1018 + }, + { + "epoch": 2.7, + "learning_rate": 2.54997585256882e-06, + "loss": 1.4335, + "step": 1019 + }, + { + "epoch": 2.71, + "learning_rate": 2.5050262612987206e-06, + "loss": 1.4337, + "step": 1020 + }, + { + "epoch": 2.71, + "learning_rate": 2.4604661936913687e-06, + "loss": 1.4384, + "step": 1021 + }, + { + "epoch": 2.71, + "learning_rate": 2.416296015200198e-06, + "loss": 1.4824, + "step": 1022 + }, + { + "epoch": 2.71, + "learning_rate": 2.372516088081006e-06, + "loss": 1.4162, + "step": 1023 + }, + { + "epoch": 2.72, + "learning_rate": 2.3291267713889953e-06, + "loss": 1.4193, + "step": 1024 + }, + { + "epoch": 2.72, + "learning_rate": 2.286128420975836e-06, + "loss": 1.442, + "step": 1025 + }, + { + "epoch": 2.72, + "learning_rate": 2.2435213894867303e-06, + "loss": 1.4323, + "step": 1026 + }, + { + "epoch": 2.72, + "learning_rate": 2.2013060263575415e-06, + "loss": 1.4348, + "step": 1027 + }, + { + "epoch": 2.73, + "learning_rate": 2.159482677811919e-06, + "loss": 1.4858, + "step": 1028 + }, + { + "epoch": 2.73, + "learning_rate": 2.1180516868584467e-06, + "loss": 1.3906, + "step": 1029 + }, + { + "epoch": 2.73, + "learning_rate": 2.0770133932878412e-06, + "loss": 1.374, + "step": 1030 + }, + { + "epoch": 2.73, + "learning_rate": 2.0363681336701746e-06, + "loss": 1.4733, + "step": 1031 + }, + { + "epoch": 2.74, + "learning_rate": 1.996116241352092e-06, + "loss": 1.4614, + "step": 1032 + }, + { + "epoch": 2.74, + "learning_rate": 1.9562580464541014e-06, + "loss": 1.5179, + "step": 1033 + }, + { + "epoch": 2.74, + "learning_rate": 1.9167938758678394e-06, + "loss": 1.3801, + "step": 1034 + }, + { + "epoch": 2.75, + "learning_rate": 1.8777240532534212e-06, + "loss": 1.4825, + "step": 1035 + }, + { + "epoch": 2.75, + "learning_rate": 1.8390488990367493e-06, + "loss": 1.3541, + "step": 1036 + }, + { + "epoch": 2.75, + "learning_rate": 1.8007687304069375e-06, + "loss": 1.4733, + "step": 1037 + }, + { + "epoch": 2.75, + "learning_rate": 1.7628838613136412e-06, + "loss": 1.4835, + "step": 1038 + }, + { + "epoch": 2.76, + "learning_rate": 1.7253946024645473e-06, + "loss": 1.4677, + "step": 1039 + }, + { + "epoch": 2.76, + "learning_rate": 1.6883012613227778e-06, + "loss": 1.446, + "step": 1040 + }, + { + "epoch": 2.76, + "learning_rate": 1.6516041421044127e-06, + "loss": 1.461, + "step": 1041 + }, + { + "epoch": 2.76, + "learning_rate": 1.6153035457759536e-06, + "loss": 1.4779, + "step": 1042 + }, + { + "epoch": 2.77, + "learning_rate": 1.579399770051876e-06, + "loss": 1.4892, + "step": 1043 + }, + { + "epoch": 2.77, + "learning_rate": 1.5438931093921805e-06, + "loss": 1.4253, + "step": 1044 + }, + { + "epoch": 2.77, + "learning_rate": 1.5087838549999956e-06, + "loss": 1.4644, + "step": 1045 + }, + { + "epoch": 2.77, + "learning_rate": 1.474072294819162e-06, + "loss": 1.4157, + "step": 1046 + }, + { + "epoch": 2.78, + "learning_rate": 1.4397587135318857e-06, + "loss": 1.4122, + "step": 1047 + }, + { + "epoch": 2.78, + "learning_rate": 1.4058433925564107e-06, + "loss": 1.5177, + "step": 1048 + }, + { + "epoch": 2.78, + "learning_rate": 1.3723266100447053e-06, + "loss": 1.4533, + "step": 1049 + }, + { + "epoch": 2.79, + "learning_rate": 1.3392086408801518e-06, + "loss": 1.4155, + "step": 1050 + }, + { + "epoch": 2.79, + "learning_rate": 1.3064897566753442e-06, + "loss": 1.4637, + "step": 1051 + }, + { + "epoch": 2.79, + "learning_rate": 1.2741702257698273e-06, + "loss": 1.395, + "step": 1052 + }, + { + "epoch": 2.79, + "learning_rate": 1.242250313227905e-06, + "loss": 1.4807, + "step": 1053 + }, + { + "epoch": 2.8, + "learning_rate": 1.210730280836464e-06, + "loss": 1.4223, + "step": 1054 + }, + { + "epoch": 2.8, + "learning_rate": 1.1796103871028196e-06, + "loss": 1.461, + "step": 1055 + }, + { + "epoch": 2.8, + "learning_rate": 1.1488908872526183e-06, + "loss": 1.4161, + "step": 1056 + }, + { + "epoch": 2.8, + "learning_rate": 1.1185720332277162e-06, + "loss": 1.4039, + "step": 1057 + }, + { + "epoch": 2.81, + "learning_rate": 1.0886540736841311e-06, + "loss": 1.4827, + "step": 1058 + }, + { + "epoch": 2.81, + "learning_rate": 1.0591372539900058e-06, + "loss": 1.4112, + "step": 1059 + }, + { + "epoch": 2.81, + "learning_rate": 1.0300218162235752e-06, + "loss": 1.3737, + "step": 1060 + }, + { + "epoch": 2.81, + "learning_rate": 1.0013079991711972e-06, + "loss": 1.4186, + "step": 1061 + }, + { + "epoch": 2.82, + "learning_rate": 9.729960383254134e-07, + "loss": 1.4583, + "step": 1062 + }, + { + "epoch": 2.82, + "learning_rate": 9.450861658829469e-07, + "loss": 1.4222, + "step": 1063 + }, + { + "epoch": 2.82, + "learning_rate": 9.175786107429085e-07, + "loss": 1.4342, + "step": 1064 + }, + { + "epoch": 2.82, + "learning_rate": 8.90473598504804e-07, + "loss": 1.4549, + "step": 1065 + }, + { + "epoch": 2.83, + "learning_rate": 8.637713514667634e-07, + "loss": 1.4385, + "step": 1066 + }, + { + "epoch": 2.83, + "learning_rate": 8.37472088623692e-07, + "loss": 1.447, + "step": 1067 + }, + { + "epoch": 2.83, + "learning_rate": 8.115760256654669e-07, + "loss": 1.5006, + "step": 1068 + }, + { + "epoch": 2.84, + "learning_rate": 7.860833749751773e-07, + "loss": 1.4861, + "step": 1069 + }, + { + "epoch": 2.84, + "learning_rate": 7.60994345627386e-07, + "loss": 1.4061, + "step": 1070 + }, + { + "epoch": 2.84, + "learning_rate": 7.363091433864044e-07, + "loss": 1.4612, + "step": 1071 + }, + { + "epoch": 2.84, + "learning_rate": 7.120279707046096e-07, + "loss": 1.432, + "step": 1072 + }, + { + "epoch": 2.85, + "learning_rate": 6.881510267207846e-07, + "loss": 1.4702, + "step": 1073 + }, + { + "epoch": 2.85, + "learning_rate": 6.646785072584872e-07, + "loss": 1.5084, + "step": 1074 + }, + { + "epoch": 2.85, + "learning_rate": 6.416106048244386e-07, + "loss": 1.4661, + "step": 1075 + }, + { + "epoch": 2.85, + "learning_rate": 6.189475086069485e-07, + "loss": 1.3731, + "step": 1076 + }, + { + "epoch": 2.86, + "learning_rate": 5.966894044743709e-07, + "loss": 1.455, + "step": 1077 + }, + { + "epoch": 2.86, + "learning_rate": 5.748364749735613e-07, + "loss": 1.4169, + "step": 1078 + }, + { + "epoch": 2.86, + "learning_rate": 5.533888993283831e-07, + "loss": 1.3907, + "step": 1079 + }, + { + "epoch": 2.86, + "learning_rate": 5.323468534382703e-07, + "loss": 1.4668, + "step": 1080 + }, + { + "epoch": 2.87, + "learning_rate": 5.117105098767283e-07, + "loss": 1.4628, + "step": 1081 + }, + { + "epoch": 2.87, + "learning_rate": 4.914800378899687e-07, + "loss": 1.3697, + "step": 1082 + }, + { + "epoch": 2.87, + "learning_rate": 4.7165560339549886e-07, + "loss": 1.5115, + "step": 1083 + }, + { + "epoch": 2.88, + "learning_rate": 4.522373689807624e-07, + "loss": 1.4415, + "step": 1084 + }, + { + "epoch": 2.88, + "learning_rate": 4.33225493901801e-07, + "loss": 1.4368, + "step": 1085 + }, + { + "epoch": 2.88, + "learning_rate": 4.1462013408196664e-07, + "loss": 1.4338, + "step": 1086 + }, + { + "epoch": 2.88, + "learning_rate": 3.9642144211061714e-07, + "loss": 1.3896, + "step": 1087 + }, + { + "epoch": 2.89, + "learning_rate": 3.7862956724190045e-07, + "loss": 1.4796, + "step": 1088 + }, + { + "epoch": 2.89, + "learning_rate": 3.612446553934723e-07, + "loss": 1.4081, + "step": 1089 + }, + { + "epoch": 2.89, + "learning_rate": 3.4426684914538045e-07, + "loss": 1.3993, + "step": 1090 + }, + { + "epoch": 2.89, + "learning_rate": 3.276962877388157e-07, + "loss": 1.4188, + "step": 1091 + }, + { + "epoch": 2.9, + "learning_rate": 3.115331070750127e-07, + "loss": 1.4747, + "step": 1092 + }, + { + "epoch": 2.9, + "learning_rate": 2.957774397141455e-07, + "loss": 1.4073, + "step": 1093 + }, + { + "epoch": 2.9, + "learning_rate": 2.8042941487419483e-07, + "loss": 1.4816, + "step": 1094 + }, + { + "epoch": 2.9, + "learning_rate": 2.6548915842993793e-07, + "loss": 1.4085, + "step": 1095 + }, + { + "epoch": 2.91, + "learning_rate": 2.5095679291188833e-07, + "loss": 1.3901, + "step": 1096 + }, + { + "epoch": 2.91, + "learning_rate": 2.368324375052855e-07, + "loss": 1.4146, + "step": 1097 + }, + { + "epoch": 2.91, + "learning_rate": 2.2311620804914002e-07, + "loss": 1.4465, + "step": 1098 + }, + { + "epoch": 2.92, + "learning_rate": 2.0980821703527886e-07, + "loss": 1.4298, + "step": 1099 + }, + { + "epoch": 2.92, + "learning_rate": 1.9690857360739612e-07, + "loss": 1.5303, + "step": 1100 + }, + { + "epoch": 2.92, + "learning_rate": 1.8441738356019256e-07, + "loss": 1.5039, + "step": 1101 + }, + { + "epoch": 2.92, + "learning_rate": 1.7233474933849303e-07, + "loss": 1.4276, + "step": 1102 + }, + { + "epoch": 2.93, + "learning_rate": 1.6066077003639714e-07, + "loss": 1.4591, + "step": 1103 + }, + { + "epoch": 2.93, + "learning_rate": 1.4939554139648537e-07, + "loss": 1.4353, + "step": 1104 + }, + { + "epoch": 2.93, + "learning_rate": 1.3853915580901988e-07, + "loss": 1.3791, + "step": 1105 + }, + { + "epoch": 2.93, + "learning_rate": 1.2809170231118938e-07, + "loss": 1.4862, + "step": 1106 + }, + { + "epoch": 2.94, + "learning_rate": 1.1805326658639316e-07, + "loss": 1.3861, + "step": 1107 + }, + { + "epoch": 2.94, + "learning_rate": 1.0842393096350823e-07, + "loss": 1.4563, + "step": 1108 + }, + { + "epoch": 2.94, + "learning_rate": 9.920377441623996e-08, + "loss": 1.4102, + "step": 1109 + }, + { + "epoch": 2.94, + "learning_rate": 9.039287256247253e-08, + "loss": 1.4573, + "step": 1110 + }, + { + "epoch": 2.95, + "learning_rate": 8.199129766363056e-08, + "loss": 1.4369, + "step": 1111 + }, + { + "epoch": 2.95, + "learning_rate": 7.399911862410735e-08, + "loss": 1.5305, + "step": 1112 + }, + { + "epoch": 2.95, + "learning_rate": 6.641640099068758e-08, + "loss": 1.4532, + "step": 1113 + }, + { + "epoch": 2.95, + "learning_rate": 5.9243206952019904e-08, + "loss": 1.473, + "step": 1114 + }, + { + "epoch": 2.96, + "learning_rate": 5.247959533808966e-08, + "loss": 1.3916, + "step": 1115 + }, + { + "epoch": 2.96, + "learning_rate": 4.612562161974698e-08, + "loss": 1.3834, + "step": 1116 + }, + { + "epoch": 2.96, + "learning_rate": 4.018133790826273e-08, + "loss": 1.4524, + "step": 1117 + }, + { + "epoch": 2.97, + "learning_rate": 3.464679295487328e-08, + "loss": 1.428, + "step": 1118 + }, + { + "epoch": 2.97, + "learning_rate": 2.952203215041971e-08, + "loss": 1.4697, + "step": 1119 + }, + { + "epoch": 2.97, + "learning_rate": 2.480709752493704e-08, + "loss": 1.3832, + "step": 1120 + }, + { + "epoch": 2.97, + "learning_rate": 2.050202774732668e-08, + "loss": 1.5032, + "step": 1121 + }, + { + "epoch": 2.98, + "learning_rate": 1.6606858125040038e-08, + "loss": 1.4712, + "step": 1122 + }, + { + "epoch": 2.98, + "learning_rate": 1.3121620603795404e-08, + "loss": 1.4285, + "step": 1123 + }, + { + "epoch": 2.98, + "learning_rate": 1.0046343767294852e-08, + "loss": 1.4409, + "step": 1124 + }, + { + "epoch": 2.98, + "learning_rate": 7.381052837013291e-09, + "loss": 1.4037, + "step": 1125 + }, + { + "epoch": 2.99, + "learning_rate": 5.125769671976421e-09, + "loss": 1.4707, + "step": 1126 + }, + { + "epoch": 2.99, + "learning_rate": 3.2805127685886504e-09, + "loss": 1.4793, + "step": 1127 + }, + { + "epoch": 2.99, + "learning_rate": 1.845297260472112e-09, + "loss": 1.4402, + "step": 1128 + }, + { + "epoch": 2.99, + "learning_rate": 8.201349183611928e-10, + "loss": 1.4417, + "step": 1129 + }, + { + "epoch": 3.0, + "learning_rate": 2.0503414998040982e-10, + "loss": 1.4528, + "step": 1130 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 1.4566, + "step": 1131 + }, + { + "epoch": 3.0, + "step": 1131, + "total_flos": 2.4511080029906534e+17, + "train_loss": 1.623349694105295, + "train_runtime": 8800.864, + "train_samples_per_second": 16.443, + "train_steps_per_second": 0.129 + } + ], + "max_steps": 1131, + "num_train_epochs": 3, + "total_flos": 2.4511080029906534e+17, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..a2a492f --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ab013c12a6ea3d260e1a7aed32904273c68d82484a9d2e5e4dea4267153789 +size 4027