commit c6f78ae574b30aeadaf382d9c59ed390d41e366c Author: ModelHub XC Date: Sat May 9 18:28:03 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: Lvxy1117/amber_fine_tune_sg_part1 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..3a09e9e --- /dev/null +++ b/README.md @@ -0,0 +1,195 @@ +--- +license: apache-2.0 +--- + +# Model Card for Model ID + +amber fine tune model used sg_90k_part1 +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..e600e3d --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "LLM360/Amber", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 2048, + "max_sequence_length": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..8e43854 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.37.0" +} diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..16b15cc --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c015f82112938dbf8c84d6640ccc8477ec3a0ad9696219b94428e9c56958565 +size 4840396416 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..f11ef3b --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525226f77eaf1e7490027f8a1f544224fe7f381598a2549d3dc81b9ceb57fdb7 +size 4857206856 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..9c993af --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda41719297ffc89d53b928fcea6fa12b8dcf5c2f765c64dbc5adde6487d543e +size 4857206904 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..29af474 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52d57f05d4269ce803a338e983fea1397b5d70b972feb32023ab7e72e8b3444 +size 4857206904 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..fa362b0 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21063d3d97f112c15785c80d4013a68ff2f946bbd4a2ef52e1a917bcc4c475c0 +size 4857206904 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..3c1b081 --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3302606fd133897d74854aa397ca2b3a441d45d6cf154d862e3dd764cfc2c7e6 +size 2684472112 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..005f893 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 26953662464 + }, + "weight_map": { + "lm_head.weight": "model-00006-of-00006.safetensors", + "model.embed_tokens.weight": "model-00001-of-00006.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.norm.weight": "model-00006-of-00006.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..f928b24 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..743455a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,42 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 2048, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..c8e268c --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,8490 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1410, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.5087719298245616e-07, + "loss": 0.8662, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 7.017543859649123e-07, + "loss": 0.8976, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.0526315789473685e-06, + "loss": 0.7644, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.4035087719298246e-06, + "loss": 0.75, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.7543859649122807e-06, + "loss": 0.7614, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.105263157894737e-06, + "loss": 0.7483, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.456140350877193e-06, + "loss": 0.8028, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 2.8070175438596493e-06, + "loss": 0.8263, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 3.157894736842105e-06, + "loss": 0.6802, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 3.5087719298245615e-06, + "loss": 0.6552, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 3.859649122807018e-06, + "loss": 0.6763, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 4.210526315789474e-06, + "loss": 0.9193, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 4.56140350877193e-06, + "loss": 0.7467, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 4.912280701754386e-06, + "loss": 0.6195, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 5.263157894736842e-06, + "loss": 0.6827, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 5.6140350877192985e-06, + "loss": 0.6826, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 5.964912280701755e-06, + "loss": 0.6064, + "step": 17 + }, + { + "epoch": 0.04, + "learning_rate": 6.31578947368421e-06, + "loss": 0.6143, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 6.666666666666667e-06, + "loss": 0.7848, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 7.017543859649123e-06, + "loss": 0.6382, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 7.368421052631579e-06, + "loss": 0.647, + "step": 21 + }, + { + "epoch": 0.05, + "learning_rate": 7.719298245614036e-06, + "loss": 0.6001, + "step": 22 + }, + { + "epoch": 0.05, + "learning_rate": 8.070175438596492e-06, + "loss": 0.5955, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 8.421052631578948e-06, + "loss": 0.59, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 8.771929824561405e-06, + "loss": 0.6246, + "step": 25 + }, + { + "epoch": 0.06, + "learning_rate": 9.12280701754386e-06, + "loss": 0.5511, + "step": 26 + }, + { + "epoch": 0.06, + "learning_rate": 9.473684210526315e-06, + "loss": 0.7381, + "step": 27 + }, + { + "epoch": 0.06, + "learning_rate": 9.824561403508772e-06, + "loss": 0.6461, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 1.017543859649123e-05, + "loss": 0.6119, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 1.0526315789473684e-05, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 0.07, + "learning_rate": 1.0877192982456142e-05, + "loss": 0.6209, + "step": 31 + }, + { + "epoch": 0.07, + "learning_rate": 1.1228070175438597e-05, + "loss": 0.6019, + "step": 32 + }, + { + "epoch": 0.07, + "learning_rate": 1.1578947368421053e-05, + "loss": 0.6868, + "step": 33 + }, + { + "epoch": 0.07, + "learning_rate": 1.192982456140351e-05, + "loss": 0.6542, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 1.2280701754385966e-05, + "loss": 0.7621, + "step": 35 + }, + { + "epoch": 0.08, + "learning_rate": 1.263157894736842e-05, + "loss": 0.6887, + "step": 36 + }, + { + "epoch": 0.08, + "learning_rate": 1.2982456140350879e-05, + "loss": 0.6346, + "step": 37 + }, + { + "epoch": 0.08, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5595, + "step": 38 + }, + { + "epoch": 0.08, + "learning_rate": 1.3684210526315791e-05, + "loss": 0.6166, + "step": 39 + }, + { + "epoch": 0.09, + "learning_rate": 1.4035087719298246e-05, + "loss": 0.6737, + "step": 40 + }, + { + "epoch": 0.09, + "learning_rate": 1.4385964912280704e-05, + "loss": 0.5667, + "step": 41 + }, + { + "epoch": 0.09, + "learning_rate": 1.4736842105263159e-05, + "loss": 0.5999, + "step": 42 + }, + { + "epoch": 0.09, + "learning_rate": 1.5087719298245615e-05, + "loss": 0.6239, + "step": 43 + }, + { + "epoch": 0.09, + "learning_rate": 1.543859649122807e-05, + "loss": 0.6119, + "step": 44 + }, + { + "epoch": 0.1, + "learning_rate": 1.578947368421053e-05, + "loss": 0.5702, + "step": 45 + }, + { + "epoch": 0.1, + "learning_rate": 1.6140350877192984e-05, + "loss": 0.6552, + "step": 46 + }, + { + "epoch": 0.1, + "learning_rate": 1.649122807017544e-05, + "loss": 0.6073, + "step": 47 + }, + { + "epoch": 0.1, + "learning_rate": 1.6842105263157896e-05, + "loss": 0.644, + "step": 48 + }, + { + "epoch": 0.1, + "learning_rate": 1.719298245614035e-05, + "loss": 0.6378, + "step": 49 + }, + { + "epoch": 0.11, + "learning_rate": 1.754385964912281e-05, + "loss": 0.6699, + "step": 50 + }, + { + "epoch": 0.11, + "learning_rate": 1.7894736842105264e-05, + "loss": 0.5986, + "step": 51 + }, + { + "epoch": 0.11, + "learning_rate": 1.824561403508772e-05, + "loss": 0.7282, + "step": 52 + }, + { + "epoch": 0.11, + "learning_rate": 1.8596491228070176e-05, + "loss": 0.6829, + "step": 53 + }, + { + "epoch": 0.11, + "learning_rate": 1.894736842105263e-05, + "loss": 0.5986, + "step": 54 + }, + { + "epoch": 0.12, + "learning_rate": 1.929824561403509e-05, + "loss": 0.6266, + "step": 55 + }, + { + "epoch": 0.12, + "learning_rate": 1.9649122807017544e-05, + "loss": 0.5485, + "step": 56 + }, + { + "epoch": 0.12, + "learning_rate": 2e-05, + "loss": 0.5597, + "step": 57 + }, + { + "epoch": 0.12, + "learning_rate": 1.999997304285086e-05, + "loss": 0.5847, + "step": 58 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999892171548773e-05, + "loss": 0.6214, + "step": 59 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999757386529747e-05, + "loss": 0.6071, + "step": 60 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999568688520472e-05, + "loss": 0.665, + "step": 61 + }, + { + "epoch": 0.13, + "learning_rate": 1.99993260785383e-05, + "loss": 0.6297, + "step": 62 + }, + { + "epoch": 0.13, + "learning_rate": 1.999902955789124e-05, + "loss": 0.6356, + "step": 63 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998679128177967e-05, + "loss": 0.6394, + "step": 64 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998274791287798e-05, + "loss": 0.6208, + "step": 65 + }, + { + "epoch": 0.14, + "learning_rate": 1.9997816549400688e-05, + "loss": 0.7193, + "step": 66 + }, + { + "epoch": 0.14, + "learning_rate": 1.999730440498721e-05, + "loss": 0.5991, + "step": 67 + }, + { + "epoch": 0.14, + "learning_rate": 1.9996738360808566e-05, + "loss": 0.5896, + "step": 68 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996118419916532e-05, + "loss": 0.6058, + "step": 69 + }, + { + "epoch": 0.15, + "learning_rate": 1.999544458565348e-05, + "loss": 0.6062, + "step": 70 + }, + { + "epoch": 0.15, + "learning_rate": 1.9994716861652344e-05, + "loss": 0.6048, + "step": 71 + }, + { + "epoch": 0.15, + "learning_rate": 1.999393525183659e-05, + "loss": 0.6239, + "step": 72 + }, + { + "epoch": 0.16, + "learning_rate": 1.9993099760420214e-05, + "loss": 0.6054, + "step": 73 + }, + { + "epoch": 0.16, + "learning_rate": 1.9992210391907713e-05, + "loss": 0.6833, + "step": 74 + }, + { + "epoch": 0.16, + "learning_rate": 1.9991267151094055e-05, + "loss": 0.6242, + "step": 75 + }, + { + "epoch": 0.16, + "learning_rate": 1.999027004306465e-05, + "loss": 0.6493, + "step": 76 + }, + { + "epoch": 0.16, + "learning_rate": 1.9989219073195346e-05, + "loss": 0.6074, + "step": 77 + }, + { + "epoch": 0.17, + "learning_rate": 1.9988114247152363e-05, + "loss": 0.6953, + "step": 78 + }, + { + "epoch": 0.17, + "learning_rate": 1.9986955570892302e-05, + "loss": 0.621, + "step": 79 + }, + { + "epoch": 0.17, + "learning_rate": 1.998574305066208e-05, + "loss": 0.9288, + "step": 80 + }, + { + "epoch": 0.17, + "learning_rate": 1.9984476692998914e-05, + "loss": 0.5513, + "step": 81 + }, + { + "epoch": 0.17, + "learning_rate": 1.9983156504730282e-05, + "loss": 0.5326, + "step": 82 + }, + { + "epoch": 0.18, + "learning_rate": 1.9981782492973893e-05, + "loss": 0.6088, + "step": 83 + }, + { + "epoch": 0.18, + "learning_rate": 1.9980354665137626e-05, + "loss": 0.5821, + "step": 84 + }, + { + "epoch": 0.18, + "learning_rate": 1.9978873028919523e-05, + "loss": 0.5775, + "step": 85 + }, + { + "epoch": 0.18, + "learning_rate": 1.9977337592307714e-05, + "loss": 0.5944, + "step": 86 + }, + { + "epoch": 0.19, + "learning_rate": 1.9975748363580406e-05, + "loss": 0.5936, + "step": 87 + }, + { + "epoch": 0.19, + "learning_rate": 1.9974105351305806e-05, + "loss": 0.6673, + "step": 88 + }, + { + "epoch": 0.19, + "learning_rate": 1.9972408564342105e-05, + "loss": 0.6375, + "step": 89 + }, + { + "epoch": 0.19, + "learning_rate": 1.9970658011837404e-05, + "loss": 0.5909, + "step": 90 + }, + { + "epoch": 0.19, + "learning_rate": 1.9968853703229697e-05, + "loss": 0.6053, + "step": 91 + }, + { + "epoch": 0.2, + "learning_rate": 1.9966995648246773e-05, + "loss": 0.5445, + "step": 92 + }, + { + "epoch": 0.2, + "learning_rate": 1.996508385690621e-05, + "loss": 0.5668, + "step": 93 + }, + { + "epoch": 0.2, + "learning_rate": 1.9963118339515303e-05, + "loss": 0.5677, + "step": 94 + }, + { + "epoch": 0.2, + "learning_rate": 1.9961099106670994e-05, + "loss": 0.6079, + "step": 95 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959026169259833e-05, + "loss": 0.63, + "step": 96 + }, + { + "epoch": 0.21, + "learning_rate": 1.995689953845793e-05, + "loss": 0.6319, + "step": 97 + }, + { + "epoch": 0.21, + "learning_rate": 1.9954719225730847e-05, + "loss": 0.598, + "step": 98 + }, + { + "epoch": 0.21, + "learning_rate": 1.99524852428336e-05, + "loss": 0.6368, + "step": 99 + }, + { + "epoch": 0.21, + "learning_rate": 1.995019760181055e-05, + "loss": 0.5722, + "step": 100 + }, + { + "epoch": 0.21, + "learning_rate": 1.994785631499535e-05, + "loss": 0.5877, + "step": 101 + }, + { + "epoch": 0.22, + "learning_rate": 1.9945461395010885e-05, + "loss": 0.5945, + "step": 102 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943012854769192e-05, + "loss": 0.588, + "step": 103 + }, + { + "epoch": 0.22, + "learning_rate": 1.9940510707471415e-05, + "loss": 0.5814, + "step": 104 + }, + { + "epoch": 0.22, + "learning_rate": 1.9937954966607696e-05, + "loss": 0.5506, + "step": 105 + }, + { + "epoch": 0.23, + "learning_rate": 1.993534564595714e-05, + "loss": 0.7037, + "step": 106 + }, + { + "epoch": 0.23, + "learning_rate": 1.9932682759587706e-05, + "loss": 0.7214, + "step": 107 + }, + { + "epoch": 0.23, + "learning_rate": 1.992996632185617e-05, + "loss": 0.583, + "step": 108 + }, + { + "epoch": 0.23, + "learning_rate": 1.9927196347408012e-05, + "loss": 0.5895, + "step": 109 + }, + { + "epoch": 0.23, + "learning_rate": 1.9924372851177353e-05, + "loss": 0.5871, + "step": 110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9921495848386877e-05, + "loss": 0.6514, + "step": 111 + }, + { + "epoch": 0.24, + "learning_rate": 1.9918565354547738e-05, + "loss": 0.594, + "step": 112 + }, + { + "epoch": 0.24, + "learning_rate": 1.9915581385459494e-05, + "loss": 0.6074, + "step": 113 + }, + { + "epoch": 0.24, + "learning_rate": 1.991254395721e-05, + "loss": 0.6044, + "step": 114 + }, + { + "epoch": 0.24, + "learning_rate": 1.9909453086175338e-05, + "loss": 0.6021, + "step": 115 + }, + { + "epoch": 0.25, + "learning_rate": 1.990630878901973e-05, + "loss": 0.5687, + "step": 116 + }, + { + "epoch": 0.25, + "learning_rate": 1.990311108269542e-05, + "loss": 0.6048, + "step": 117 + }, + { + "epoch": 0.25, + "learning_rate": 1.989985998444263e-05, + "loss": 0.6329, + "step": 118 + }, + { + "epoch": 0.25, + "learning_rate": 1.9896555511789418e-05, + "loss": 0.5591, + "step": 119 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893197682551624e-05, + "loss": 0.5716, + "step": 120 + }, + { + "epoch": 0.26, + "learning_rate": 1.9889786514832743e-05, + "loss": 0.5738, + "step": 121 + }, + { + "epoch": 0.26, + "learning_rate": 1.988632202702385e-05, + "loss": 0.6311, + "step": 122 + }, + { + "epoch": 0.26, + "learning_rate": 1.9882804237803487e-05, + "loss": 0.5432, + "step": 123 + }, + { + "epoch": 0.26, + "learning_rate": 1.987923316613757e-05, + "loss": 0.6476, + "step": 124 + }, + { + "epoch": 0.27, + "learning_rate": 1.9875608831279275e-05, + "loss": 0.5989, + "step": 125 + }, + { + "epoch": 0.27, + "learning_rate": 1.9871931252768953e-05, + "loss": 0.5636, + "step": 126 + }, + { + "epoch": 0.27, + "learning_rate": 1.9868200450434016e-05, + "loss": 0.5904, + "step": 127 + }, + { + "epoch": 0.27, + "learning_rate": 1.986441644438881e-05, + "loss": 0.7163, + "step": 128 + }, + { + "epoch": 0.27, + "learning_rate": 1.986057925503455e-05, + "loss": 0.6039, + "step": 129 + }, + { + "epoch": 0.28, + "learning_rate": 1.9856688903059165e-05, + "loss": 0.5949, + "step": 130 + }, + { + "epoch": 0.28, + "learning_rate": 1.9852745409437224e-05, + "loss": 0.5821, + "step": 131 + }, + { + "epoch": 0.28, + "learning_rate": 1.984874879542979e-05, + "loss": 0.5741, + "step": 132 + }, + { + "epoch": 0.28, + "learning_rate": 1.9844699082584324e-05, + "loss": 0.5729, + "step": 133 + }, + { + "epoch": 0.29, + "learning_rate": 1.9840596292734573e-05, + "loss": 0.5669, + "step": 134 + }, + { + "epoch": 0.29, + "learning_rate": 1.983644044800044e-05, + "loss": 0.6607, + "step": 135 + }, + { + "epoch": 0.29, + "learning_rate": 1.983223157078787e-05, + "loss": 0.6773, + "step": 136 + }, + { + "epoch": 0.29, + "learning_rate": 1.9827969683788728e-05, + "loss": 0.6229, + "step": 137 + }, + { + "epoch": 0.29, + "learning_rate": 1.9823654809980682e-05, + "loss": 0.5741, + "step": 138 + }, + { + "epoch": 0.3, + "learning_rate": 1.9819286972627066e-05, + "loss": 0.5656, + "step": 139 + }, + { + "epoch": 0.3, + "learning_rate": 1.9814866195276777e-05, + "loss": 0.5498, + "step": 140 + }, + { + "epoch": 0.3, + "learning_rate": 1.9810392501764118e-05, + "loss": 0.5746, + "step": 141 + }, + { + "epoch": 0.3, + "learning_rate": 1.9805865916208695e-05, + "loss": 0.6259, + "step": 142 + }, + { + "epoch": 0.3, + "learning_rate": 1.9801286463015278e-05, + "loss": 0.5406, + "step": 143 + }, + { + "epoch": 0.31, + "learning_rate": 1.9796654166873667e-05, + "loss": 0.6014, + "step": 144 + }, + { + "epoch": 0.31, + "learning_rate": 1.9791969052758563e-05, + "loss": 0.5907, + "step": 145 + }, + { + "epoch": 0.31, + "learning_rate": 1.978723114592943e-05, + "loss": 0.6961, + "step": 146 + }, + { + "epoch": 0.31, + "learning_rate": 1.9782440471930354e-05, + "loss": 0.5668, + "step": 147 + }, + { + "epoch": 0.31, + "learning_rate": 1.9777597056589926e-05, + "loss": 0.6467, + "step": 148 + }, + { + "epoch": 0.32, + "learning_rate": 1.9772700926021078e-05, + "loss": 0.578, + "step": 149 + }, + { + "epoch": 0.32, + "learning_rate": 1.976775210662095e-05, + "loss": 0.5804, + "step": 150 + }, + { + "epoch": 0.32, + "learning_rate": 1.976275062507076e-05, + "loss": 0.6496, + "step": 151 + }, + { + "epoch": 0.32, + "learning_rate": 1.975769650833564e-05, + "loss": 0.6114, + "step": 152 + }, + { + "epoch": 0.33, + "learning_rate": 1.975258978366451e-05, + "loss": 0.5341, + "step": 153 + }, + { + "epoch": 0.33, + "learning_rate": 1.9747430478589918e-05, + "loss": 0.5692, + "step": 154 + }, + { + "epoch": 0.33, + "learning_rate": 1.9742218620927888e-05, + "loss": 0.5832, + "step": 155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9736954238777793e-05, + "loss": 0.6228, + "step": 156 + }, + { + "epoch": 0.33, + "learning_rate": 1.9731637360522176e-05, + "loss": 0.6104, + "step": 157 + }, + { + "epoch": 0.34, + "learning_rate": 1.9726268014826614e-05, + "loss": 0.5358, + "step": 158 + }, + { + "epoch": 0.34, + "learning_rate": 1.9720846230639555e-05, + "loss": 0.5154, + "step": 159 + }, + { + "epoch": 0.34, + "learning_rate": 1.9715372037192174e-05, + "loss": 0.6287, + "step": 160 + }, + { + "epoch": 0.34, + "learning_rate": 1.9709845463998196e-05, + "loss": 0.6632, + "step": 161 + }, + { + "epoch": 0.34, + "learning_rate": 1.9704266540853753e-05, + "loss": 0.5931, + "step": 162 + }, + { + "epoch": 0.35, + "learning_rate": 1.9698635297837222e-05, + "loss": 0.5358, + "step": 163 + }, + { + "epoch": 0.35, + "learning_rate": 1.969295176530905e-05, + "loss": 0.6283, + "step": 164 + }, + { + "epoch": 0.35, + "learning_rate": 1.96872159739116e-05, + "loss": 0.6102, + "step": 165 + }, + { + "epoch": 0.35, + "learning_rate": 1.9681427954568996e-05, + "loss": 0.5304, + "step": 166 + }, + { + "epoch": 0.36, + "learning_rate": 1.9675587738486935e-05, + "loss": 0.5622, + "step": 167 + }, + { + "epoch": 0.36, + "learning_rate": 1.966969535715254e-05, + "loss": 0.5813, + "step": 168 + }, + { + "epoch": 0.36, + "learning_rate": 1.9663750842334157e-05, + "loss": 0.5547, + "step": 169 + }, + { + "epoch": 0.36, + "learning_rate": 1.9657754226081233e-05, + "loss": 0.5534, + "step": 170 + }, + { + "epoch": 0.36, + "learning_rate": 1.9651705540724092e-05, + "loss": 0.5268, + "step": 171 + }, + { + "epoch": 0.37, + "learning_rate": 1.964560481887381e-05, + "loss": 0.6293, + "step": 172 + }, + { + "epoch": 0.37, + "learning_rate": 1.963945209342199e-05, + "loss": 0.5484, + "step": 173 + }, + { + "epoch": 0.37, + "learning_rate": 1.963324739754063e-05, + "loss": 0.573, + "step": 174 + }, + { + "epoch": 0.37, + "learning_rate": 1.9626990764681896e-05, + "loss": 0.5163, + "step": 175 + }, + { + "epoch": 0.37, + "learning_rate": 1.9620682228578005e-05, + "loss": 0.6003, + "step": 176 + }, + { + "epoch": 0.38, + "learning_rate": 1.9614321823240968e-05, + "loss": 0.549, + "step": 177 + }, + { + "epoch": 0.38, + "learning_rate": 1.9607909582962478e-05, + "loss": 0.6384, + "step": 178 + }, + { + "epoch": 0.38, + "learning_rate": 1.9601445542313673e-05, + "loss": 0.6185, + "step": 179 + }, + { + "epoch": 0.38, + "learning_rate": 1.9594929736144978e-05, + "loss": 0.5759, + "step": 180 + }, + { + "epoch": 0.39, + "learning_rate": 1.9588362199585895e-05, + "loss": 0.6265, + "step": 181 + }, + { + "epoch": 0.39, + "learning_rate": 1.958174296804485e-05, + "loss": 0.7275, + "step": 182 + }, + { + "epoch": 0.39, + "learning_rate": 1.9575072077208956e-05, + "loss": 0.5421, + "step": 183 + }, + { + "epoch": 0.39, + "learning_rate": 1.9568349563043855e-05, + "loss": 0.6138, + "step": 184 + }, + { + "epoch": 0.39, + "learning_rate": 1.956157546179351e-05, + "loss": 0.6757, + "step": 185 + }, + { + "epoch": 0.4, + "learning_rate": 1.9554749809980014e-05, + "loss": 0.5631, + "step": 186 + }, + { + "epoch": 0.4, + "learning_rate": 1.9547872644403388e-05, + "loss": 0.624, + "step": 187 + }, + { + "epoch": 0.4, + "learning_rate": 1.954094400214139e-05, + "loss": 0.574, + "step": 188 + }, + { + "epoch": 0.4, + "learning_rate": 1.9533963920549307e-05, + "loss": 0.6794, + "step": 189 + }, + { + "epoch": 0.4, + "learning_rate": 1.952693243725976e-05, + "loss": 0.5697, + "step": 190 + }, + { + "epoch": 0.41, + "learning_rate": 1.9519849590182498e-05, + "loss": 0.5723, + "step": 191 + }, + { + "epoch": 0.41, + "learning_rate": 1.951271541750419e-05, + "loss": 0.5961, + "step": 192 + }, + { + "epoch": 0.41, + "learning_rate": 1.9505529957688235e-05, + "loss": 0.554, + "step": 193 + }, + { + "epoch": 0.41, + "learning_rate": 1.949829324947453e-05, + "loss": 0.5049, + "step": 194 + }, + { + "epoch": 0.41, + "learning_rate": 1.949100533187928e-05, + "loss": 0.5668, + "step": 195 + }, + { + "epoch": 0.42, + "learning_rate": 1.948366624419478e-05, + "loss": 0.8587, + "step": 196 + }, + { + "epoch": 0.42, + "learning_rate": 1.947627602598921e-05, + "loss": 0.5574, + "step": 197 + }, + { + "epoch": 0.42, + "learning_rate": 1.946883471710641e-05, + "loss": 0.5698, + "step": 198 + }, + { + "epoch": 0.42, + "learning_rate": 1.9461342357665674e-05, + "loss": 0.5807, + "step": 199 + }, + { + "epoch": 0.43, + "learning_rate": 1.9453798988061535e-05, + "loss": 0.5747, + "step": 200 + }, + { + "epoch": 0.43, + "learning_rate": 1.944620464896354e-05, + "loss": 0.5907, + "step": 201 + }, + { + "epoch": 0.43, + "learning_rate": 1.9438559381316037e-05, + "loss": 0.5842, + "step": 202 + }, + { + "epoch": 0.43, + "learning_rate": 1.9430863226337947e-05, + "loss": 0.5926, + "step": 203 + }, + { + "epoch": 0.43, + "learning_rate": 1.942311622552255e-05, + "loss": 0.5817, + "step": 204 + }, + { + "epoch": 0.44, + "learning_rate": 1.9415318420637256e-05, + "loss": 0.5592, + "step": 205 + }, + { + "epoch": 0.44, + "learning_rate": 1.9407469853723385e-05, + "loss": 0.6489, + "step": 206 + }, + { + "epoch": 0.44, + "learning_rate": 1.939957056709594e-05, + "loss": 0.6019, + "step": 207 + }, + { + "epoch": 0.44, + "learning_rate": 1.9391620603343358e-05, + "loss": 0.5702, + "step": 208 + }, + { + "epoch": 0.44, + "learning_rate": 1.938362000532732e-05, + "loss": 0.5035, + "step": 209 + }, + { + "epoch": 0.45, + "learning_rate": 1.9375568816182486e-05, + "loss": 0.6347, + "step": 210 + }, + { + "epoch": 0.45, + "learning_rate": 1.936746707931628e-05, + "loss": 0.6858, + "step": 211 + }, + { + "epoch": 0.45, + "learning_rate": 1.9359314838408645e-05, + "loss": 0.5813, + "step": 212 + }, + { + "epoch": 0.45, + "learning_rate": 1.9351112137411815e-05, + "loss": 0.7665, + "step": 213 + }, + { + "epoch": 0.46, + "learning_rate": 1.934285902055008e-05, + "loss": 0.5011, + "step": 214 + }, + { + "epoch": 0.46, + "learning_rate": 1.9334555532319536e-05, + "loss": 0.5746, + "step": 215 + }, + { + "epoch": 0.46, + "learning_rate": 1.9326201717487865e-05, + "loss": 0.5465, + "step": 216 + }, + { + "epoch": 0.46, + "learning_rate": 1.9317797621094063e-05, + "loss": 0.725, + "step": 217 + }, + { + "epoch": 0.46, + "learning_rate": 1.9309343288448233e-05, + "loss": 0.5627, + "step": 218 + }, + { + "epoch": 0.47, + "learning_rate": 1.9300838765131316e-05, + "loss": 0.5769, + "step": 219 + }, + { + "epoch": 0.47, + "learning_rate": 1.929228409699485e-05, + "loss": 0.5504, + "step": 220 + }, + { + "epoch": 0.47, + "learning_rate": 1.9283679330160726e-05, + "loss": 0.5275, + "step": 221 + }, + { + "epoch": 0.47, + "learning_rate": 1.927502451102095e-05, + "loss": 0.5643, + "step": 222 + }, + { + "epoch": 0.47, + "learning_rate": 1.926631968623736e-05, + "loss": 0.5223, + "step": 223 + }, + { + "epoch": 0.48, + "learning_rate": 1.9257564902741418e-05, + "loss": 0.66, + "step": 224 + }, + { + "epoch": 0.48, + "learning_rate": 1.9248760207733917e-05, + "loss": 0.5442, + "step": 225 + }, + { + "epoch": 0.48, + "learning_rate": 1.923990564868476e-05, + "loss": 0.5641, + "step": 226 + }, + { + "epoch": 0.48, + "learning_rate": 1.9231001273332672e-05, + "loss": 0.5879, + "step": 227 + }, + { + "epoch": 0.49, + "learning_rate": 1.9222047129684974e-05, + "loss": 0.4893, + "step": 228 + }, + { + "epoch": 0.49, + "learning_rate": 1.9213043266017305e-05, + "loss": 0.5996, + "step": 229 + }, + { + "epoch": 0.49, + "learning_rate": 1.9203989730873357e-05, + "loss": 0.5653, + "step": 230 + }, + { + "epoch": 0.49, + "learning_rate": 1.9194886573064636e-05, + "loss": 0.5909, + "step": 231 + }, + { + "epoch": 0.49, + "learning_rate": 1.9185733841670174e-05, + "loss": 0.6136, + "step": 232 + }, + { + "epoch": 0.5, + "learning_rate": 1.9176531586036282e-05, + "loss": 0.567, + "step": 233 + }, + { + "epoch": 0.5, + "learning_rate": 1.9167279855776277e-05, + "loss": 0.5549, + "step": 234 + }, + { + "epoch": 0.5, + "learning_rate": 1.9157978700770207e-05, + "loss": 0.6023, + "step": 235 + }, + { + "epoch": 0.5, + "learning_rate": 1.9148628171164608e-05, + "loss": 0.6015, + "step": 236 + }, + { + "epoch": 0.5, + "learning_rate": 1.9139228317372195e-05, + "loss": 0.5636, + "step": 237 + }, + { + "epoch": 0.51, + "learning_rate": 1.9129779190071622e-05, + "loss": 0.5434, + "step": 238 + }, + { + "epoch": 0.51, + "learning_rate": 1.9120280840207196e-05, + "loss": 0.6067, + "step": 239 + }, + { + "epoch": 0.51, + "learning_rate": 1.9110733318988607e-05, + "loss": 0.6202, + "step": 240 + }, + { + "epoch": 0.51, + "learning_rate": 1.9101136677890643e-05, + "loss": 0.5488, + "step": 241 + }, + { + "epoch": 0.51, + "learning_rate": 1.9091490968652917e-05, + "loss": 0.5133, + "step": 242 + }, + { + "epoch": 0.52, + "learning_rate": 1.90817962432796e-05, + "loss": 0.5316, + "step": 243 + }, + { + "epoch": 0.52, + "learning_rate": 1.9072052554039123e-05, + "loss": 0.5251, + "step": 244 + }, + { + "epoch": 0.52, + "learning_rate": 1.9062259953463897e-05, + "loss": 0.6964, + "step": 245 + }, + { + "epoch": 0.52, + "learning_rate": 1.9052418494350048e-05, + "loss": 0.505, + "step": 246 + }, + { + "epoch": 0.53, + "learning_rate": 1.904252822975711e-05, + "loss": 0.5992, + "step": 247 + }, + { + "epoch": 0.53, + "learning_rate": 1.9032589213007747e-05, + "loss": 0.5738, + "step": 248 + }, + { + "epoch": 0.53, + "learning_rate": 1.9022601497687472e-05, + "loss": 0.5535, + "step": 249 + }, + { + "epoch": 0.53, + "learning_rate": 1.9012565137644354e-05, + "loss": 0.5917, + "step": 250 + }, + { + "epoch": 0.53, + "learning_rate": 1.9002480186988722e-05, + "loss": 0.5675, + "step": 251 + }, + { + "epoch": 0.54, + "learning_rate": 1.899234670009288e-05, + "loss": 0.9224, + "step": 252 + }, + { + "epoch": 0.54, + "learning_rate": 1.898216473159081e-05, + "loss": 0.5505, + "step": 253 + }, + { + "epoch": 0.54, + "learning_rate": 1.8971934336377885e-05, + "loss": 0.5127, + "step": 254 + }, + { + "epoch": 0.54, + "learning_rate": 1.8961655569610557e-05, + "loss": 0.5166, + "step": 255 + }, + { + "epoch": 0.54, + "learning_rate": 1.895132848670608e-05, + "loss": 0.5903, + "step": 256 + }, + { + "epoch": 0.55, + "learning_rate": 1.89409531433422e-05, + "loss": 0.5994, + "step": 257 + }, + { + "epoch": 0.55, + "learning_rate": 1.893052959545684e-05, + "loss": 0.5717, + "step": 258 + }, + { + "epoch": 0.55, + "learning_rate": 1.892005789924784e-05, + "loss": 0.6139, + "step": 259 + }, + { + "epoch": 0.55, + "learning_rate": 1.890953811117261e-05, + "loss": 0.664, + "step": 260 + }, + { + "epoch": 0.56, + "learning_rate": 1.889897028794785e-05, + "loss": 0.6204, + "step": 261 + }, + { + "epoch": 0.56, + "learning_rate": 1.8888354486549238e-05, + "loss": 0.5762, + "step": 262 + }, + { + "epoch": 0.56, + "learning_rate": 1.887769076421112e-05, + "loss": 0.5652, + "step": 263 + }, + { + "epoch": 0.56, + "learning_rate": 1.8866979178426206e-05, + "loss": 0.6786, + "step": 264 + }, + { + "epoch": 0.56, + "learning_rate": 1.8856219786945263e-05, + "loss": 0.7182, + "step": 265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8845412647776795e-05, + "loss": 0.7323, + "step": 266 + }, + { + "epoch": 0.57, + "learning_rate": 1.8834557819186732e-05, + "loss": 0.7034, + "step": 267 + }, + { + "epoch": 0.57, + "learning_rate": 1.8823655359698122e-05, + "loss": 0.5732, + "step": 268 + }, + { + "epoch": 0.57, + "learning_rate": 1.881270532809081e-05, + "loss": 0.5907, + "step": 269 + }, + { + "epoch": 0.57, + "learning_rate": 1.880170778340112e-05, + "loss": 0.5519, + "step": 270 + }, + { + "epoch": 0.58, + "learning_rate": 1.8790662784921548e-05, + "loss": 0.5273, + "step": 271 + }, + { + "epoch": 0.58, + "learning_rate": 1.8779570392200426e-05, + "loss": 0.5191, + "step": 272 + }, + { + "epoch": 0.58, + "learning_rate": 1.876843066504161e-05, + "loss": 0.6582, + "step": 273 + }, + { + "epoch": 0.58, + "learning_rate": 1.875724366350416e-05, + "loss": 0.5217, + "step": 274 + }, + { + "epoch": 0.59, + "learning_rate": 1.8746009447902004e-05, + "loss": 0.5844, + "step": 275 + }, + { + "epoch": 0.59, + "learning_rate": 1.873472807880363e-05, + "loss": 0.6017, + "step": 276 + }, + { + "epoch": 0.59, + "learning_rate": 1.8723399617031754e-05, + "loss": 0.6601, + "step": 277 + }, + { + "epoch": 0.59, + "learning_rate": 1.871202412366297e-05, + "loss": 0.5643, + "step": 278 + }, + { + "epoch": 0.59, + "learning_rate": 1.8700601660027464e-05, + "loss": 0.5066, + "step": 279 + }, + { + "epoch": 0.6, + "learning_rate": 1.8689132287708643e-05, + "loss": 0.567, + "step": 280 + }, + { + "epoch": 0.6, + "learning_rate": 1.8677616068542824e-05, + "loss": 0.6143, + "step": 281 + }, + { + "epoch": 0.6, + "learning_rate": 1.866605306461889e-05, + "loss": 0.577, + "step": 282 + }, + { + "epoch": 0.6, + "learning_rate": 1.865444333827797e-05, + "loss": 0.5981, + "step": 283 + }, + { + "epoch": 0.6, + "learning_rate": 1.8642786952113085e-05, + "loss": 0.554, + "step": 284 + }, + { + "epoch": 0.61, + "learning_rate": 1.8631083968968827e-05, + "loss": 0.5653, + "step": 285 + }, + { + "epoch": 0.61, + "learning_rate": 1.8619334451941008e-05, + "loss": 0.5485, + "step": 286 + }, + { + "epoch": 0.61, + "learning_rate": 1.8607538464376325e-05, + "loss": 0.6102, + "step": 287 + }, + { + "epoch": 0.61, + "learning_rate": 1.8595696069872013e-05, + "loss": 0.5693, + "step": 288 + }, + { + "epoch": 0.61, + "learning_rate": 1.8583807332275513e-05, + "loss": 0.8636, + "step": 289 + }, + { + "epoch": 0.62, + "learning_rate": 1.8571872315684122e-05, + "loss": 0.4947, + "step": 290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8559891084444642e-05, + "loss": 0.7176, + "step": 291 + }, + { + "epoch": 0.62, + "learning_rate": 1.8547863703153042e-05, + "loss": 0.5391, + "step": 292 + }, + { + "epoch": 0.62, + "learning_rate": 1.8535790236654106e-05, + "loss": 0.6151, + "step": 293 + }, + { + "epoch": 0.63, + "learning_rate": 1.8523670750041073e-05, + "loss": 0.538, + "step": 294 + }, + { + "epoch": 0.63, + "learning_rate": 1.8511505308655314e-05, + "loss": 0.5991, + "step": 295 + }, + { + "epoch": 0.63, + "learning_rate": 1.8499293978085948e-05, + "loss": 0.6281, + "step": 296 + }, + { + "epoch": 0.63, + "learning_rate": 1.8487036824169505e-05, + "loss": 0.5706, + "step": 297 + }, + { + "epoch": 0.63, + "learning_rate": 1.8474733912989578e-05, + "loss": 0.6071, + "step": 298 + }, + { + "epoch": 0.64, + "learning_rate": 1.8462385310876444e-05, + "loss": 0.652, + "step": 299 + }, + { + "epoch": 0.64, + "learning_rate": 1.844999108440672e-05, + "loss": 0.5011, + "step": 300 + }, + { + "epoch": 0.64, + "learning_rate": 1.843755130040302e-05, + "loss": 0.555, + "step": 301 + }, + { + "epoch": 0.64, + "learning_rate": 1.8425066025933555e-05, + "loss": 0.5793, + "step": 302 + }, + { + "epoch": 0.64, + "learning_rate": 1.8412535328311813e-05, + "loss": 0.656, + "step": 303 + }, + { + "epoch": 0.65, + "learning_rate": 1.839995927509617e-05, + "loss": 0.5638, + "step": 304 + }, + { + "epoch": 0.65, + "learning_rate": 1.8387337934089534e-05, + "loss": 0.578, + "step": 305 + }, + { + "epoch": 0.65, + "learning_rate": 1.8374671373338973e-05, + "loss": 0.5273, + "step": 306 + }, + { + "epoch": 0.65, + "learning_rate": 1.8361959661135374e-05, + "loss": 0.5598, + "step": 307 + }, + { + "epoch": 0.66, + "learning_rate": 1.834920286601303e-05, + "loss": 0.5756, + "step": 308 + }, + { + "epoch": 0.66, + "learning_rate": 1.8336401056749312e-05, + "loss": 0.5958, + "step": 309 + }, + { + "epoch": 0.66, + "learning_rate": 1.8323554302364273e-05, + "loss": 0.5164, + "step": 310 + }, + { + "epoch": 0.66, + "learning_rate": 1.8310662672120288e-05, + "loss": 0.5305, + "step": 311 + }, + { + "epoch": 0.66, + "learning_rate": 1.8297726235521686e-05, + "loss": 0.5168, + "step": 312 + }, + { + "epoch": 0.67, + "learning_rate": 1.828474506231434e-05, + "loss": 0.5231, + "step": 313 + }, + { + "epoch": 0.67, + "learning_rate": 1.8271719222485352e-05, + "loss": 0.5364, + "step": 314 + }, + { + "epoch": 0.67, + "learning_rate": 1.8258648786262612e-05, + "loss": 0.5845, + "step": 315 + }, + { + "epoch": 0.67, + "learning_rate": 1.8245533824114462e-05, + "loss": 0.6356, + "step": 316 + }, + { + "epoch": 0.67, + "learning_rate": 1.82323744067493e-05, + "loss": 0.5311, + "step": 317 + }, + { + "epoch": 0.68, + "learning_rate": 1.821917060511521e-05, + "loss": 0.6394, + "step": 318 + }, + { + "epoch": 0.68, + "learning_rate": 1.8205922490399545e-05, + "loss": 0.515, + "step": 319 + }, + { + "epoch": 0.68, + "learning_rate": 1.8192630134028603e-05, + "loss": 0.5768, + "step": 320 + }, + { + "epoch": 0.68, + "learning_rate": 1.8179293607667177e-05, + "loss": 0.6466, + "step": 321 + }, + { + "epoch": 0.69, + "learning_rate": 1.8165912983218225e-05, + "loss": 0.511, + "step": 322 + }, + { + "epoch": 0.69, + "learning_rate": 1.8152488332822433e-05, + "loss": 0.5823, + "step": 323 + }, + { + "epoch": 0.69, + "learning_rate": 1.813901972885787e-05, + "loss": 0.5998, + "step": 324 + }, + { + "epoch": 0.69, + "learning_rate": 1.812550724393957e-05, + "loss": 0.5798, + "step": 325 + }, + { + "epoch": 0.69, + "learning_rate": 1.8111950950919138e-05, + "loss": 0.5294, + "step": 326 + }, + { + "epoch": 0.7, + "learning_rate": 1.8098350922884384e-05, + "loss": 0.5478, + "step": 327 + }, + { + "epoch": 0.7, + "learning_rate": 1.8084707233158906e-05, + "loss": 0.5513, + "step": 328 + }, + { + "epoch": 0.7, + "learning_rate": 1.807101995530169e-05, + "loss": 0.5569, + "step": 329 + }, + { + "epoch": 0.7, + "learning_rate": 1.805728916310675e-05, + "loss": 0.5221, + "step": 330 + }, + { + "epoch": 0.7, + "learning_rate": 1.8043514930602674e-05, + "loss": 0.5518, + "step": 331 + }, + { + "epoch": 0.71, + "learning_rate": 1.8029697332052277e-05, + "loss": 0.5667, + "step": 332 + }, + { + "epoch": 0.71, + "learning_rate": 1.8015836441952173e-05, + "loss": 0.5892, + "step": 333 + }, + { + "epoch": 0.71, + "learning_rate": 1.800193233503237e-05, + "loss": 0.5462, + "step": 334 + }, + { + "epoch": 0.71, + "learning_rate": 1.7987985086255896e-05, + "loss": 0.5712, + "step": 335 + }, + { + "epoch": 0.71, + "learning_rate": 1.7973994770818357e-05, + "loss": 0.616, + "step": 336 + }, + { + "epoch": 0.72, + "learning_rate": 1.7959961464147557e-05, + "loss": 0.7231, + "step": 337 + }, + { + "epoch": 0.72, + "learning_rate": 1.7945885241903092e-05, + "loss": 0.5568, + "step": 338 + }, + { + "epoch": 0.72, + "learning_rate": 1.7931766179975914e-05, + "loss": 0.5819, + "step": 339 + }, + { + "epoch": 0.72, + "learning_rate": 1.7917604354487964e-05, + "loss": 0.5489, + "step": 340 + }, + { + "epoch": 0.73, + "learning_rate": 1.7903399841791727e-05, + "loss": 0.5323, + "step": 341 + }, + { + "epoch": 0.73, + "learning_rate": 1.7889152718469836e-05, + "loss": 0.5843, + "step": 342 + }, + { + "epoch": 0.73, + "learning_rate": 1.7874863061334658e-05, + "loss": 0.5585, + "step": 343 + }, + { + "epoch": 0.73, + "learning_rate": 1.7860530947427878e-05, + "loss": 0.56, + "step": 344 + }, + { + "epoch": 0.73, + "learning_rate": 1.7846156454020077e-05, + "loss": 0.5985, + "step": 345 + }, + { + "epoch": 0.74, + "learning_rate": 1.7831739658610333e-05, + "loss": 0.5735, + "step": 346 + }, + { + "epoch": 0.74, + "learning_rate": 1.7817280638925787e-05, + "loss": 0.4935, + "step": 347 + }, + { + "epoch": 0.74, + "learning_rate": 1.7802779472921223e-05, + "loss": 0.5359, + "step": 348 + }, + { + "epoch": 0.74, + "learning_rate": 1.7788236238778668e-05, + "loss": 0.7238, + "step": 349 + }, + { + "epoch": 0.74, + "learning_rate": 1.777365101490694e-05, + "loss": 0.5957, + "step": 350 + }, + { + "epoch": 0.75, + "learning_rate": 1.7759023879941258e-05, + "loss": 0.5702, + "step": 351 + }, + { + "epoch": 0.75, + "learning_rate": 1.774435491274279e-05, + "loss": 0.5646, + "step": 352 + }, + { + "epoch": 0.75, + "learning_rate": 1.772964419239824e-05, + "loss": 0.5632, + "step": 353 + }, + { + "epoch": 0.75, + "learning_rate": 1.7714891798219432e-05, + "loss": 0.5156, + "step": 354 + }, + { + "epoch": 0.76, + "learning_rate": 1.770009780974286e-05, + "loss": 0.5397, + "step": 355 + }, + { + "epoch": 0.76, + "learning_rate": 1.768526230672927e-05, + "loss": 0.536, + "step": 356 + }, + { + "epoch": 0.76, + "learning_rate": 1.7670385369163242e-05, + "loss": 0.5894, + "step": 357 + }, + { + "epoch": 0.76, + "learning_rate": 1.7655467077252742e-05, + "loss": 0.4887, + "step": 358 + }, + { + "epoch": 0.76, + "learning_rate": 1.7640507511428686e-05, + "loss": 0.5744, + "step": 359 + }, + { + "epoch": 0.77, + "learning_rate": 1.7625506752344532e-05, + "loss": 0.5508, + "step": 360 + }, + { + "epoch": 0.77, + "learning_rate": 1.7610464880875815e-05, + "loss": 0.5196, + "step": 361 + }, + { + "epoch": 0.77, + "learning_rate": 1.759538197811973e-05, + "loss": 0.5254, + "step": 362 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580258125394693e-05, + "loss": 0.5571, + "step": 363 + }, + { + "epoch": 0.77, + "learning_rate": 1.756509340423989e-05, + "loss": 0.5562, + "step": 364 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887896414853e-05, + "loss": 0.7288, + "step": 365 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534641683899008e-05, + "loss": 0.5949, + "step": 366 + }, + { + "epoch": 0.78, + "learning_rate": 1.751935484889124e-05, + "loss": 0.5134, + "step": 367 + }, + { + "epoch": 0.78, + "learning_rate": 1.7504027473809456e-05, + "loss": 0.6306, + "step": 368 + }, + { + "epoch": 0.79, + "learning_rate": 1.748865964129011e-05, + "loss": 0.568, + "step": 369 + }, + { + "epoch": 0.79, + "learning_rate": 1.74732514341878e-05, + "loss": 0.9366, + "step": 370 + }, + { + "epoch": 0.79, + "learning_rate": 1.7457802935574795e-05, + "loss": 0.5199, + "step": 371 + }, + { + "epoch": 0.79, + "learning_rate": 1.7442314228740586e-05, + "loss": 0.5369, + "step": 372 + }, + { + "epoch": 0.79, + "learning_rate": 1.742678539719145e-05, + "loss": 0.7287, + "step": 373 + }, + { + "epoch": 0.8, + "learning_rate": 1.7411216524650003e-05, + "loss": 0.5812, + "step": 374 + }, + { + "epoch": 0.8, + "learning_rate": 1.739560769505471e-05, + "loss": 0.5035, + "step": 375 + }, + { + "epoch": 0.8, + "learning_rate": 1.7379958992559494e-05, + "loss": 0.551, + "step": 376 + }, + { + "epoch": 0.8, + "learning_rate": 1.7364270501533233e-05, + "loss": 0.5052, + "step": 377 + }, + { + "epoch": 0.8, + "learning_rate": 1.7348542306559326e-05, + "loss": 0.5535, + "step": 378 + }, + { + "epoch": 0.81, + "learning_rate": 1.733277449243523e-05, + "loss": 0.5613, + "step": 379 + }, + { + "epoch": 0.81, + "learning_rate": 1.731696714417201e-05, + "loss": 0.5979, + "step": 380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7301120346993875e-05, + "loss": 0.6526, + "step": 381 + }, + { + "epoch": 0.81, + "learning_rate": 1.7285234186337722e-05, + "loss": 0.6873, + "step": 382 + }, + { + "epoch": 0.81, + "learning_rate": 1.726930874785267e-05, + "loss": 0.5117, + "step": 383 + }, + { + "epoch": 0.82, + "learning_rate": 1.7253344117399603e-05, + "loss": 0.5227, + "step": 384 + }, + { + "epoch": 0.82, + "learning_rate": 1.72373403810507e-05, + "loss": 0.5714, + "step": 385 + }, + { + "epoch": 0.82, + "learning_rate": 1.7221297625088996e-05, + "loss": 0.5319, + "step": 386 + }, + { + "epoch": 0.82, + "learning_rate": 1.720521593600787e-05, + "loss": 0.6479, + "step": 387 + }, + { + "epoch": 0.83, + "learning_rate": 1.7189095400510633e-05, + "loss": 0.5302, + "step": 388 + }, + { + "epoch": 0.83, + "learning_rate": 1.7172936105510007e-05, + "loss": 0.5071, + "step": 389 + }, + { + "epoch": 0.83, + "learning_rate": 1.7156738138127708e-05, + "loss": 0.7573, + "step": 390 + }, + { + "epoch": 0.83, + "learning_rate": 1.7140501585693934e-05, + "loss": 0.53, + "step": 391 + }, + { + "epoch": 0.83, + "learning_rate": 1.7124226535746925e-05, + "loss": 0.5813, + "step": 392 + }, + { + "epoch": 0.84, + "learning_rate": 1.710791307603246e-05, + "loss": 0.6378, + "step": 393 + }, + { + "epoch": 0.84, + "learning_rate": 1.7091561294503426e-05, + "loss": 0.5711, + "step": 394 + }, + { + "epoch": 0.84, + "learning_rate": 1.7075171279319295e-05, + "loss": 0.538, + "step": 395 + }, + { + "epoch": 0.84, + "learning_rate": 1.7058743118845686e-05, + "loss": 0.5814, + "step": 396 + }, + { + "epoch": 0.84, + "learning_rate": 1.704227690165388e-05, + "loss": 0.5403, + "step": 397 + }, + { + "epoch": 0.85, + "learning_rate": 1.7025772716520324e-05, + "loss": 0.597, + "step": 398 + }, + { + "epoch": 0.85, + "learning_rate": 1.700923065242617e-05, + "loss": 0.5606, + "step": 399 + }, + { + "epoch": 0.85, + "learning_rate": 1.699265079855681e-05, + "loss": 0.5571, + "step": 400 + }, + { + "epoch": 0.85, + "learning_rate": 1.6976033244301353e-05, + "loss": 0.5394, + "step": 401 + }, + { + "epoch": 0.86, + "learning_rate": 1.6959378079252176e-05, + "loss": 0.6385, + "step": 402 + }, + { + "epoch": 0.86, + "learning_rate": 1.6942685393204436e-05, + "loss": 0.5464, + "step": 403 + }, + { + "epoch": 0.86, + "learning_rate": 1.692595527615558e-05, + "loss": 0.4927, + "step": 404 + }, + { + "epoch": 0.86, + "learning_rate": 1.6909187818304854e-05, + "loss": 0.6287, + "step": 405 + }, + { + "epoch": 0.86, + "learning_rate": 1.689238311005284e-05, + "loss": 0.6408, + "step": 406 + }, + { + "epoch": 0.87, + "learning_rate": 1.687554124200093e-05, + "loss": 0.6399, + "step": 407 + }, + { + "epoch": 0.87, + "learning_rate": 1.6858662304950886e-05, + "loss": 0.5654, + "step": 408 + }, + { + "epoch": 0.87, + "learning_rate": 1.6841746389904306e-05, + "loss": 0.5819, + "step": 409 + }, + { + "epoch": 0.87, + "learning_rate": 1.682479358806216e-05, + "loss": 0.5342, + "step": 410 + }, + { + "epoch": 0.87, + "learning_rate": 1.6807803990824295e-05, + "loss": 0.5133, + "step": 411 + }, + { + "epoch": 0.88, + "learning_rate": 1.6790777689788923e-05, + "loss": 0.5768, + "step": 412 + }, + { + "epoch": 0.88, + "learning_rate": 1.6773714776752157e-05, + "loss": 0.4957, + "step": 413 + }, + { + "epoch": 0.88, + "learning_rate": 1.6756615343707494e-05, + "loss": 0.5332, + "step": 414 + }, + { + "epoch": 0.88, + "learning_rate": 1.6739479482845328e-05, + "loss": 0.5559, + "step": 415 + }, + { + "epoch": 0.89, + "learning_rate": 1.6722307286552452e-05, + "loss": 0.5378, + "step": 416 + }, + { + "epoch": 0.89, + "learning_rate": 1.6705098847411552e-05, + "loss": 0.5984, + "step": 417 + }, + { + "epoch": 0.89, + "learning_rate": 1.6687854258200725e-05, + "loss": 0.5809, + "step": 418 + }, + { + "epoch": 0.89, + "learning_rate": 1.667057361189296e-05, + "loss": 0.5274, + "step": 419 + }, + { + "epoch": 0.89, + "learning_rate": 1.6653257001655652e-05, + "loss": 0.5169, + "step": 420 + }, + { + "epoch": 0.9, + "learning_rate": 1.6635904520850092e-05, + "loss": 0.5522, + "step": 421 + }, + { + "epoch": 0.9, + "learning_rate": 1.6618516263030958e-05, + "loss": 0.5573, + "step": 422 + }, + { + "epoch": 0.9, + "learning_rate": 1.6601092321945823e-05, + "loss": 0.8641, + "step": 423 + }, + { + "epoch": 0.9, + "learning_rate": 1.6583632791534646e-05, + "loss": 0.6361, + "step": 424 + }, + { + "epoch": 0.9, + "learning_rate": 1.6566137765929253e-05, + "loss": 0.574, + "step": 425 + }, + { + "epoch": 0.91, + "learning_rate": 1.6548607339452853e-05, + "loss": 0.5475, + "step": 426 + }, + { + "epoch": 0.91, + "learning_rate": 1.6531041606619508e-05, + "loss": 0.5733, + "step": 427 + }, + { + "epoch": 0.91, + "learning_rate": 1.6513440662133633e-05, + "loss": 0.5707, + "step": 428 + }, + { + "epoch": 0.91, + "learning_rate": 1.6495804600889486e-05, + "loss": 0.6041, + "step": 429 + }, + { + "epoch": 0.91, + "learning_rate": 1.6478133517970657e-05, + "loss": 0.6346, + "step": 430 + }, + { + "epoch": 0.92, + "learning_rate": 1.6460427508649546e-05, + "loss": 0.5406, + "step": 431 + }, + { + "epoch": 0.92, + "learning_rate": 1.644268666838686e-05, + "loss": 0.6286, + "step": 432 + }, + { + "epoch": 0.92, + "learning_rate": 1.6424911092831094e-05, + "loss": 0.5748, + "step": 433 + }, + { + "epoch": 0.92, + "learning_rate": 1.6407100877818015e-05, + "loss": 0.562, + "step": 434 + }, + { + "epoch": 0.93, + "learning_rate": 1.6389256119370153e-05, + "loss": 0.5044, + "step": 435 + }, + { + "epoch": 0.93, + "learning_rate": 1.6371376913696268e-05, + "loss": 0.5033, + "step": 436 + }, + { + "epoch": 0.93, + "learning_rate": 1.635346335719084e-05, + "loss": 0.5715, + "step": 437 + }, + { + "epoch": 0.93, + "learning_rate": 1.6335515546433552e-05, + "loss": 0.4951, + "step": 438 + }, + { + "epoch": 0.93, + "learning_rate": 1.6317533578188774e-05, + "loss": 0.6537, + "step": 439 + }, + { + "epoch": 0.94, + "learning_rate": 1.6299517549405016e-05, + "loss": 0.4899, + "step": 440 + }, + { + "epoch": 0.94, + "learning_rate": 1.6281467557214437e-05, + "loss": 0.5508, + "step": 441 + }, + { + "epoch": 0.94, + "learning_rate": 1.6263383698932307e-05, + "loss": 0.5715, + "step": 442 + }, + { + "epoch": 0.94, + "learning_rate": 1.6245266072056475e-05, + "loss": 0.5756, + "step": 443 + }, + { + "epoch": 0.94, + "learning_rate": 1.6227114774266855e-05, + "loss": 0.4816, + "step": 444 + }, + { + "epoch": 0.95, + "learning_rate": 1.6208929903424893e-05, + "loss": 0.604, + "step": 445 + }, + { + "epoch": 0.95, + "learning_rate": 1.619071155757305e-05, + "loss": 0.5611, + "step": 446 + }, + { + "epoch": 0.95, + "learning_rate": 1.6172459834934257e-05, + "loss": 0.5746, + "step": 447 + }, + { + "epoch": 0.95, + "learning_rate": 1.615417483391139e-05, + "loss": 0.6121, + "step": 448 + }, + { + "epoch": 0.96, + "learning_rate": 1.6135856653086762e-05, + "loss": 0.5301, + "step": 449 + }, + { + "epoch": 0.96, + "learning_rate": 1.6117505391221543e-05, + "loss": 0.6232, + "step": 450 + }, + { + "epoch": 0.96, + "learning_rate": 1.609912114725529e-05, + "loss": 0.5813, + "step": 451 + }, + { + "epoch": 0.96, + "learning_rate": 1.6080704020305353e-05, + "loss": 0.5819, + "step": 452 + }, + { + "epoch": 0.96, + "learning_rate": 1.6062254109666383e-05, + "loss": 0.5674, + "step": 453 + }, + { + "epoch": 0.97, + "learning_rate": 1.6043771514809777e-05, + "loss": 0.5507, + "step": 454 + }, + { + "epoch": 0.97, + "learning_rate": 1.602525633538315e-05, + "loss": 0.5557, + "step": 455 + }, + { + "epoch": 0.97, + "learning_rate": 1.6006708671209794e-05, + "loss": 0.5442, + "step": 456 + }, + { + "epoch": 0.97, + "learning_rate": 1.5988128622288137e-05, + "loss": 0.5993, + "step": 457 + }, + { + "epoch": 0.97, + "learning_rate": 1.596951628879121e-05, + "loss": 0.6125, + "step": 458 + }, + { + "epoch": 0.98, + "learning_rate": 1.59508717710661e-05, + "loss": 0.5378, + "step": 459 + }, + { + "epoch": 0.98, + "learning_rate": 1.593219516963342e-05, + "loss": 0.5442, + "step": 460 + }, + { + "epoch": 0.98, + "learning_rate": 1.5913486585186753e-05, + "loss": 0.6602, + "step": 461 + }, + { + "epoch": 0.98, + "learning_rate": 1.5894746118592122e-05, + "loss": 0.566, + "step": 462 + }, + { + "epoch": 0.99, + "learning_rate": 1.5875973870887436e-05, + "loss": 0.5463, + "step": 463 + }, + { + "epoch": 0.99, + "learning_rate": 1.5857169943281948e-05, + "loss": 0.5223, + "step": 464 + }, + { + "epoch": 0.99, + "learning_rate": 1.5838334437155724e-05, + "loss": 0.5547, + "step": 465 + }, + { + "epoch": 0.99, + "learning_rate": 1.5819467454059062e-05, + "loss": 0.7302, + "step": 466 + }, + { + "epoch": 0.99, + "learning_rate": 1.5800569095711983e-05, + "loss": 0.6616, + "step": 467 + }, + { + "epoch": 1.0, + "learning_rate": 1.578163946400366e-05, + "loss": 0.4715, + "step": 468 + }, + { + "epoch": 1.0, + "learning_rate": 1.5762678660991875e-05, + "loss": 0.4849, + "step": 469 + }, + { + "epoch": 1.0, + "learning_rate": 1.5743686788902463e-05, + "loss": 0.5749, + "step": 470 + }, + { + "epoch": 1.0, + "learning_rate": 1.5724663950128777e-05, + "loss": 0.4897, + "step": 471 + }, + { + "epoch": 1.0, + "learning_rate": 1.5705610247231107e-05, + "loss": 0.3574, + "step": 472 + }, + { + "epoch": 1.01, + "learning_rate": 1.568652578293616e-05, + "loss": 0.4656, + "step": 473 + }, + { + "epoch": 1.01, + "learning_rate": 1.566741066013649e-05, + "loss": 0.4328, + "step": 474 + }, + { + "epoch": 1.01, + "learning_rate": 1.5648264981889936e-05, + "loss": 0.439, + "step": 475 + }, + { + "epoch": 1.01, + "learning_rate": 1.562908885141908e-05, + "loss": 0.4264, + "step": 476 + }, + { + "epoch": 1.01, + "learning_rate": 1.5609882372110683e-05, + "loss": 0.4621, + "step": 477 + }, + { + "epoch": 1.02, + "learning_rate": 1.559064564751513e-05, + "loss": 0.4388, + "step": 478 + }, + { + "epoch": 1.02, + "learning_rate": 1.5571378781345875e-05, + "loss": 0.5268, + "step": 479 + }, + { + "epoch": 1.02, + "learning_rate": 1.555208187747887e-05, + "loss": 0.3881, + "step": 480 + }, + { + "epoch": 1.02, + "learning_rate": 1.553275503995202e-05, + "loss": 0.4049, + "step": 481 + }, + { + "epoch": 1.03, + "learning_rate": 1.551339837296462e-05, + "loss": 0.452, + "step": 482 + }, + { + "epoch": 1.03, + "learning_rate": 1.549401198087677e-05, + "loss": 0.5123, + "step": 483 + }, + { + "epoch": 1.03, + "learning_rate": 1.5474595968208853e-05, + "loss": 0.554, + "step": 484 + }, + { + "epoch": 1.03, + "learning_rate": 1.5455150439640932e-05, + "loss": 0.4197, + "step": 485 + }, + { + "epoch": 1.03, + "learning_rate": 1.5435675500012212e-05, + "loss": 0.4411, + "step": 486 + }, + { + "epoch": 1.04, + "learning_rate": 1.5416171254320467e-05, + "loss": 0.4467, + "step": 487 + }, + { + "epoch": 1.04, + "learning_rate": 1.5396637807721463e-05, + "loss": 0.581, + "step": 488 + }, + { + "epoch": 1.04, + "learning_rate": 1.5377075265528407e-05, + "loss": 0.486, + "step": 489 + }, + { + "epoch": 1.04, + "learning_rate": 1.5357483733211377e-05, + "loss": 0.403, + "step": 490 + }, + { + "epoch": 1.04, + "learning_rate": 1.5337863316396743e-05, + "loss": 0.4318, + "step": 491 + }, + { + "epoch": 1.05, + "learning_rate": 1.5318214120866597e-05, + "loss": 0.3938, + "step": 492 + }, + { + "epoch": 1.05, + "learning_rate": 1.5298536252558212e-05, + "loss": 0.5432, + "step": 493 + }, + { + "epoch": 1.05, + "learning_rate": 1.527882981756343e-05, + "loss": 0.4367, + "step": 494 + }, + { + "epoch": 1.05, + "learning_rate": 1.5259094922128108e-05, + "loss": 0.4516, + "step": 495 + }, + { + "epoch": 1.06, + "learning_rate": 1.5239331672651552e-05, + "loss": 0.4864, + "step": 496 + }, + { + "epoch": 1.06, + "learning_rate": 1.5219540175685938e-05, + "loss": 0.4523, + "step": 497 + }, + { + "epoch": 1.06, + "learning_rate": 1.5199720537935726e-05, + "loss": 0.555, + "step": 498 + }, + { + "epoch": 1.06, + "learning_rate": 1.5179872866257109e-05, + "loss": 0.3994, + "step": 499 + }, + { + "epoch": 1.06, + "learning_rate": 1.515999726765741e-05, + "loss": 0.4714, + "step": 500 + }, + { + "epoch": 1.07, + "learning_rate": 1.514009384929453e-05, + "loss": 0.5137, + "step": 501 + }, + { + "epoch": 1.07, + "learning_rate": 1.5120162718476347e-05, + "loss": 0.5492, + "step": 502 + }, + { + "epoch": 1.07, + "learning_rate": 1.5100203982660162e-05, + "loss": 0.5626, + "step": 503 + }, + { + "epoch": 1.07, + "learning_rate": 1.5080217749452094e-05, + "loss": 0.4047, + "step": 504 + }, + { + "epoch": 1.07, + "learning_rate": 1.506020412660651e-05, + "loss": 0.4376, + "step": 505 + }, + { + "epoch": 1.08, + "learning_rate": 1.5040163222025466e-05, + "loss": 0.4096, + "step": 506 + }, + { + "epoch": 1.08, + "learning_rate": 1.5020095143758085e-05, + "loss": 0.429, + "step": 507 + }, + { + "epoch": 1.08, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.466, + "step": 508 + }, + { + "epoch": 1.08, + "learning_rate": 1.4979877899092779e-05, + "loss": 0.4206, + "step": 509 + }, + { + "epoch": 1.09, + "learning_rate": 1.4959728949523307e-05, + "loss": 0.4151, + "step": 510 + }, + { + "epoch": 1.09, + "learning_rate": 1.4939553259923235e-05, + "loss": 0.5916, + "step": 511 + }, + { + "epoch": 1.09, + "learning_rate": 1.4919350939068383e-05, + "loss": 0.4114, + "step": 512 + }, + { + "epoch": 1.09, + "learning_rate": 1.4899122095878137e-05, + "loss": 0.4489, + "step": 513 + }, + { + "epoch": 1.09, + "learning_rate": 1.4878866839414892e-05, + "loss": 0.4071, + "step": 514 + }, + { + "epoch": 1.1, + "learning_rate": 1.4858585278883439e-05, + "loss": 0.4666, + "step": 515 + }, + { + "epoch": 1.1, + "learning_rate": 1.483827752363039e-05, + "loss": 0.4299, + "step": 516 + }, + { + "epoch": 1.1, + "learning_rate": 1.4817943683143583e-05, + "loss": 0.4332, + "step": 517 + }, + { + "epoch": 1.1, + "learning_rate": 1.479758386705149e-05, + "loss": 0.4278, + "step": 518 + }, + { + "epoch": 1.1, + "learning_rate": 1.477719818512263e-05, + "loss": 0.5002, + "step": 519 + }, + { + "epoch": 1.11, + "learning_rate": 1.4756786747264981e-05, + "loss": 0.4764, + "step": 520 + }, + { + "epoch": 1.11, + "learning_rate": 1.4736349663525374e-05, + "loss": 0.4269, + "step": 521 + }, + { + "epoch": 1.11, + "learning_rate": 1.4715887044088912e-05, + "loss": 0.4289, + "step": 522 + }, + { + "epoch": 1.11, + "learning_rate": 1.4695398999278376e-05, + "loss": 0.5354, + "step": 523 + }, + { + "epoch": 1.11, + "learning_rate": 1.4674885639553616e-05, + "loss": 0.4014, + "step": 524 + }, + { + "epoch": 1.12, + "learning_rate": 1.4654347075510975e-05, + "loss": 0.3984, + "step": 525 + }, + { + "epoch": 1.12, + "learning_rate": 1.463378341788268e-05, + "loss": 0.4506, + "step": 526 + }, + { + "epoch": 1.12, + "learning_rate": 1.4613194777536248e-05, + "loss": 0.4297, + "step": 527 + }, + { + "epoch": 1.12, + "learning_rate": 1.4592581265473884e-05, + "loss": 0.4673, + "step": 528 + }, + { + "epoch": 1.13, + "learning_rate": 1.45719429928319e-05, + "loss": 0.388, + "step": 529 + }, + { + "epoch": 1.13, + "learning_rate": 1.4551280070880089e-05, + "loss": 0.3934, + "step": 530 + }, + { + "epoch": 1.13, + "learning_rate": 1.4530592611021146e-05, + "loss": 0.4194, + "step": 531 + }, + { + "epoch": 1.13, + "learning_rate": 1.4509880724790062e-05, + "loss": 0.4533, + "step": 532 + }, + { + "epoch": 1.13, + "learning_rate": 1.4489144523853513e-05, + "loss": 0.407, + "step": 533 + }, + { + "epoch": 1.14, + "learning_rate": 1.4468384120009273e-05, + "loss": 0.4387, + "step": 534 + }, + { + "epoch": 1.14, + "learning_rate": 1.4447599625185604e-05, + "loss": 0.3819, + "step": 535 + }, + { + "epoch": 1.14, + "learning_rate": 1.4426791151440654e-05, + "loss": 0.4498, + "step": 536 + }, + { + "epoch": 1.14, + "learning_rate": 1.4405958810961842e-05, + "loss": 0.4084, + "step": 537 + }, + { + "epoch": 1.14, + "learning_rate": 1.4385102716065276e-05, + "loss": 0.4129, + "step": 538 + }, + { + "epoch": 1.15, + "learning_rate": 1.4364222979195124e-05, + "loss": 0.4277, + "step": 539 + }, + { + "epoch": 1.15, + "learning_rate": 1.4343319712923024e-05, + "loss": 0.4222, + "step": 540 + }, + { + "epoch": 1.15, + "learning_rate": 1.432239302994747e-05, + "loss": 0.4406, + "step": 541 + }, + { + "epoch": 1.15, + "learning_rate": 1.4301443043093205e-05, + "loss": 0.4434, + "step": 542 + }, + { + "epoch": 1.16, + "learning_rate": 1.4280469865310614e-05, + "loss": 0.4577, + "step": 543 + }, + { + "epoch": 1.16, + "learning_rate": 1.4259473609675107e-05, + "loss": 0.4785, + "step": 544 + }, + { + "epoch": 1.16, + "learning_rate": 1.4238454389386531e-05, + "loss": 0.441, + "step": 545 + }, + { + "epoch": 1.16, + "learning_rate": 1.421741231776853e-05, + "loss": 0.4395, + "step": 546 + }, + { + "epoch": 1.16, + "learning_rate": 1.419634750826796e-05, + "loss": 0.4651, + "step": 547 + }, + { + "epoch": 1.17, + "learning_rate": 1.4175260074454267e-05, + "loss": 0.3872, + "step": 548 + }, + { + "epoch": 1.17, + "learning_rate": 1.4154150130018867e-05, + "loss": 0.4242, + "step": 549 + }, + { + "epoch": 1.17, + "learning_rate": 1.4133017788774541e-05, + "loss": 0.4917, + "step": 550 + }, + { + "epoch": 1.17, + "learning_rate": 1.4111863164654831e-05, + "loss": 0.452, + "step": 551 + }, + { + "epoch": 1.17, + "learning_rate": 1.4090686371713403e-05, + "loss": 0.3756, + "step": 552 + }, + { + "epoch": 1.18, + "learning_rate": 1.4069487524123448e-05, + "loss": 0.398, + "step": 553 + }, + { + "epoch": 1.18, + "learning_rate": 1.4048266736177073e-05, + "loss": 0.427, + "step": 554 + }, + { + "epoch": 1.18, + "learning_rate": 1.4027024122284663e-05, + "loss": 0.4636, + "step": 555 + }, + { + "epoch": 1.18, + "learning_rate": 1.4005759796974276e-05, + "loss": 0.4771, + "step": 556 + }, + { + "epoch": 1.19, + "learning_rate": 1.3984473874891038e-05, + "loss": 0.433, + "step": 557 + }, + { + "epoch": 1.19, + "learning_rate": 1.3963166470796502e-05, + "loss": 0.5075, + "step": 558 + }, + { + "epoch": 1.19, + "learning_rate": 1.3941837699568037e-05, + "loss": 0.4287, + "step": 559 + }, + { + "epoch": 1.19, + "learning_rate": 1.392048767619822e-05, + "loss": 0.4379, + "step": 560 + }, + { + "epoch": 1.19, + "learning_rate": 1.3899116515794204e-05, + "loss": 0.4394, + "step": 561 + }, + { + "epoch": 1.2, + "learning_rate": 1.3877724333577102e-05, + "loss": 0.4455, + "step": 562 + }, + { + "epoch": 1.2, + "learning_rate": 1.385631124488136e-05, + "loss": 0.5637, + "step": 563 + }, + { + "epoch": 1.2, + "learning_rate": 1.3834877365154144e-05, + "loss": 0.4545, + "step": 564 + }, + { + "epoch": 1.2, + "learning_rate": 1.3813422809954711e-05, + "loss": 0.4857, + "step": 565 + }, + { + "epoch": 1.2, + "learning_rate": 1.3791947694953795e-05, + "loss": 0.4526, + "step": 566 + }, + { + "epoch": 1.21, + "learning_rate": 1.3770452135932967e-05, + "loss": 0.4468, + "step": 567 + }, + { + "epoch": 1.21, + "learning_rate": 1.374893624878403e-05, + "loss": 0.5532, + "step": 568 + }, + { + "epoch": 1.21, + "learning_rate": 1.3727400149508373e-05, + "loss": 0.4146, + "step": 569 + }, + { + "epoch": 1.21, + "learning_rate": 1.3705843954216367e-05, + "loss": 0.4376, + "step": 570 + }, + { + "epoch": 1.21, + "learning_rate": 1.368426777912673e-05, + "loss": 0.3972, + "step": 571 + }, + { + "epoch": 1.22, + "learning_rate": 1.366267174056589e-05, + "loss": 0.5031, + "step": 572 + }, + { + "epoch": 1.22, + "learning_rate": 1.3641055954967377e-05, + "loss": 0.4251, + "step": 573 + }, + { + "epoch": 1.22, + "learning_rate": 1.361942053887118e-05, + "loss": 0.4303, + "step": 574 + }, + { + "epoch": 1.22, + "learning_rate": 1.3597765608923127e-05, + "loss": 0.4729, + "step": 575 + }, + { + "epoch": 1.23, + "learning_rate": 1.3576091281874258e-05, + "loss": 0.458, + "step": 576 + }, + { + "epoch": 1.23, + "learning_rate": 1.3554397674580182e-05, + "loss": 0.416, + "step": 577 + }, + { + "epoch": 1.23, + "learning_rate": 1.3532684904000461e-05, + "loss": 0.409, + "step": 578 + }, + { + "epoch": 1.23, + "learning_rate": 1.3510953087197972e-05, + "loss": 0.4528, + "step": 579 + }, + { + "epoch": 1.23, + "learning_rate": 1.3489202341338286e-05, + "loss": 0.3871, + "step": 580 + }, + { + "epoch": 1.24, + "learning_rate": 1.3467432783689018e-05, + "loss": 0.4527, + "step": 581 + }, + { + "epoch": 1.24, + "learning_rate": 1.344564453161921e-05, + "loss": 0.438, + "step": 582 + }, + { + "epoch": 1.24, + "learning_rate": 1.3423837702598696e-05, + "loss": 0.4686, + "step": 583 + }, + { + "epoch": 1.24, + "learning_rate": 1.3402012414197466e-05, + "loss": 0.4685, + "step": 584 + }, + { + "epoch": 1.24, + "learning_rate": 1.3380168784085028e-05, + "loss": 0.5712, + "step": 585 + }, + { + "epoch": 1.25, + "learning_rate": 1.3358306930029786e-05, + "loss": 0.4276, + "step": 586 + }, + { + "epoch": 1.25, + "learning_rate": 1.3336426969898384e-05, + "loss": 0.3882, + "step": 587 + }, + { + "epoch": 1.25, + "learning_rate": 1.3314529021655099e-05, + "loss": 0.4515, + "step": 588 + }, + { + "epoch": 1.25, + "learning_rate": 1.3292613203361175e-05, + "loss": 0.4591, + "step": 589 + }, + { + "epoch": 1.26, + "learning_rate": 1.3270679633174219e-05, + "loss": 0.4546, + "step": 590 + }, + { + "epoch": 1.26, + "learning_rate": 1.3248728429347525e-05, + "loss": 0.4148, + "step": 591 + }, + { + "epoch": 1.26, + "learning_rate": 1.3226759710229474e-05, + "loss": 0.4262, + "step": 592 + }, + { + "epoch": 1.26, + "learning_rate": 1.3204773594262871e-05, + "loss": 0.4672, + "step": 593 + }, + { + "epoch": 1.26, + "learning_rate": 1.3182770199984321e-05, + "loss": 0.462, + "step": 594 + }, + { + "epoch": 1.27, + "learning_rate": 1.3160749646023575e-05, + "loss": 0.435, + "step": 595 + }, + { + "epoch": 1.27, + "learning_rate": 1.3138712051102908e-05, + "loss": 0.4688, + "step": 596 + }, + { + "epoch": 1.27, + "learning_rate": 1.3116657534036467e-05, + "loss": 0.4343, + "step": 597 + }, + { + "epoch": 1.27, + "learning_rate": 1.3094586213729631e-05, + "loss": 0.4522, + "step": 598 + }, + { + "epoch": 1.27, + "learning_rate": 1.3072498209178376e-05, + "loss": 0.4201, + "step": 599 + }, + { + "epoch": 1.28, + "learning_rate": 1.3050393639468629e-05, + "loss": 0.405, + "step": 600 + }, + { + "epoch": 1.28, + "learning_rate": 1.3028272623775622e-05, + "loss": 0.4581, + "step": 601 + }, + { + "epoch": 1.28, + "learning_rate": 1.3006135281363267e-05, + "loss": 0.4026, + "step": 602 + }, + { + "epoch": 1.28, + "learning_rate": 1.2983981731583486e-05, + "loss": 0.4508, + "step": 603 + }, + { + "epoch": 1.29, + "learning_rate": 1.2961812093875587e-05, + "loss": 0.5279, + "step": 604 + }, + { + "epoch": 1.29, + "learning_rate": 1.2939626487765624e-05, + "loss": 0.5008, + "step": 605 + }, + { + "epoch": 1.29, + "learning_rate": 1.291742503286573e-05, + "loss": 0.4679, + "step": 606 + }, + { + "epoch": 1.29, + "learning_rate": 1.2895207848873488e-05, + "loss": 0.39, + "step": 607 + }, + { + "epoch": 1.29, + "learning_rate": 1.287297505557129e-05, + "loss": 0.4171, + "step": 608 + }, + { + "epoch": 1.3, + "learning_rate": 1.2850726772825685e-05, + "loss": 0.6276, + "step": 609 + }, + { + "epoch": 1.3, + "learning_rate": 1.282846312058672e-05, + "loss": 0.4669, + "step": 610 + }, + { + "epoch": 1.3, + "learning_rate": 1.280618421888732e-05, + "loss": 0.4437, + "step": 611 + }, + { + "epoch": 1.3, + "learning_rate": 1.2783890187842618e-05, + "loss": 0.411, + "step": 612 + }, + { + "epoch": 1.3, + "learning_rate": 1.2761581147649318e-05, + "loss": 0.5022, + "step": 613 + }, + { + "epoch": 1.31, + "learning_rate": 1.2739257218585044e-05, + "loss": 0.3699, + "step": 614 + }, + { + "epoch": 1.31, + "learning_rate": 1.2716918521007697e-05, + "loss": 0.4104, + "step": 615 + }, + { + "epoch": 1.31, + "learning_rate": 1.2694565175354792e-05, + "loss": 0.4293, + "step": 616 + }, + { + "epoch": 1.31, + "learning_rate": 1.2672197302142826e-05, + "loss": 0.4112, + "step": 617 + }, + { + "epoch": 1.31, + "learning_rate": 1.264981502196662e-05, + "loss": 0.4051, + "step": 618 + }, + { + "epoch": 1.32, + "learning_rate": 1.2627418455498664e-05, + "loss": 0.4586, + "step": 619 + }, + { + "epoch": 1.32, + "learning_rate": 1.260500772348847e-05, + "loss": 0.4391, + "step": 620 + }, + { + "epoch": 1.32, + "learning_rate": 1.2582582946761939e-05, + "loss": 0.4341, + "step": 621 + }, + { + "epoch": 1.32, + "learning_rate": 1.2560144246220669e-05, + "loss": 0.4549, + "step": 622 + }, + { + "epoch": 1.33, + "learning_rate": 1.253769174284135e-05, + "loss": 0.4016, + "step": 623 + }, + { + "epoch": 1.33, + "learning_rate": 1.251522555767507e-05, + "loss": 0.4004, + "step": 624 + }, + { + "epoch": 1.33, + "learning_rate": 1.2492745811846693e-05, + "loss": 0.4357, + "step": 625 + }, + { + "epoch": 1.33, + "learning_rate": 1.2470252626554191e-05, + "loss": 0.4213, + "step": 626 + }, + { + "epoch": 1.33, + "learning_rate": 1.2447746123067996e-05, + "loss": 0.4392, + "step": 627 + }, + { + "epoch": 1.34, + "learning_rate": 1.2425226422730341e-05, + "loss": 0.4143, + "step": 628 + }, + { + "epoch": 1.34, + "learning_rate": 1.2402693646954607e-05, + "loss": 0.4138, + "step": 629 + }, + { + "epoch": 1.34, + "learning_rate": 1.238014791722468e-05, + "loss": 0.4327, + "step": 630 + }, + { + "epoch": 1.34, + "learning_rate": 1.2357589355094275e-05, + "loss": 0.4665, + "step": 631 + }, + { + "epoch": 1.34, + "learning_rate": 1.2335018082186295e-05, + "loss": 0.6256, + "step": 632 + }, + { + "epoch": 1.35, + "learning_rate": 1.2312434220192178e-05, + "loss": 0.3503, + "step": 633 + }, + { + "epoch": 1.35, + "learning_rate": 1.2289837890871232e-05, + "loss": 0.405, + "step": 634 + }, + { + "epoch": 1.35, + "learning_rate": 1.2267229216049978e-05, + "loss": 0.4501, + "step": 635 + }, + { + "epoch": 1.35, + "learning_rate": 1.22446083176215e-05, + "loss": 0.4207, + "step": 636 + }, + { + "epoch": 1.36, + "learning_rate": 1.2221975317544786e-05, + "loss": 0.437, + "step": 637 + }, + { + "epoch": 1.36, + "learning_rate": 1.2199330337844064e-05, + "loss": 0.427, + "step": 638 + }, + { + "epoch": 1.36, + "learning_rate": 1.2176673500608157e-05, + "loss": 0.3718, + "step": 639 + }, + { + "epoch": 1.36, + "learning_rate": 1.2154004927989815e-05, + "loss": 0.4484, + "step": 640 + }, + { + "epoch": 1.36, + "learning_rate": 1.213132474220505e-05, + "loss": 0.4583, + "step": 641 + }, + { + "epoch": 1.37, + "learning_rate": 1.2108633065532498e-05, + "loss": 0.4142, + "step": 642 + }, + { + "epoch": 1.37, + "learning_rate": 1.2085930020312739e-05, + "loss": 0.4064, + "step": 643 + }, + { + "epoch": 1.37, + "learning_rate": 1.206321572894765e-05, + "loss": 0.4418, + "step": 644 + }, + { + "epoch": 1.37, + "learning_rate": 1.2040490313899735e-05, + "loss": 0.5394, + "step": 645 + }, + { + "epoch": 1.37, + "learning_rate": 1.2017753897691477e-05, + "loss": 0.4478, + "step": 646 + }, + { + "epoch": 1.38, + "learning_rate": 1.1995006602904668e-05, + "loss": 0.4088, + "step": 647 + }, + { + "epoch": 1.38, + "learning_rate": 1.1972248552179755e-05, + "loss": 0.4033, + "step": 648 + }, + { + "epoch": 1.38, + "learning_rate": 1.1949479868215167e-05, + "loss": 0.4681, + "step": 649 + }, + { + "epoch": 1.38, + "learning_rate": 1.1926700673766666e-05, + "loss": 0.4387, + "step": 650 + }, + { + "epoch": 1.39, + "learning_rate": 1.1903911091646684e-05, + "loss": 0.4501, + "step": 651 + }, + { + "epoch": 1.39, + "learning_rate": 1.1881111244723652e-05, + "loss": 0.4723, + "step": 652 + }, + { + "epoch": 1.39, + "learning_rate": 1.1858301255921344e-05, + "loss": 0.3908, + "step": 653 + }, + { + "epoch": 1.39, + "learning_rate": 1.1835481248218215e-05, + "loss": 0.4153, + "step": 654 + }, + { + "epoch": 1.39, + "learning_rate": 1.1812651344646734e-05, + "loss": 0.4463, + "step": 655 + }, + { + "epoch": 1.4, + "learning_rate": 1.1789811668292726e-05, + "loss": 0.4087, + "step": 656 + }, + { + "epoch": 1.4, + "learning_rate": 1.1766962342294702e-05, + "loss": 0.4184, + "step": 657 + }, + { + "epoch": 1.4, + "learning_rate": 1.1744103489843197e-05, + "loss": 0.412, + "step": 658 + }, + { + "epoch": 1.4, + "learning_rate": 1.1721235234180117e-05, + "loss": 0.5649, + "step": 659 + }, + { + "epoch": 1.4, + "learning_rate": 1.1698357698598054e-05, + "loss": 0.3745, + "step": 660 + }, + { + "epoch": 1.41, + "learning_rate": 1.1675471006439632e-05, + "loss": 0.4393, + "step": 661 + }, + { + "epoch": 1.41, + "learning_rate": 1.165257528109685e-05, + "loss": 0.4439, + "step": 662 + }, + { + "epoch": 1.41, + "learning_rate": 1.1629670646010407e-05, + "loss": 0.4255, + "step": 663 + }, + { + "epoch": 1.41, + "learning_rate": 1.160675722466903e-05, + "loss": 0.4601, + "step": 664 + }, + { + "epoch": 1.41, + "learning_rate": 1.1583835140608823e-05, + "loss": 0.4186, + "step": 665 + }, + { + "epoch": 1.42, + "learning_rate": 1.1560904517412599e-05, + "loss": 0.4287, + "step": 666 + }, + { + "epoch": 1.42, + "learning_rate": 1.1537965478709196e-05, + "loss": 0.473, + "step": 667 + }, + { + "epoch": 1.42, + "learning_rate": 1.151501814817284e-05, + "loss": 0.494, + "step": 668 + }, + { + "epoch": 1.42, + "learning_rate": 1.149206264952245e-05, + "loss": 0.4533, + "step": 669 + }, + { + "epoch": 1.43, + "learning_rate": 1.1469099106520982e-05, + "loss": 0.4328, + "step": 670 + }, + { + "epoch": 1.43, + "learning_rate": 1.1446127642974773e-05, + "loss": 0.4971, + "step": 671 + }, + { + "epoch": 1.43, + "learning_rate": 1.1423148382732854e-05, + "loss": 0.3823, + "step": 672 + }, + { + "epoch": 1.43, + "learning_rate": 1.1400161449686293e-05, + "loss": 0.4643, + "step": 673 + }, + { + "epoch": 1.43, + "learning_rate": 1.1377166967767526e-05, + "loss": 0.4228, + "step": 674 + }, + { + "epoch": 1.44, + "learning_rate": 1.135416506094969e-05, + "loss": 0.4448, + "step": 675 + }, + { + "epoch": 1.44, + "learning_rate": 1.1331155853245954e-05, + "loss": 0.421, + "step": 676 + }, + { + "epoch": 1.44, + "learning_rate": 1.1308139468708845e-05, + "loss": 0.4255, + "step": 677 + }, + { + "epoch": 1.44, + "learning_rate": 1.1285116031429585e-05, + "loss": 0.3847, + "step": 678 + }, + { + "epoch": 1.44, + "learning_rate": 1.126208566553742e-05, + "loss": 0.4021, + "step": 679 + }, + { + "epoch": 1.45, + "learning_rate": 1.1239048495198947e-05, + "loss": 0.418, + "step": 680 + }, + { + "epoch": 1.45, + "learning_rate": 1.1216004644617462e-05, + "loss": 0.4156, + "step": 681 + }, + { + "epoch": 1.45, + "learning_rate": 1.1192954238032266e-05, + "loss": 0.4014, + "step": 682 + }, + { + "epoch": 1.45, + "learning_rate": 1.1169897399718004e-05, + "loss": 0.4396, + "step": 683 + }, + { + "epoch": 1.46, + "learning_rate": 1.1146834253984008e-05, + "loss": 0.5337, + "step": 684 + }, + { + "epoch": 1.46, + "learning_rate": 1.1123764925173603e-05, + "loss": 0.479, + "step": 685 + }, + { + "epoch": 1.46, + "learning_rate": 1.110068953766346e-05, + "loss": 0.4552, + "step": 686 + }, + { + "epoch": 1.46, + "learning_rate": 1.1077608215862913e-05, + "loss": 0.4254, + "step": 687 + }, + { + "epoch": 1.46, + "learning_rate": 1.1054521084213293e-05, + "loss": 0.42, + "step": 688 + }, + { + "epoch": 1.47, + "learning_rate": 1.1031428267187238e-05, + "loss": 0.437, + "step": 689 + }, + { + "epoch": 1.47, + "learning_rate": 1.100832988928806e-05, + "loss": 0.4511, + "step": 690 + }, + { + "epoch": 1.47, + "learning_rate": 1.0985226075049042e-05, + "loss": 0.4145, + "step": 691 + }, + { + "epoch": 1.47, + "learning_rate": 1.0962116949032773e-05, + "loss": 0.4419, + "step": 692 + }, + { + "epoch": 1.47, + "learning_rate": 1.0939002635830485e-05, + "loss": 0.4575, + "step": 693 + }, + { + "epoch": 1.48, + "learning_rate": 1.091588326006138e-05, + "loss": 0.3497, + "step": 694 + }, + { + "epoch": 1.48, + "learning_rate": 1.0892758946371943e-05, + "loss": 0.3745, + "step": 695 + }, + { + "epoch": 1.48, + "learning_rate": 1.0869629819435297e-05, + "loss": 0.4391, + "step": 696 + }, + { + "epoch": 1.48, + "learning_rate": 1.0846496003950503e-05, + "loss": 0.4469, + "step": 697 + }, + { + "epoch": 1.49, + "learning_rate": 1.0823357624641904e-05, + "loss": 0.4285, + "step": 698 + }, + { + "epoch": 1.49, + "learning_rate": 1.0800214806258445e-05, + "loss": 0.4416, + "step": 699 + }, + { + "epoch": 1.49, + "learning_rate": 1.0777067673573012e-05, + "loss": 0.3884, + "step": 700 + }, + { + "epoch": 1.49, + "learning_rate": 1.0753916351381746e-05, + "loss": 0.4592, + "step": 701 + }, + { + "epoch": 1.49, + "learning_rate": 1.073076096450337e-05, + "loss": 0.4149, + "step": 702 + }, + { + "epoch": 1.5, + "learning_rate": 1.0707601637778538e-05, + "loss": 0.4422, + "step": 703 + }, + { + "epoch": 1.5, + "learning_rate": 1.0684438496069128e-05, + "loss": 0.4402, + "step": 704 + }, + { + "epoch": 1.5, + "learning_rate": 1.0661271664257592e-05, + "loss": 0.5038, + "step": 705 + }, + { + "epoch": 1.5, + "learning_rate": 1.0638101267246283e-05, + "loss": 0.4443, + "step": 706 + }, + { + "epoch": 1.5, + "learning_rate": 1.0614927429956771e-05, + "loss": 0.4186, + "step": 707 + }, + { + "epoch": 1.51, + "learning_rate": 1.0591750277329167e-05, + "loss": 0.4048, + "step": 708 + }, + { + "epoch": 1.51, + "learning_rate": 1.0568569934321469e-05, + "loss": 0.4013, + "step": 709 + }, + { + "epoch": 1.51, + "learning_rate": 1.0545386525908868e-05, + "loss": 0.4245, + "step": 710 + }, + { + "epoch": 1.51, + "learning_rate": 1.0522200177083077e-05, + "loss": 0.3857, + "step": 711 + }, + { + "epoch": 1.51, + "learning_rate": 1.0499011012851681e-05, + "loss": 0.4596, + "step": 712 + }, + { + "epoch": 1.52, + "learning_rate": 1.0475819158237426e-05, + "loss": 0.4351, + "step": 713 + }, + { + "epoch": 1.52, + "learning_rate": 1.0452624738277564e-05, + "loss": 0.3745, + "step": 714 + }, + { + "epoch": 1.52, + "learning_rate": 1.042942787802319e-05, + "loss": 0.4008, + "step": 715 + }, + { + "epoch": 1.52, + "learning_rate": 1.0406228702538544e-05, + "loss": 0.3811, + "step": 716 + }, + { + "epoch": 1.53, + "learning_rate": 1.0383027336900356e-05, + "loss": 0.4398, + "step": 717 + }, + { + "epoch": 1.53, + "learning_rate": 1.0359823906197157e-05, + "loss": 0.451, + "step": 718 + }, + { + "epoch": 1.53, + "learning_rate": 1.0336618535528618e-05, + "loss": 0.4468, + "step": 719 + }, + { + "epoch": 1.53, + "learning_rate": 1.0313411350004863e-05, + "loss": 0.5174, + "step": 720 + }, + { + "epoch": 1.53, + "learning_rate": 1.029020247474581e-05, + "loss": 0.434, + "step": 721 + }, + { + "epoch": 1.54, + "learning_rate": 1.0266992034880476e-05, + "loss": 0.4097, + "step": 722 + }, + { + "epoch": 1.54, + "learning_rate": 1.0243780155546324e-05, + "loss": 0.3854, + "step": 723 + }, + { + "epoch": 1.54, + "learning_rate": 1.0220566961888567e-05, + "loss": 0.5038, + "step": 724 + }, + { + "epoch": 1.54, + "learning_rate": 1.0197352579059514e-05, + "loss": 0.4164, + "step": 725 + }, + { + "epoch": 1.54, + "learning_rate": 1.0174137132217883e-05, + "loss": 0.4094, + "step": 726 + }, + { + "epoch": 1.55, + "learning_rate": 1.0150920746528118e-05, + "loss": 0.4299, + "step": 727 + }, + { + "epoch": 1.55, + "learning_rate": 1.012770354715974e-05, + "loss": 0.4154, + "step": 728 + }, + { + "epoch": 1.55, + "learning_rate": 1.0104485659286646e-05, + "loss": 0.41, + "step": 729 + }, + { + "epoch": 1.55, + "learning_rate": 1.0081267208086457e-05, + "loss": 0.5481, + "step": 730 + }, + { + "epoch": 1.56, + "learning_rate": 1.0058048318739815e-05, + "loss": 0.433, + "step": 731 + }, + { + "epoch": 1.56, + "learning_rate": 1.0034829116429739e-05, + "loss": 0.4426, + "step": 732 + }, + { + "epoch": 1.56, + "learning_rate": 1.0011609726340922e-05, + "loss": 0.4128, + "step": 733 + }, + { + "epoch": 1.56, + "learning_rate": 9.988390273659081e-06, + "loss": 0.4499, + "step": 734 + }, + { + "epoch": 1.56, + "learning_rate": 9.965170883570266e-06, + "loss": 0.4967, + "step": 735 + }, + { + "epoch": 1.57, + "learning_rate": 9.941951681260188e-06, + "loss": 0.5052, + "step": 736 + }, + { + "epoch": 1.57, + "learning_rate": 9.918732791913547e-06, + "loss": 0.4586, + "step": 737 + }, + { + "epoch": 1.57, + "learning_rate": 9.895514340713355e-06, + "loss": 0.4368, + "step": 738 + }, + { + "epoch": 1.57, + "learning_rate": 9.872296452840266e-06, + "loss": 0.4169, + "step": 739 + }, + { + "epoch": 1.57, + "learning_rate": 9.849079253471887e-06, + "loss": 0.6132, + "step": 740 + }, + { + "epoch": 1.58, + "learning_rate": 9.825862867782123e-06, + "loss": 0.3985, + "step": 741 + }, + { + "epoch": 1.58, + "learning_rate": 9.802647420940489e-06, + "loss": 0.5709, + "step": 742 + }, + { + "epoch": 1.58, + "learning_rate": 9.779433038111433e-06, + "loss": 0.3995, + "step": 743 + }, + { + "epoch": 1.58, + "learning_rate": 9.756219844453676e-06, + "loss": 0.5665, + "step": 744 + }, + { + "epoch": 1.59, + "learning_rate": 9.733007965119525e-06, + "loss": 0.5403, + "step": 745 + }, + { + "epoch": 1.59, + "learning_rate": 9.709797525254192e-06, + "loss": 0.4125, + "step": 746 + }, + { + "epoch": 1.59, + "learning_rate": 9.686588649995137e-06, + "loss": 0.4245, + "step": 747 + }, + { + "epoch": 1.59, + "learning_rate": 9.663381464471387e-06, + "loss": 0.4478, + "step": 748 + }, + { + "epoch": 1.59, + "learning_rate": 9.640176093802847e-06, + "loss": 0.4667, + "step": 749 + }, + { + "epoch": 1.6, + "learning_rate": 9.616972663099648e-06, + "loss": 0.4114, + "step": 750 + }, + { + "epoch": 1.6, + "learning_rate": 9.59377129746146e-06, + "loss": 0.4121, + "step": 751 + }, + { + "epoch": 1.6, + "learning_rate": 9.570572121976813e-06, + "loss": 0.3997, + "step": 752 + }, + { + "epoch": 1.6, + "learning_rate": 9.547375261722438e-06, + "loss": 0.4431, + "step": 753 + }, + { + "epoch": 1.6, + "learning_rate": 9.524180841762577e-06, + "loss": 0.4263, + "step": 754 + }, + { + "epoch": 1.61, + "learning_rate": 9.500988987148322e-06, + "loss": 0.4108, + "step": 755 + }, + { + "epoch": 1.61, + "learning_rate": 9.477799822916925e-06, + "loss": 0.4543, + "step": 756 + }, + { + "epoch": 1.61, + "learning_rate": 9.454613474091137e-06, + "loss": 0.4153, + "step": 757 + }, + { + "epoch": 1.61, + "learning_rate": 9.431430065678536e-06, + "loss": 0.4109, + "step": 758 + }, + { + "epoch": 1.61, + "learning_rate": 9.408249722670836e-06, + "loss": 0.42, + "step": 759 + }, + { + "epoch": 1.62, + "learning_rate": 9.385072570043234e-06, + "loss": 0.4476, + "step": 760 + }, + { + "epoch": 1.62, + "learning_rate": 9.361898732753715e-06, + "loss": 0.5659, + "step": 761 + }, + { + "epoch": 1.62, + "learning_rate": 9.338728335742406e-06, + "loss": 0.4172, + "step": 762 + }, + { + "epoch": 1.62, + "learning_rate": 9.315561503930872e-06, + "loss": 0.4106, + "step": 763 + }, + { + "epoch": 1.63, + "learning_rate": 9.292398362221463e-06, + "loss": 0.4349, + "step": 764 + }, + { + "epoch": 1.63, + "learning_rate": 9.26923903549663e-06, + "loss": 0.4241, + "step": 765 + }, + { + "epoch": 1.63, + "learning_rate": 9.246083648618257e-06, + "loss": 0.4166, + "step": 766 + }, + { + "epoch": 1.63, + "learning_rate": 9.222932326426993e-06, + "loss": 0.418, + "step": 767 + }, + { + "epoch": 1.63, + "learning_rate": 9.199785193741558e-06, + "loss": 0.4026, + "step": 768 + }, + { + "epoch": 1.64, + "learning_rate": 9.1766423753581e-06, + "loss": 0.4171, + "step": 769 + }, + { + "epoch": 1.64, + "learning_rate": 9.1535039960495e-06, + "loss": 0.4492, + "step": 770 + }, + { + "epoch": 1.64, + "learning_rate": 9.130370180564706e-06, + "loss": 0.4063, + "step": 771 + }, + { + "epoch": 1.64, + "learning_rate": 9.107241053628058e-06, + "loss": 0.4035, + "step": 772 + }, + { + "epoch": 1.64, + "learning_rate": 9.084116739938625e-06, + "loss": 0.3902, + "step": 773 + }, + { + "epoch": 1.65, + "learning_rate": 9.060997364169519e-06, + "loss": 0.4208, + "step": 774 + }, + { + "epoch": 1.65, + "learning_rate": 9.037883050967232e-06, + "loss": 0.5065, + "step": 775 + }, + { + "epoch": 1.65, + "learning_rate": 9.014773924950964e-06, + "loss": 0.4095, + "step": 776 + }, + { + "epoch": 1.65, + "learning_rate": 8.991670110711945e-06, + "loss": 0.4353, + "step": 777 + }, + { + "epoch": 1.66, + "learning_rate": 8.968571732812767e-06, + "loss": 0.3457, + "step": 778 + }, + { + "epoch": 1.66, + "learning_rate": 8.945478915786712e-06, + "loss": 0.4503, + "step": 779 + }, + { + "epoch": 1.66, + "learning_rate": 8.922391784137085e-06, + "loss": 0.4069, + "step": 780 + }, + { + "epoch": 1.66, + "learning_rate": 8.89931046233654e-06, + "loss": 0.4699, + "step": 781 + }, + { + "epoch": 1.66, + "learning_rate": 8.876235074826398e-06, + "loss": 0.4429, + "step": 782 + }, + { + "epoch": 1.67, + "learning_rate": 8.853165746015997e-06, + "loss": 0.5613, + "step": 783 + }, + { + "epoch": 1.67, + "learning_rate": 8.830102600281998e-06, + "loss": 0.4255, + "step": 784 + }, + { + "epoch": 1.67, + "learning_rate": 8.807045761967738e-06, + "loss": 0.4564, + "step": 785 + }, + { + "epoch": 1.67, + "learning_rate": 8.783995355382542e-06, + "loss": 0.3682, + "step": 786 + }, + { + "epoch": 1.67, + "learning_rate": 8.760951504801056e-06, + "loss": 0.5271, + "step": 787 + }, + { + "epoch": 1.68, + "learning_rate": 8.737914334462585e-06, + "loss": 0.4023, + "step": 788 + }, + { + "epoch": 1.68, + "learning_rate": 8.714883968570417e-06, + "loss": 0.4608, + "step": 789 + }, + { + "epoch": 1.68, + "learning_rate": 8.691860531291158e-06, + "loss": 0.4327, + "step": 790 + }, + { + "epoch": 1.68, + "learning_rate": 8.668844146754048e-06, + "loss": 0.4271, + "step": 791 + }, + { + "epoch": 1.69, + "learning_rate": 8.645834939050311e-06, + "loss": 0.3897, + "step": 792 + }, + { + "epoch": 1.69, + "learning_rate": 8.622833032232479e-06, + "loss": 0.4519, + "step": 793 + }, + { + "epoch": 1.69, + "learning_rate": 8.599838550313714e-06, + "loss": 0.4635, + "step": 794 + }, + { + "epoch": 1.69, + "learning_rate": 8.576851617267151e-06, + "loss": 0.4361, + "step": 795 + }, + { + "epoch": 1.69, + "learning_rate": 8.55387235702523e-06, + "loss": 0.4024, + "step": 796 + }, + { + "epoch": 1.7, + "learning_rate": 8.530900893479021e-06, + "loss": 0.3924, + "step": 797 + }, + { + "epoch": 1.7, + "learning_rate": 8.507937350477553e-06, + "loss": 0.3979, + "step": 798 + }, + { + "epoch": 1.7, + "learning_rate": 8.484981851827161e-06, + "loss": 0.391, + "step": 799 + }, + { + "epoch": 1.7, + "learning_rate": 8.462034521290802e-06, + "loss": 0.4488, + "step": 800 + }, + { + "epoch": 1.7, + "learning_rate": 8.439095482587403e-06, + "loss": 0.3862, + "step": 801 + }, + { + "epoch": 1.71, + "learning_rate": 8.416164859391179e-06, + "loss": 0.5312, + "step": 802 + }, + { + "epoch": 1.71, + "learning_rate": 8.393242775330974e-06, + "loss": 0.5317, + "step": 803 + }, + { + "epoch": 1.71, + "learning_rate": 8.370329353989596e-06, + "loss": 0.4558, + "step": 804 + }, + { + "epoch": 1.71, + "learning_rate": 8.347424718903152e-06, + "loss": 0.5051, + "step": 805 + }, + { + "epoch": 1.71, + "learning_rate": 8.324528993560372e-06, + "loss": 0.4244, + "step": 806 + }, + { + "epoch": 1.72, + "learning_rate": 8.30164230140195e-06, + "loss": 0.4013, + "step": 807 + }, + { + "epoch": 1.72, + "learning_rate": 8.278764765819885e-06, + "loss": 0.3555, + "step": 808 + }, + { + "epoch": 1.72, + "learning_rate": 8.255896510156805e-06, + "loss": 0.4226, + "step": 809 + }, + { + "epoch": 1.72, + "learning_rate": 8.233037657705303e-06, + "loss": 0.4141, + "step": 810 + }, + { + "epoch": 1.73, + "learning_rate": 8.210188331707277e-06, + "loss": 0.4345, + "step": 811 + }, + { + "epoch": 1.73, + "learning_rate": 8.18734865535327e-06, + "loss": 0.419, + "step": 812 + }, + { + "epoch": 1.73, + "learning_rate": 8.164518751781789e-06, + "loss": 0.3886, + "step": 813 + }, + { + "epoch": 1.73, + "learning_rate": 8.141698744078659e-06, + "loss": 0.4573, + "step": 814 + }, + { + "epoch": 1.73, + "learning_rate": 8.11888875527635e-06, + "loss": 0.3978, + "step": 815 + }, + { + "epoch": 1.74, + "learning_rate": 8.096088908353316e-06, + "loss": 0.384, + "step": 816 + }, + { + "epoch": 1.74, + "learning_rate": 8.073299326233332e-06, + "loss": 0.4143, + "step": 817 + }, + { + "epoch": 1.74, + "learning_rate": 8.050520131784835e-06, + "loss": 0.3918, + "step": 818 + }, + { + "epoch": 1.74, + "learning_rate": 8.027751447820247e-06, + "loss": 0.4657, + "step": 819 + }, + { + "epoch": 1.74, + "learning_rate": 8.004993397095332e-06, + "loss": 0.4544, + "step": 820 + }, + { + "epoch": 1.75, + "learning_rate": 7.982246102308526e-06, + "loss": 0.4102, + "step": 821 + }, + { + "epoch": 1.75, + "learning_rate": 7.959509686100267e-06, + "loss": 0.5127, + "step": 822 + }, + { + "epoch": 1.75, + "learning_rate": 7.936784271052354e-06, + "loss": 0.4046, + "step": 823 + }, + { + "epoch": 1.75, + "learning_rate": 7.914069979687265e-06, + "loss": 0.5415, + "step": 824 + }, + { + "epoch": 1.76, + "learning_rate": 7.891366934467504e-06, + "loss": 0.434, + "step": 825 + }, + { + "epoch": 1.76, + "learning_rate": 7.868675257794952e-06, + "loss": 0.3929, + "step": 826 + }, + { + "epoch": 1.76, + "learning_rate": 7.845995072010188e-06, + "loss": 0.4171, + "step": 827 + }, + { + "epoch": 1.76, + "learning_rate": 7.823326499391845e-06, + "loss": 0.4927, + "step": 828 + }, + { + "epoch": 1.76, + "learning_rate": 7.80066966215594e-06, + "loss": 0.4298, + "step": 829 + }, + { + "epoch": 1.77, + "learning_rate": 7.778024682455218e-06, + "loss": 0.4334, + "step": 830 + }, + { + "epoch": 1.77, + "learning_rate": 7.755391682378506e-06, + "loss": 0.4637, + "step": 831 + }, + { + "epoch": 1.77, + "learning_rate": 7.732770783950027e-06, + "loss": 0.4549, + "step": 832 + }, + { + "epoch": 1.77, + "learning_rate": 7.710162109128773e-06, + "loss": 0.4311, + "step": 833 + }, + { + "epoch": 1.77, + "learning_rate": 7.687565779807823e-06, + "loss": 0.417, + "step": 834 + }, + { + "epoch": 1.78, + "learning_rate": 7.664981917813705e-06, + "loss": 0.4535, + "step": 835 + }, + { + "epoch": 1.78, + "learning_rate": 7.642410644905726e-06, + "loss": 0.5526, + "step": 836 + }, + { + "epoch": 1.78, + "learning_rate": 7.619852082775323e-06, + "loss": 0.4682, + "step": 837 + }, + { + "epoch": 1.78, + "learning_rate": 7.597306353045393e-06, + "loss": 0.5119, + "step": 838 + }, + { + "epoch": 1.79, + "learning_rate": 7.574773577269661e-06, + "loss": 0.412, + "step": 839 + }, + { + "epoch": 1.79, + "learning_rate": 7.552253876932006e-06, + "loss": 0.3829, + "step": 840 + }, + { + "epoch": 1.79, + "learning_rate": 7.52974737344581e-06, + "loss": 0.4386, + "step": 841 + }, + { + "epoch": 1.79, + "learning_rate": 7.507254188153309e-06, + "loss": 0.414, + "step": 842 + }, + { + "epoch": 1.79, + "learning_rate": 7.484774442324932e-06, + "loss": 0.4219, + "step": 843 + }, + { + "epoch": 1.8, + "learning_rate": 7.462308257158655e-06, + "loss": 0.4341, + "step": 844 + }, + { + "epoch": 1.8, + "learning_rate": 7.439855753779333e-06, + "loss": 0.4384, + "step": 845 + }, + { + "epoch": 1.8, + "learning_rate": 7.417417053238065e-06, + "loss": 0.4034, + "step": 846 + }, + { + "epoch": 1.8, + "learning_rate": 7.394992276511533e-06, + "loss": 0.3944, + "step": 847 + }, + { + "epoch": 1.8, + "learning_rate": 7.372581544501343e-06, + "loss": 0.4344, + "step": 848 + }, + { + "epoch": 1.81, + "learning_rate": 7.350184978033386e-06, + "loss": 0.4615, + "step": 849 + }, + { + "epoch": 1.81, + "learning_rate": 7.327802697857178e-06, + "loss": 0.3883, + "step": 850 + }, + { + "epoch": 1.81, + "learning_rate": 7.305434824645213e-06, + "loss": 0.4425, + "step": 851 + }, + { + "epoch": 1.81, + "learning_rate": 7.283081478992307e-06, + "loss": 0.3758, + "step": 852 + }, + { + "epoch": 1.81, + "learning_rate": 7.260742781414955e-06, + "loss": 0.4134, + "step": 853 + }, + { + "epoch": 1.82, + "learning_rate": 7.238418852350683e-06, + "loss": 0.4101, + "step": 854 + }, + { + "epoch": 1.82, + "learning_rate": 7.216109812157383e-06, + "loss": 0.3996, + "step": 855 + }, + { + "epoch": 1.82, + "learning_rate": 7.193815781112682e-06, + "loss": 0.4518, + "step": 856 + }, + { + "epoch": 1.82, + "learning_rate": 7.171536879413281e-06, + "loss": 0.4452, + "step": 857 + }, + { + "epoch": 1.83, + "learning_rate": 7.149273227174318e-06, + "loss": 0.47, + "step": 858 + }, + { + "epoch": 1.83, + "learning_rate": 7.127024944428712e-06, + "loss": 0.4358, + "step": 859 + }, + { + "epoch": 1.83, + "learning_rate": 7.104792151126515e-06, + "loss": 0.4139, + "step": 860 + }, + { + "epoch": 1.83, + "learning_rate": 7.082574967134274e-06, + "loss": 0.3857, + "step": 861 + }, + { + "epoch": 1.83, + "learning_rate": 7.060373512234377e-06, + "loss": 0.4131, + "step": 862 + }, + { + "epoch": 1.84, + "learning_rate": 7.038187906124414e-06, + "loss": 0.4267, + "step": 863 + }, + { + "epoch": 1.84, + "learning_rate": 7.016018268416518e-06, + "loss": 0.4942, + "step": 864 + }, + { + "epoch": 1.84, + "learning_rate": 6.993864718636736e-06, + "loss": 0.3773, + "step": 865 + }, + { + "epoch": 1.84, + "learning_rate": 6.97172737622438e-06, + "loss": 0.4204, + "step": 866 + }, + { + "epoch": 1.84, + "learning_rate": 6.949606360531376e-06, + "loss": 0.4001, + "step": 867 + }, + { + "epoch": 1.85, + "learning_rate": 6.927501790821627e-06, + "loss": 0.3965, + "step": 868 + }, + { + "epoch": 1.85, + "learning_rate": 6.905413786270372e-06, + "loss": 0.3987, + "step": 869 + }, + { + "epoch": 1.85, + "learning_rate": 6.883342465963537e-06, + "loss": 0.4314, + "step": 870 + }, + { + "epoch": 1.85, + "learning_rate": 6.861287948897091e-06, + "loss": 0.3842, + "step": 871 + }, + { + "epoch": 1.86, + "learning_rate": 6.839250353976425e-06, + "loss": 0.3856, + "step": 872 + }, + { + "epoch": 1.86, + "learning_rate": 6.817229800015681e-06, + "loss": 0.4149, + "step": 873 + }, + { + "epoch": 1.86, + "learning_rate": 6.7952264057371284e-06, + "loss": 0.4074, + "step": 874 + }, + { + "epoch": 1.86, + "learning_rate": 6.773240289770529e-06, + "loss": 0.3996, + "step": 875 + }, + { + "epoch": 1.86, + "learning_rate": 6.751271570652477e-06, + "loss": 0.3922, + "step": 876 + }, + { + "epoch": 1.87, + "learning_rate": 6.729320366825785e-06, + "loss": 0.396, + "step": 877 + }, + { + "epoch": 1.87, + "learning_rate": 6.707386796638826e-06, + "loss": 0.4152, + "step": 878 + }, + { + "epoch": 1.87, + "learning_rate": 6.685470978344906e-06, + "loss": 0.3851, + "step": 879 + }, + { + "epoch": 1.87, + "learning_rate": 6.6635730301016175e-06, + "loss": 0.411, + "step": 880 + }, + { + "epoch": 1.87, + "learning_rate": 6.641693069970217e-06, + "loss": 0.4108, + "step": 881 + }, + { + "epoch": 1.88, + "learning_rate": 6.619831215914974e-06, + "loss": 0.4186, + "step": 882 + }, + { + "epoch": 1.88, + "learning_rate": 6.597987585802537e-06, + "loss": 0.4397, + "step": 883 + }, + { + "epoch": 1.88, + "learning_rate": 6.576162297401306e-06, + "loss": 0.4588, + "step": 884 + }, + { + "epoch": 1.88, + "learning_rate": 6.554355468380796e-06, + "loss": 0.3963, + "step": 885 + }, + { + "epoch": 1.89, + "learning_rate": 6.532567216310988e-06, + "loss": 0.4631, + "step": 886 + }, + { + "epoch": 1.89, + "learning_rate": 6.510797658661718e-06, + "loss": 0.4407, + "step": 887 + }, + { + "epoch": 1.89, + "learning_rate": 6.489046912802031e-06, + "loss": 0.5845, + "step": 888 + }, + { + "epoch": 1.89, + "learning_rate": 6.4673150959995405e-06, + "loss": 0.374, + "step": 889 + }, + { + "epoch": 1.89, + "learning_rate": 6.445602325419817e-06, + "loss": 0.4322, + "step": 890 + }, + { + "epoch": 1.9, + "learning_rate": 6.423908718125742e-06, + "loss": 0.4989, + "step": 891 + }, + { + "epoch": 1.9, + "learning_rate": 6.402234391076871e-06, + "loss": 0.4108, + "step": 892 + }, + { + "epoch": 1.9, + "learning_rate": 6.38057946112882e-06, + "loss": 0.5602, + "step": 893 + }, + { + "epoch": 1.9, + "learning_rate": 6.358944045032627e-06, + "loss": 0.3452, + "step": 894 + }, + { + "epoch": 1.9, + "learning_rate": 6.3373282594341124e-06, + "loss": 0.4179, + "step": 895 + }, + { + "epoch": 1.91, + "learning_rate": 6.315732220873272e-06, + "loss": 0.4055, + "step": 896 + }, + { + "epoch": 1.91, + "learning_rate": 6.2941560457836346e-06, + "loss": 0.4152, + "step": 897 + }, + { + "epoch": 1.91, + "learning_rate": 6.27259985049163e-06, + "loss": 0.4018, + "step": 898 + }, + { + "epoch": 1.91, + "learning_rate": 6.2510637512159736e-06, + "loss": 0.4347, + "step": 899 + }, + { + "epoch": 1.91, + "learning_rate": 6.2295478640670336e-06, + "loss": 0.4047, + "step": 900 + }, + { + "epoch": 1.92, + "learning_rate": 6.208052305046208e-06, + "loss": 0.4209, + "step": 901 + }, + { + "epoch": 1.92, + "learning_rate": 6.186577190045291e-06, + "loss": 0.4377, + "step": 902 + }, + { + "epoch": 1.92, + "learning_rate": 6.16512263484586e-06, + "loss": 0.4113, + "step": 903 + }, + { + "epoch": 1.92, + "learning_rate": 6.1436887551186466e-06, + "loss": 0.4277, + "step": 904 + }, + { + "epoch": 1.93, + "learning_rate": 6.1222756664229035e-06, + "loss": 0.4485, + "step": 905 + }, + { + "epoch": 1.93, + "learning_rate": 6.1008834842057995e-06, + "loss": 0.4476, + "step": 906 + }, + { + "epoch": 1.93, + "learning_rate": 6.079512323801783e-06, + "loss": 0.4931, + "step": 907 + }, + { + "epoch": 1.93, + "learning_rate": 6.058162300431965e-06, + "loss": 0.4429, + "step": 908 + }, + { + "epoch": 1.93, + "learning_rate": 6.0368335292035e-06, + "loss": 0.4378, + "step": 909 + }, + { + "epoch": 1.94, + "learning_rate": 6.015526125108963e-06, + "loss": 0.3809, + "step": 910 + }, + { + "epoch": 1.94, + "learning_rate": 5.994240203025723e-06, + "loss": 0.4626, + "step": 911 + }, + { + "epoch": 1.94, + "learning_rate": 5.972975877715339e-06, + "loss": 0.4205, + "step": 912 + }, + { + "epoch": 1.94, + "learning_rate": 5.95173326382293e-06, + "loss": 0.4519, + "step": 913 + }, + { + "epoch": 1.94, + "learning_rate": 5.9305124758765544e-06, + "loss": 0.449, + "step": 914 + }, + { + "epoch": 1.95, + "learning_rate": 5.9093136282866014e-06, + "loss": 0.4106, + "step": 915 + }, + { + "epoch": 1.95, + "learning_rate": 5.888136835345173e-06, + "loss": 0.4069, + "step": 916 + }, + { + "epoch": 1.95, + "learning_rate": 5.866982211225462e-06, + "loss": 0.4323, + "step": 917 + }, + { + "epoch": 1.95, + "learning_rate": 5.845849869981137e-06, + "loss": 0.3633, + "step": 918 + }, + { + "epoch": 1.96, + "learning_rate": 5.824739925545737e-06, + "loss": 0.3987, + "step": 919 + }, + { + "epoch": 1.96, + "learning_rate": 5.803652491732041e-06, + "loss": 0.4442, + "step": 920 + }, + { + "epoch": 1.96, + "learning_rate": 5.782587682231473e-06, + "loss": 0.3786, + "step": 921 + }, + { + "epoch": 1.96, + "learning_rate": 5.761545610613474e-06, + "loss": 0.4222, + "step": 922 + }, + { + "epoch": 1.96, + "learning_rate": 5.740526390324896e-06, + "loss": 0.5067, + "step": 923 + }, + { + "epoch": 1.97, + "learning_rate": 5.71953013468939e-06, + "loss": 0.4283, + "step": 924 + }, + { + "epoch": 1.97, + "learning_rate": 5.6985569569068e-06, + "loss": 0.5653, + "step": 925 + }, + { + "epoch": 1.97, + "learning_rate": 5.67760697005253e-06, + "loss": 0.4295, + "step": 926 + }, + { + "epoch": 1.97, + "learning_rate": 5.656680287076976e-06, + "loss": 0.4367, + "step": 927 + }, + { + "epoch": 1.97, + "learning_rate": 5.635777020804876e-06, + "loss": 0.4482, + "step": 928 + }, + { + "epoch": 1.98, + "learning_rate": 5.614897283934725e-06, + "loss": 0.4187, + "step": 929 + }, + { + "epoch": 1.98, + "learning_rate": 5.5940411890381575e-06, + "loss": 0.4222, + "step": 930 + }, + { + "epoch": 1.98, + "learning_rate": 5.57320884855935e-06, + "loss": 0.5125, + "step": 931 + }, + { + "epoch": 1.98, + "learning_rate": 5.552400374814397e-06, + "loss": 0.4211, + "step": 932 + }, + { + "epoch": 1.99, + "learning_rate": 5.53161587999073e-06, + "loss": 0.4061, + "step": 933 + }, + { + "epoch": 1.99, + "learning_rate": 5.510855476146491e-06, + "loss": 0.3962, + "step": 934 + }, + { + "epoch": 1.99, + "learning_rate": 5.490119275209942e-06, + "loss": 0.3987, + "step": 935 + }, + { + "epoch": 1.99, + "learning_rate": 5.469407388978855e-06, + "loss": 0.4366, + "step": 936 + }, + { + "epoch": 1.99, + "learning_rate": 5.448719929119916e-06, + "loss": 0.485, + "step": 937 + }, + { + "epoch": 2.0, + "learning_rate": 5.428057007168104e-06, + "loss": 0.444, + "step": 938 + }, + { + "epoch": 2.0, + "learning_rate": 5.407418734526119e-06, + "loss": 0.4114, + "step": 939 + }, + { + "epoch": 2.0, + "learning_rate": 5.386805222463757e-06, + "loss": 0.4196, + "step": 940 + }, + { + "epoch": 2.0, + "learning_rate": 5.366216582117323e-06, + "loss": 0.3528, + "step": 941 + }, + { + "epoch": 2.0, + "learning_rate": 5.345652924489028e-06, + "loss": 0.3363, + "step": 942 + }, + { + "epoch": 2.01, + "learning_rate": 5.325114360446386e-06, + "loss": 0.3272, + "step": 943 + }, + { + "epoch": 2.01, + "learning_rate": 5.304601000721627e-06, + "loss": 0.338, + "step": 944 + }, + { + "epoch": 2.01, + "learning_rate": 5.284112955911089e-06, + "loss": 0.4136, + "step": 945 + }, + { + "epoch": 2.01, + "learning_rate": 5.2636503364746285e-06, + "loss": 0.35, + "step": 946 + }, + { + "epoch": 2.01, + "learning_rate": 5.24321325273502e-06, + "loss": 0.3295, + "step": 947 + }, + { + "epoch": 2.02, + "learning_rate": 5.22280181487737e-06, + "loss": 0.2865, + "step": 948 + }, + { + "epoch": 2.02, + "learning_rate": 5.202416132948511e-06, + "loss": 0.3599, + "step": 949 + }, + { + "epoch": 2.02, + "learning_rate": 5.182056316856421e-06, + "loss": 0.4011, + "step": 950 + }, + { + "epoch": 2.02, + "learning_rate": 5.161722476369613e-06, + "loss": 0.4179, + "step": 951 + }, + { + "epoch": 2.03, + "learning_rate": 5.1414147211165635e-06, + "loss": 0.3621, + "step": 952 + }, + { + "epoch": 2.03, + "learning_rate": 5.12113316058511e-06, + "loss": 0.3007, + "step": 953 + }, + { + "epoch": 2.03, + "learning_rate": 5.100877904121864e-06, + "loss": 0.3335, + "step": 954 + }, + { + "epoch": 2.03, + "learning_rate": 5.080649060931619e-06, + "loss": 0.3308, + "step": 955 + }, + { + "epoch": 2.03, + "learning_rate": 5.0604467400767645e-06, + "loss": 0.3253, + "step": 956 + }, + { + "epoch": 2.04, + "learning_rate": 5.040271050476697e-06, + "loss": 0.333, + "step": 957 + }, + { + "epoch": 2.04, + "learning_rate": 5.020122100907226e-06, + "loss": 0.3473, + "step": 958 + }, + { + "epoch": 2.04, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2921, + "step": 959 + }, + { + "epoch": 2.04, + "learning_rate": 4.9799048562419204e-06, + "loss": 0.2982, + "step": 960 + }, + { + "epoch": 2.04, + "learning_rate": 4.959836777974538e-06, + "loss": 0.3681, + "step": 961 + }, + { + "epoch": 2.05, + "learning_rate": 4.939795873393491e-06, + "loss": 0.307, + "step": 962 + }, + { + "epoch": 2.05, + "learning_rate": 4.9197822505479105e-06, + "loss": 0.3041, + "step": 963 + }, + { + "epoch": 2.05, + "learning_rate": 4.899796017339839e-06, + "loss": 0.327, + "step": 964 + }, + { + "epoch": 2.05, + "learning_rate": 4.879837281523651e-06, + "loss": 0.307, + "step": 965 + }, + { + "epoch": 2.06, + "learning_rate": 4.859906150705471e-06, + "loss": 0.331, + "step": 966 + }, + { + "epoch": 2.06, + "learning_rate": 4.8400027323425905e-06, + "loss": 0.3283, + "step": 967 + }, + { + "epoch": 2.06, + "learning_rate": 4.820127133742893e-06, + "loss": 0.3444, + "step": 968 + }, + { + "epoch": 2.06, + "learning_rate": 4.800279462064278e-06, + "loss": 0.3296, + "step": 969 + }, + { + "epoch": 2.06, + "learning_rate": 4.7804598243140664e-06, + "loss": 0.3443, + "step": 970 + }, + { + "epoch": 2.07, + "learning_rate": 4.76066832734845e-06, + "loss": 0.3533, + "step": 971 + }, + { + "epoch": 2.07, + "learning_rate": 4.7409050778718945e-06, + "loss": 0.2954, + "step": 972 + }, + { + "epoch": 2.07, + "learning_rate": 4.721170182436572e-06, + "loss": 0.4319, + "step": 973 + }, + { + "epoch": 2.07, + "learning_rate": 4.7014637474417875e-06, + "loss": 0.3362, + "step": 974 + }, + { + "epoch": 2.07, + "learning_rate": 4.6817858791334025e-06, + "loss": 0.4228, + "step": 975 + }, + { + "epoch": 2.08, + "learning_rate": 4.662136683603265e-06, + "loss": 0.3344, + "step": 976 + }, + { + "epoch": 2.08, + "learning_rate": 4.6425162667886295e-06, + "loss": 0.3431, + "step": 977 + }, + { + "epoch": 2.08, + "learning_rate": 4.622924734471599e-06, + "loss": 0.3039, + "step": 978 + }, + { + "epoch": 2.08, + "learning_rate": 4.603362192278543e-06, + "loss": 0.3125, + "step": 979 + }, + { + "epoch": 2.09, + "learning_rate": 4.5838287456795386e-06, + "loss": 0.3323, + "step": 980 + }, + { + "epoch": 2.09, + "learning_rate": 4.56432449998779e-06, + "loss": 0.3335, + "step": 981 + }, + { + "epoch": 2.09, + "learning_rate": 4.5448495603590694e-06, + "loss": 0.3075, + "step": 982 + }, + { + "epoch": 2.09, + "learning_rate": 4.525404031791149e-06, + "loss": 0.3623, + "step": 983 + }, + { + "epoch": 2.09, + "learning_rate": 4.5059880191232285e-06, + "loss": 0.3062, + "step": 984 + }, + { + "epoch": 2.1, + "learning_rate": 4.486601627035381e-06, + "loss": 0.3392, + "step": 985 + }, + { + "epoch": 2.1, + "learning_rate": 4.4672449600479774e-06, + "loss": 0.3049, + "step": 986 + }, + { + "epoch": 2.1, + "learning_rate": 4.447918122521129e-06, + "loss": 0.3418, + "step": 987 + }, + { + "epoch": 2.1, + "learning_rate": 4.428621218654129e-06, + "loss": 0.2813, + "step": 988 + }, + { + "epoch": 2.1, + "learning_rate": 4.409354352484872e-06, + "loss": 0.3874, + "step": 989 + }, + { + "epoch": 2.11, + "learning_rate": 4.39011762788932e-06, + "loss": 0.3, + "step": 990 + }, + { + "epoch": 2.11, + "learning_rate": 4.370911148580923e-06, + "loss": 0.2715, + "step": 991 + }, + { + "epoch": 2.11, + "learning_rate": 4.351735018110066e-06, + "loss": 0.2863, + "step": 992 + }, + { + "epoch": 2.11, + "learning_rate": 4.332589339863512e-06, + "loss": 0.3177, + "step": 993 + }, + { + "epoch": 2.11, + "learning_rate": 4.31347421706384e-06, + "loss": 0.3071, + "step": 994 + }, + { + "epoch": 2.12, + "learning_rate": 4.294389752768899e-06, + "loss": 0.2774, + "step": 995 + }, + { + "epoch": 2.12, + "learning_rate": 4.27533604987123e-06, + "loss": 0.321, + "step": 996 + }, + { + "epoch": 2.12, + "learning_rate": 4.25631321109754e-06, + "loss": 0.3431, + "step": 997 + }, + { + "epoch": 2.12, + "learning_rate": 4.2373213390081295e-06, + "loss": 0.3292, + "step": 998 + }, + { + "epoch": 2.13, + "learning_rate": 4.218360535996338e-06, + "loss": 0.291, + "step": 999 + }, + { + "epoch": 2.13, + "learning_rate": 4.19943090428802e-06, + "loss": 0.3009, + "step": 1000 + }, + { + "epoch": 2.13, + "learning_rate": 4.180532545940941e-06, + "loss": 0.344, + "step": 1001 + }, + { + "epoch": 2.13, + "learning_rate": 4.161665562844281e-06, + "loss": 0.3883, + "step": 1002 + }, + { + "epoch": 2.13, + "learning_rate": 4.142830056718052e-06, + "loss": 0.326, + "step": 1003 + }, + { + "epoch": 2.14, + "learning_rate": 4.124026129112566e-06, + "loss": 0.3043, + "step": 1004 + }, + { + "epoch": 2.14, + "learning_rate": 4.105253881407879e-06, + "loss": 0.3243, + "step": 1005 + }, + { + "epoch": 2.14, + "learning_rate": 4.086513414813248e-06, + "loss": 0.3455, + "step": 1006 + }, + { + "epoch": 2.14, + "learning_rate": 4.067804830366584e-06, + "loss": 0.3453, + "step": 1007 + }, + { + "epoch": 2.14, + "learning_rate": 4.049128228933903e-06, + "loss": 0.4583, + "step": 1008 + }, + { + "epoch": 2.15, + "learning_rate": 4.030483711208795e-06, + "loss": 0.3281, + "step": 1009 + }, + { + "epoch": 2.15, + "learning_rate": 4.0118713777118655e-06, + "loss": 0.346, + "step": 1010 + }, + { + "epoch": 2.15, + "learning_rate": 3.993291328790208e-06, + "loss": 0.3473, + "step": 1011 + }, + { + "epoch": 2.15, + "learning_rate": 3.974743664616851e-06, + "loss": 0.3177, + "step": 1012 + }, + { + "epoch": 2.16, + "learning_rate": 3.956228485190224e-06, + "loss": 0.2808, + "step": 1013 + }, + { + "epoch": 2.16, + "learning_rate": 3.937745890333623e-06, + "loss": 0.3195, + "step": 1014 + }, + { + "epoch": 2.16, + "learning_rate": 3.919295979694653e-06, + "loss": 0.3441, + "step": 1015 + }, + { + "epoch": 2.16, + "learning_rate": 3.900878852744715e-06, + "loss": 0.3137, + "step": 1016 + }, + { + "epoch": 2.16, + "learning_rate": 3.882494608778454e-06, + "loss": 0.3207, + "step": 1017 + }, + { + "epoch": 2.17, + "learning_rate": 3.864143346913239e-06, + "loss": 0.3255, + "step": 1018 + }, + { + "epoch": 2.17, + "learning_rate": 3.84582516608861e-06, + "loss": 0.311, + "step": 1019 + }, + { + "epoch": 2.17, + "learning_rate": 3.827540165065746e-06, + "loss": 0.3139, + "step": 1020 + }, + { + "epoch": 2.17, + "learning_rate": 3.8092884424269515e-06, + "loss": 0.2845, + "step": 1021 + }, + { + "epoch": 2.17, + "learning_rate": 3.791070096575108e-06, + "loss": 0.3071, + "step": 1022 + }, + { + "epoch": 2.18, + "learning_rate": 3.7728852257331472e-06, + "loss": 0.3212, + "step": 1023 + }, + { + "epoch": 2.18, + "learning_rate": 3.7547339279435267e-06, + "loss": 0.3357, + "step": 1024 + }, + { + "epoch": 2.18, + "learning_rate": 3.7366163010676937e-06, + "loss": 0.3586, + "step": 1025 + }, + { + "epoch": 2.18, + "learning_rate": 3.7185324427855653e-06, + "loss": 0.2892, + "step": 1026 + }, + { + "epoch": 2.19, + "learning_rate": 3.7004824505949878e-06, + "loss": 0.3128, + "step": 1027 + }, + { + "epoch": 2.19, + "learning_rate": 3.6824664218112305e-06, + "loss": 0.3312, + "step": 1028 + }, + { + "epoch": 2.19, + "learning_rate": 3.664484453566449e-06, + "loss": 0.3201, + "step": 1029 + }, + { + "epoch": 2.19, + "learning_rate": 3.6465366428091633e-06, + "loss": 0.3399, + "step": 1030 + }, + { + "epoch": 2.19, + "learning_rate": 3.6286230863037354e-06, + "loss": 0.3565, + "step": 1031 + }, + { + "epoch": 2.2, + "learning_rate": 3.610743880629849e-06, + "loss": 0.3156, + "step": 1032 + }, + { + "epoch": 2.2, + "learning_rate": 3.5928991221819888e-06, + "loss": 0.3798, + "step": 1033 + }, + { + "epoch": 2.2, + "learning_rate": 3.575088907168911e-06, + "loss": 0.3351, + "step": 1034 + }, + { + "epoch": 2.2, + "learning_rate": 3.5573133316131447e-06, + "loss": 0.4802, + "step": 1035 + }, + { + "epoch": 2.2, + "learning_rate": 3.5395724913504546e-06, + "loss": 0.344, + "step": 1036 + }, + { + "epoch": 2.21, + "learning_rate": 3.5218664820293424e-06, + "loss": 0.3283, + "step": 1037 + }, + { + "epoch": 2.21, + "learning_rate": 3.504195399110516e-06, + "loss": 0.3309, + "step": 1038 + }, + { + "epoch": 2.21, + "learning_rate": 3.48655933786637e-06, + "loss": 0.3951, + "step": 1039 + }, + { + "epoch": 2.21, + "learning_rate": 3.468958393380496e-06, + "loss": 0.3379, + "step": 1040 + }, + { + "epoch": 2.21, + "learning_rate": 3.4513926605471504e-06, + "loss": 0.344, + "step": 1041 + }, + { + "epoch": 2.22, + "learning_rate": 3.4338622340707496e-06, + "loss": 0.2961, + "step": 1042 + }, + { + "epoch": 2.22, + "learning_rate": 3.4163672084653574e-06, + "loss": 0.3143, + "step": 1043 + }, + { + "epoch": 2.22, + "learning_rate": 3.3989076780541774e-06, + "loss": 0.309, + "step": 1044 + }, + { + "epoch": 2.22, + "learning_rate": 3.3814837369690455e-06, + "loss": 0.3118, + "step": 1045 + }, + { + "epoch": 2.23, + "learning_rate": 3.3640954791499103e-06, + "loss": 0.3501, + "step": 1046 + }, + { + "epoch": 2.23, + "learning_rate": 3.3467429983443477e-06, + "loss": 0.3497, + "step": 1047 + }, + { + "epoch": 2.23, + "learning_rate": 3.329426388107041e-06, + "loss": 0.3442, + "step": 1048 + }, + { + "epoch": 2.23, + "learning_rate": 3.3121457417992775e-06, + "loss": 0.3184, + "step": 1049 + }, + { + "epoch": 2.23, + "learning_rate": 3.29490115258845e-06, + "loss": 0.3324, + "step": 1050 + }, + { + "epoch": 2.24, + "learning_rate": 3.277692713447551e-06, + "loss": 0.3475, + "step": 1051 + }, + { + "epoch": 2.24, + "learning_rate": 3.2605205171546773e-06, + "loss": 0.3525, + "step": 1052 + }, + { + "epoch": 2.24, + "learning_rate": 3.243384656292511e-06, + "loss": 0.3346, + "step": 1053 + }, + { + "epoch": 2.24, + "learning_rate": 3.2262852232478446e-06, + "loss": 0.3163, + "step": 1054 + }, + { + "epoch": 2.24, + "learning_rate": 3.209222310211079e-06, + "loss": 0.2707, + "step": 1055 + }, + { + "epoch": 2.25, + "learning_rate": 3.1921960091757076e-06, + "loss": 0.3171, + "step": 1056 + }, + { + "epoch": 2.25, + "learning_rate": 3.175206411937839e-06, + "loss": 0.3103, + "step": 1057 + }, + { + "epoch": 2.25, + "learning_rate": 3.1582536100956973e-06, + "loss": 0.3199, + "step": 1058 + }, + { + "epoch": 2.25, + "learning_rate": 3.141337695049117e-06, + "loss": 0.3236, + "step": 1059 + }, + { + "epoch": 2.26, + "learning_rate": 3.1244587579990717e-06, + "loss": 0.3239, + "step": 1060 + }, + { + "epoch": 2.26, + "learning_rate": 3.107616889947165e-06, + "loss": 0.3033, + "step": 1061 + }, + { + "epoch": 2.26, + "learning_rate": 3.0908121816951465e-06, + "loss": 0.3706, + "step": 1062 + }, + { + "epoch": 2.26, + "learning_rate": 3.0740447238444214e-06, + "loss": 0.4513, + "step": 1063 + }, + { + "epoch": 2.26, + "learning_rate": 3.0573146067955674e-06, + "loss": 0.3289, + "step": 1064 + }, + { + "epoch": 2.27, + "learning_rate": 3.0406219207478272e-06, + "loss": 0.4044, + "step": 1065 + }, + { + "epoch": 2.27, + "learning_rate": 3.023966755698652e-06, + "loss": 0.3798, + "step": 1066 + }, + { + "epoch": 2.27, + "learning_rate": 3.007349201443194e-06, + "loss": 0.3498, + "step": 1067 + }, + { + "epoch": 2.27, + "learning_rate": 2.990769347573831e-06, + "loss": 0.3439, + "step": 1068 + }, + { + "epoch": 2.27, + "learning_rate": 2.9742272834796813e-06, + "loss": 0.3197, + "step": 1069 + }, + { + "epoch": 2.28, + "learning_rate": 2.9577230983461237e-06, + "loss": 0.3196, + "step": 1070 + }, + { + "epoch": 2.28, + "learning_rate": 2.9412568811543174e-06, + "loss": 0.3113, + "step": 1071 + }, + { + "epoch": 2.28, + "learning_rate": 2.924828720680707e-06, + "loss": 0.3027, + "step": 1072 + }, + { + "epoch": 2.28, + "learning_rate": 2.908438705496577e-06, + "loss": 0.3348, + "step": 1073 + }, + { + "epoch": 2.29, + "learning_rate": 2.8920869239675385e-06, + "loss": 0.3261, + "step": 1074 + }, + { + "epoch": 2.29, + "learning_rate": 2.8757734642530777e-06, + "loss": 0.3077, + "step": 1075 + }, + { + "epoch": 2.29, + "learning_rate": 2.8594984143060655e-06, + "loss": 0.3586, + "step": 1076 + }, + { + "epoch": 2.29, + "learning_rate": 2.843261861872296e-06, + "loss": 0.4437, + "step": 1077 + }, + { + "epoch": 2.29, + "learning_rate": 2.8270638944899964e-06, + "loss": 0.4236, + "step": 1078 + }, + { + "epoch": 2.3, + "learning_rate": 2.8109045994893723e-06, + "loss": 0.3172, + "step": 1079 + }, + { + "epoch": 2.3, + "learning_rate": 2.7947840639921308e-06, + "loss": 0.3325, + "step": 1080 + }, + { + "epoch": 2.3, + "learning_rate": 2.7787023749110065e-06, + "loss": 0.2887, + "step": 1081 + }, + { + "epoch": 2.3, + "learning_rate": 2.7626596189492983e-06, + "loss": 0.3095, + "step": 1082 + }, + { + "epoch": 2.3, + "learning_rate": 2.7466558826003996e-06, + "loss": 0.3206, + "step": 1083 + }, + { + "epoch": 2.31, + "learning_rate": 2.7306912521473337e-06, + "loss": 0.362, + "step": 1084 + }, + { + "epoch": 2.31, + "learning_rate": 2.71476581366228e-06, + "loss": 0.3094, + "step": 1085 + }, + { + "epoch": 2.31, + "learning_rate": 2.698879653006127e-06, + "loss": 0.2828, + "step": 1086 + }, + { + "epoch": 2.31, + "learning_rate": 2.6830328558279927e-06, + "loss": 0.3085, + "step": 1087 + }, + { + "epoch": 2.31, + "learning_rate": 2.6672255075647736e-06, + "loss": 0.2866, + "step": 1088 + }, + { + "epoch": 2.32, + "learning_rate": 2.651457693440678e-06, + "loss": 0.3411, + "step": 1089 + }, + { + "epoch": 2.32, + "learning_rate": 2.6357294984667724e-06, + "loss": 0.3579, + "step": 1090 + }, + { + "epoch": 2.32, + "learning_rate": 2.620041007440508e-06, + "loss": 0.3484, + "step": 1091 + }, + { + "epoch": 2.32, + "learning_rate": 2.6043923049452913e-06, + "loss": 0.3146, + "step": 1092 + }, + { + "epoch": 2.33, + "learning_rate": 2.588783475350002e-06, + "loss": 0.3199, + "step": 1093 + }, + { + "epoch": 2.33, + "learning_rate": 2.573214602808548e-06, + "loss": 0.3158, + "step": 1094 + }, + { + "epoch": 2.33, + "learning_rate": 2.557685771259414e-06, + "loss": 0.2909, + "step": 1095 + }, + { + "epoch": 2.33, + "learning_rate": 2.542197064425208e-06, + "loss": 0.3294, + "step": 1096 + }, + { + "epoch": 2.33, + "learning_rate": 2.5267485658122014e-06, + "loss": 0.3506, + "step": 1097 + }, + { + "epoch": 2.34, + "learning_rate": 2.5113403587098916e-06, + "loss": 0.3188, + "step": 1098 + }, + { + "epoch": 2.34, + "learning_rate": 2.4959725261905477e-06, + "loss": 0.3292, + "step": 1099 + }, + { + "epoch": 2.34, + "learning_rate": 2.4806451511087595e-06, + "loss": 0.3585, + "step": 1100 + }, + { + "epoch": 2.34, + "learning_rate": 2.465358316100994e-06, + "loss": 0.2882, + "step": 1101 + }, + { + "epoch": 2.34, + "learning_rate": 2.4501121035851494e-06, + "loss": 0.3238, + "step": 1102 + }, + { + "epoch": 2.35, + "learning_rate": 2.4349065957601147e-06, + "loss": 0.291, + "step": 1103 + }, + { + "epoch": 2.35, + "learning_rate": 2.4197418746053105e-06, + "loss": 0.2829, + "step": 1104 + }, + { + "epoch": 2.35, + "learning_rate": 2.4046180218802716e-06, + "loss": 0.3266, + "step": 1105 + }, + { + "epoch": 2.35, + "learning_rate": 2.389535119124188e-06, + "loss": 0.3178, + "step": 1106 + }, + { + "epoch": 2.36, + "learning_rate": 2.3744932476554714e-06, + "loss": 0.349, + "step": 1107 + }, + { + "epoch": 2.36, + "learning_rate": 2.359492488571317e-06, + "loss": 0.4481, + "step": 1108 + }, + { + "epoch": 2.36, + "learning_rate": 2.3445329227472626e-06, + "loss": 0.4572, + "step": 1109 + }, + { + "epoch": 2.36, + "learning_rate": 2.3296146308367597e-06, + "loss": 0.3213, + "step": 1110 + }, + { + "epoch": 2.36, + "learning_rate": 2.3147376932707324e-06, + "loss": 0.3159, + "step": 1111 + }, + { + "epoch": 2.37, + "learning_rate": 2.299902190257145e-06, + "loss": 0.3154, + "step": 1112 + }, + { + "epoch": 2.37, + "learning_rate": 2.2851082017805704e-06, + "loss": 0.3151, + "step": 1113 + }, + { + "epoch": 2.37, + "learning_rate": 2.2703558076017595e-06, + "loss": 0.3496, + "step": 1114 + }, + { + "epoch": 2.37, + "learning_rate": 2.2556450872572145e-06, + "loss": 0.3217, + "step": 1115 + }, + { + "epoch": 2.37, + "learning_rate": 2.2409761200587455e-06, + "loss": 0.2881, + "step": 1116 + }, + { + "epoch": 2.38, + "learning_rate": 2.226348985093062e-06, + "loss": 0.3809, + "step": 1117 + }, + { + "epoch": 2.38, + "learning_rate": 2.211763761221336e-06, + "loss": 0.2806, + "step": 1118 + }, + { + "epoch": 2.38, + "learning_rate": 2.1972205270787782e-06, + "loss": 0.3797, + "step": 1119 + }, + { + "epoch": 2.38, + "learning_rate": 2.182719361074216e-06, + "loss": 0.3732, + "step": 1120 + }, + { + "epoch": 2.39, + "learning_rate": 2.1682603413896687e-06, + "loss": 0.3134, + "step": 1121 + }, + { + "epoch": 2.39, + "learning_rate": 2.153843545979927e-06, + "loss": 0.3026, + "step": 1122 + }, + { + "epoch": 2.39, + "learning_rate": 2.1394690525721275e-06, + "loss": 0.3175, + "step": 1123 + }, + { + "epoch": 2.39, + "learning_rate": 2.1251369386653454e-06, + "loss": 0.2893, + "step": 1124 + }, + { + "epoch": 2.39, + "learning_rate": 2.1108472815301673e-06, + "loss": 0.4183, + "step": 1125 + }, + { + "epoch": 2.4, + "learning_rate": 2.0966001582082764e-06, + "loss": 0.3392, + "step": 1126 + }, + { + "epoch": 2.4, + "learning_rate": 2.0823956455120386e-06, + "loss": 0.3033, + "step": 1127 + }, + { + "epoch": 2.4, + "learning_rate": 2.068233820024088e-06, + "loss": 0.3701, + "step": 1128 + }, + { + "epoch": 2.4, + "learning_rate": 2.0541147580969123e-06, + "loss": 0.3736, + "step": 1129 + }, + { + "epoch": 2.4, + "learning_rate": 2.0400385358524435e-06, + "loss": 0.3015, + "step": 1130 + }, + { + "epoch": 2.41, + "learning_rate": 2.0260052291816445e-06, + "loss": 0.3237, + "step": 1131 + }, + { + "epoch": 2.41, + "learning_rate": 2.012014913744106e-06, + "loss": 0.3198, + "step": 1132 + }, + { + "epoch": 2.41, + "learning_rate": 1.998067664967629e-06, + "loss": 0.2989, + "step": 1133 + }, + { + "epoch": 2.41, + "learning_rate": 1.9841635580478325e-06, + "loss": 0.3097, + "step": 1134 + }, + { + "epoch": 2.41, + "learning_rate": 1.9703026679477253e-06, + "loss": 0.353, + "step": 1135 + }, + { + "epoch": 2.42, + "learning_rate": 1.9564850693973294e-06, + "loss": 0.2921, + "step": 1136 + }, + { + "epoch": 2.42, + "learning_rate": 1.9427108368932534e-06, + "loss": 0.3033, + "step": 1137 + }, + { + "epoch": 2.42, + "learning_rate": 1.9289800446983097e-06, + "loss": 0.3466, + "step": 1138 + }, + { + "epoch": 2.42, + "learning_rate": 1.915292766841098e-06, + "loss": 0.3351, + "step": 1139 + }, + { + "epoch": 2.43, + "learning_rate": 1.901649077115617e-06, + "loss": 0.2988, + "step": 1140 + }, + { + "epoch": 2.43, + "learning_rate": 1.8880490490808668e-06, + "loss": 0.3457, + "step": 1141 + }, + { + "epoch": 2.43, + "learning_rate": 1.8744927560604364e-06, + "loss": 0.3323, + "step": 1142 + }, + { + "epoch": 2.43, + "learning_rate": 1.8609802711421331e-06, + "loss": 0.3053, + "step": 1143 + }, + { + "epoch": 2.43, + "learning_rate": 1.8475116671775694e-06, + "loss": 0.2993, + "step": 1144 + }, + { + "epoch": 2.44, + "learning_rate": 1.834087016781777e-06, + "loss": 0.3448, + "step": 1145 + }, + { + "epoch": 2.44, + "learning_rate": 1.820706392332824e-06, + "loss": 0.3217, + "step": 1146 + }, + { + "epoch": 2.44, + "learning_rate": 1.8073698659714012e-06, + "loss": 0.386, + "step": 1147 + }, + { + "epoch": 2.44, + "learning_rate": 1.7940775096004548e-06, + "loss": 0.3579, + "step": 1148 + }, + { + "epoch": 2.44, + "learning_rate": 1.7808293948847944e-06, + "loss": 0.3317, + "step": 1149 + }, + { + "epoch": 2.45, + "learning_rate": 1.767625593250699e-06, + "loss": 0.3322, + "step": 1150 + }, + { + "epoch": 2.45, + "learning_rate": 1.7544661758855385e-06, + "loss": 0.3969, + "step": 1151 + }, + { + "epoch": 2.45, + "learning_rate": 1.74135121373739e-06, + "loss": 0.2878, + "step": 1152 + }, + { + "epoch": 2.45, + "learning_rate": 1.7282807775146516e-06, + "loss": 0.3675, + "step": 1153 + }, + { + "epoch": 2.46, + "learning_rate": 1.7152549376856599e-06, + "loss": 0.3212, + "step": 1154 + }, + { + "epoch": 2.46, + "learning_rate": 1.7022737644783183e-06, + "loss": 0.3326, + "step": 1155 + }, + { + "epoch": 2.46, + "learning_rate": 1.689337327879711e-06, + "loss": 0.3516, + "step": 1156 + }, + { + "epoch": 2.46, + "learning_rate": 1.6764456976357279e-06, + "loss": 0.308, + "step": 1157 + }, + { + "epoch": 2.46, + "learning_rate": 1.6635989432506905e-06, + "loss": 0.3277, + "step": 1158 + }, + { + "epoch": 2.47, + "learning_rate": 1.650797133986971e-06, + "loss": 0.3178, + "step": 1159 + }, + { + "epoch": 2.47, + "learning_rate": 1.6380403388646305e-06, + "loss": 0.3113, + "step": 1160 + }, + { + "epoch": 2.47, + "learning_rate": 1.625328626661028e-06, + "loss": 0.3352, + "step": 1161 + }, + { + "epoch": 2.47, + "learning_rate": 1.6126620659104708e-06, + "loss": 0.302, + "step": 1162 + }, + { + "epoch": 2.47, + "learning_rate": 1.6000407249038342e-06, + "loss": 0.413, + "step": 1163 + }, + { + "epoch": 2.48, + "learning_rate": 1.587464671688187e-06, + "loss": 0.3093, + "step": 1164 + }, + { + "epoch": 2.48, + "learning_rate": 1.5749339740664471e-06, + "loss": 0.3569, + "step": 1165 + }, + { + "epoch": 2.48, + "learning_rate": 1.5624486995969846e-06, + "loss": 0.4127, + "step": 1166 + }, + { + "epoch": 2.48, + "learning_rate": 1.5500089155932808e-06, + "loss": 0.3035, + "step": 1167 + }, + { + "epoch": 2.49, + "learning_rate": 1.53761468912356e-06, + "loss": 0.3015, + "step": 1168 + }, + { + "epoch": 2.49, + "learning_rate": 1.5252660870104242e-06, + "loss": 0.3048, + "step": 1169 + }, + { + "epoch": 2.49, + "learning_rate": 1.512963175830494e-06, + "loss": 0.3362, + "step": 1170 + }, + { + "epoch": 2.49, + "learning_rate": 1.5007060219140534e-06, + "loss": 0.5091, + "step": 1171 + }, + { + "epoch": 2.49, + "learning_rate": 1.4884946913446896e-06, + "loss": 0.3107, + "step": 1172 + }, + { + "epoch": 2.5, + "learning_rate": 1.4763292499589299e-06, + "loss": 0.2978, + "step": 1173 + }, + { + "epoch": 2.5, + "learning_rate": 1.4642097633458984e-06, + "loss": 0.3324, + "step": 1174 + }, + { + "epoch": 2.5, + "learning_rate": 1.4521362968469599e-06, + "loss": 0.2967, + "step": 1175 + }, + { + "epoch": 2.5, + "learning_rate": 1.4401089155553582e-06, + "loss": 0.3281, + "step": 1176 + }, + { + "epoch": 2.5, + "learning_rate": 1.4281276843158787e-06, + "loss": 0.3568, + "step": 1177 + }, + { + "epoch": 2.51, + "learning_rate": 1.4161926677244875e-06, + "loss": 0.3169, + "step": 1178 + }, + { + "epoch": 2.51, + "learning_rate": 1.4043039301279904e-06, + "loss": 0.3178, + "step": 1179 + }, + { + "epoch": 2.51, + "learning_rate": 1.3924615356236792e-06, + "loss": 0.3289, + "step": 1180 + }, + { + "epoch": 2.51, + "learning_rate": 1.3806655480589937e-06, + "loss": 0.3046, + "step": 1181 + }, + { + "epoch": 2.51, + "learning_rate": 1.3689160310311722e-06, + "loss": 0.334, + "step": 1182 + }, + { + "epoch": 2.52, + "learning_rate": 1.357213047886915e-06, + "loss": 0.3059, + "step": 1183 + }, + { + "epoch": 2.52, + "learning_rate": 1.3455566617220317e-06, + "loss": 0.3229, + "step": 1184 + }, + { + "epoch": 2.52, + "learning_rate": 1.3339469353811142e-06, + "loss": 0.3129, + "step": 1185 + }, + { + "epoch": 2.52, + "learning_rate": 1.3223839314571806e-06, + "loss": 0.3131, + "step": 1186 + }, + { + "epoch": 2.53, + "learning_rate": 1.3108677122913594e-06, + "loss": 0.3773, + "step": 1187 + }, + { + "epoch": 2.53, + "learning_rate": 1.2993983399725374e-06, + "loss": 0.3554, + "step": 1188 + }, + { + "epoch": 2.53, + "learning_rate": 1.287975876337031e-06, + "loss": 0.3144, + "step": 1189 + }, + { + "epoch": 2.53, + "learning_rate": 1.2766003829682504e-06, + "loss": 0.344, + "step": 1190 + }, + { + "epoch": 2.53, + "learning_rate": 1.2652719211963726e-06, + "loss": 0.3224, + "step": 1191 + }, + { + "epoch": 2.54, + "learning_rate": 1.253990552098e-06, + "loss": 0.3295, + "step": 1192 + }, + { + "epoch": 2.54, + "learning_rate": 1.242756336495845e-06, + "loss": 0.3441, + "step": 1193 + }, + { + "epoch": 2.54, + "learning_rate": 1.2315693349583923e-06, + "loss": 0.3153, + "step": 1194 + }, + { + "epoch": 2.54, + "learning_rate": 1.220429607799576e-06, + "loss": 0.3006, + "step": 1195 + }, + { + "epoch": 2.54, + "learning_rate": 1.2093372150784533e-06, + "loss": 0.3357, + "step": 1196 + }, + { + "epoch": 2.55, + "learning_rate": 1.1982922165988808e-06, + "loss": 0.3694, + "step": 1197 + }, + { + "epoch": 2.55, + "learning_rate": 1.1872946719091949e-06, + "loss": 0.3138, + "step": 1198 + }, + { + "epoch": 2.55, + "learning_rate": 1.176344640301882e-06, + "loss": 0.327, + "step": 1199 + }, + { + "epoch": 2.55, + "learning_rate": 1.1654421808132687e-06, + "loss": 0.3196, + "step": 1200 + }, + { + "epoch": 2.56, + "learning_rate": 1.1545873522232055e-06, + "loss": 0.3184, + "step": 1201 + }, + { + "epoch": 2.56, + "learning_rate": 1.143780213054736e-06, + "loss": 0.3269, + "step": 1202 + }, + { + "epoch": 2.56, + "learning_rate": 1.1330208215737937e-06, + "loss": 0.3234, + "step": 1203 + }, + { + "epoch": 2.56, + "learning_rate": 1.1223092357888843e-06, + "loss": 0.2969, + "step": 1204 + }, + { + "epoch": 2.56, + "learning_rate": 1.1116455134507665e-06, + "loss": 0.3045, + "step": 1205 + }, + { + "epoch": 2.57, + "learning_rate": 1.101029712052153e-06, + "loss": 0.3197, + "step": 1206 + }, + { + "epoch": 2.57, + "learning_rate": 1.0904618888273922e-06, + "loss": 0.3372, + "step": 1207 + }, + { + "epoch": 2.57, + "learning_rate": 1.0799421007521615e-06, + "loss": 0.3529, + "step": 1208 + }, + { + "epoch": 2.57, + "learning_rate": 1.0694704045431604e-06, + "loss": 0.3168, + "step": 1209 + }, + { + "epoch": 2.57, + "learning_rate": 1.0590468566578071e-06, + "loss": 0.3207, + "step": 1210 + }, + { + "epoch": 2.58, + "learning_rate": 1.0486715132939218e-06, + "loss": 0.3175, + "step": 1211 + }, + { + "epoch": 2.58, + "learning_rate": 1.0383444303894453e-06, + "loss": 0.3238, + "step": 1212 + }, + { + "epoch": 2.58, + "learning_rate": 1.0280656636221186e-06, + "loss": 0.3294, + "step": 1213 + }, + { + "epoch": 2.58, + "learning_rate": 1.017835268409192e-06, + "loss": 0.3158, + "step": 1214 + }, + { + "epoch": 2.59, + "learning_rate": 1.007653299907122e-06, + "loss": 0.3087, + "step": 1215 + }, + { + "epoch": 2.59, + "learning_rate": 9.975198130112807e-07, + "loss": 0.3587, + "step": 1216 + }, + { + "epoch": 2.59, + "learning_rate": 9.874348623556495e-07, + "loss": 0.3414, + "step": 1217 + }, + { + "epoch": 2.59, + "learning_rate": 9.77398502312531e-07, + "loss": 0.2997, + "step": 1218 + }, + { + "epoch": 2.59, + "learning_rate": 9.674107869922555e-07, + "loss": 0.3235, + "step": 1219 + }, + { + "epoch": 2.6, + "learning_rate": 9.574717702428937e-07, + "loss": 0.2922, + "step": 1220 + }, + { + "epoch": 2.6, + "learning_rate": 9.475815056499527e-07, + "loss": 0.3232, + "step": 1221 + }, + { + "epoch": 2.6, + "learning_rate": 9.377400465361031e-07, + "loss": 0.3378, + "step": 1222 + }, + { + "epoch": 2.6, + "learning_rate": 9.279474459608806e-07, + "loss": 0.3422, + "step": 1223 + }, + { + "epoch": 2.6, + "learning_rate": 9.182037567204017e-07, + "loss": 0.3265, + "step": 1224 + }, + { + "epoch": 2.61, + "learning_rate": 9.085090313470846e-07, + "loss": 0.2762, + "step": 1225 + }, + { + "epoch": 2.61, + "learning_rate": 8.988633221093612e-07, + "loss": 0.3776, + "step": 1226 + }, + { + "epoch": 2.61, + "learning_rate": 8.892666810113959e-07, + "loss": 0.3701, + "step": 1227 + }, + { + "epoch": 2.61, + "learning_rate": 8.797191597928046e-07, + "loss": 0.3587, + "step": 1228 + }, + { + "epoch": 2.61, + "learning_rate": 8.702208099283804e-07, + "loss": 0.3037, + "step": 1229 + }, + { + "epoch": 2.62, + "learning_rate": 8.607716826278089e-07, + "loss": 0.3066, + "step": 1230 + }, + { + "epoch": 2.62, + "learning_rate": 8.513718288353956e-07, + "loss": 0.299, + "step": 1231 + }, + { + "epoch": 2.62, + "learning_rate": 8.420212992297938e-07, + "loss": 0.303, + "step": 1232 + }, + { + "epoch": 2.62, + "learning_rate": 8.327201442237276e-07, + "loss": 0.3181, + "step": 1233 + }, + { + "epoch": 2.63, + "learning_rate": 8.234684139637205e-07, + "loss": 0.3326, + "step": 1234 + }, + { + "epoch": 2.63, + "learning_rate": 8.142661583298295e-07, + "loss": 0.2965, + "step": 1235 + }, + { + "epoch": 2.63, + "learning_rate": 8.051134269353689e-07, + "loss": 0.3726, + "step": 1236 + }, + { + "epoch": 2.63, + "learning_rate": 7.960102691266447e-07, + "loss": 0.304, + "step": 1237 + }, + { + "epoch": 2.63, + "learning_rate": 7.869567339826978e-07, + "loss": 0.3323, + "step": 1238 + }, + { + "epoch": 2.64, + "learning_rate": 7.779528703150263e-07, + "loss": 0.3128, + "step": 1239 + }, + { + "epoch": 2.64, + "learning_rate": 7.689987266673293e-07, + "loss": 0.311, + "step": 1240 + }, + { + "epoch": 2.64, + "learning_rate": 7.600943513152436e-07, + "loss": 0.3785, + "step": 1241 + }, + { + "epoch": 2.64, + "learning_rate": 7.512397922660853e-07, + "loss": 0.311, + "step": 1242 + }, + { + "epoch": 2.64, + "learning_rate": 7.424350972585858e-07, + "loss": 0.3316, + "step": 1243 + }, + { + "epoch": 2.65, + "learning_rate": 7.336803137626414e-07, + "loss": 0.39, + "step": 1244 + }, + { + "epoch": 2.65, + "learning_rate": 7.249754889790539e-07, + "loss": 0.432, + "step": 1245 + }, + { + "epoch": 2.65, + "learning_rate": 7.163206698392744e-07, + "loss": 0.3644, + "step": 1246 + }, + { + "epoch": 2.65, + "learning_rate": 7.077159030051528e-07, + "loss": 0.3166, + "step": 1247 + }, + { + "epoch": 2.66, + "learning_rate": 6.991612348686861e-07, + "loss": 0.3199, + "step": 1248 + }, + { + "epoch": 2.66, + "learning_rate": 6.906567115517692e-07, + "loss": 0.3978, + "step": 1249 + }, + { + "epoch": 2.66, + "learning_rate": 6.822023789059396e-07, + "loss": 0.2999, + "step": 1250 + }, + { + "epoch": 2.66, + "learning_rate": 6.737982825121392e-07, + "loss": 0.3125, + "step": 1251 + }, + { + "epoch": 2.66, + "learning_rate": 6.65444467680465e-07, + "loss": 0.3773, + "step": 1252 + }, + { + "epoch": 2.67, + "learning_rate": 6.571409794499229e-07, + "loss": 0.2993, + "step": 1253 + }, + { + "epoch": 2.67, + "learning_rate": 6.488878625881867e-07, + "loss": 0.3181, + "step": 1254 + }, + { + "epoch": 2.67, + "learning_rate": 6.406851615913567e-07, + "loss": 0.3321, + "step": 1255 + }, + { + "epoch": 2.67, + "learning_rate": 6.325329206837217e-07, + "loss": 0.3524, + "step": 1256 + }, + { + "epoch": 2.67, + "learning_rate": 6.244311838175143e-07, + "loss": 0.2851, + "step": 1257 + }, + { + "epoch": 2.68, + "learning_rate": 6.163799946726812e-07, + "loss": 0.3818, + "step": 1258 + }, + { + "epoch": 2.68, + "learning_rate": 6.083793966566431e-07, + "loss": 0.3254, + "step": 1259 + }, + { + "epoch": 2.68, + "learning_rate": 6.004294329040638e-07, + "loss": 0.3821, + "step": 1260 + }, + { + "epoch": 2.68, + "learning_rate": 5.925301462766164e-07, + "loss": 0.3444, + "step": 1261 + }, + { + "epoch": 2.69, + "learning_rate": 5.846815793627469e-07, + "loss": 0.3886, + "step": 1262 + }, + { + "epoch": 2.69, + "learning_rate": 5.768837744774547e-07, + "loss": 0.2968, + "step": 1263 + }, + { + "epoch": 2.69, + "learning_rate": 5.691367736620568e-07, + "loss": 0.3035, + "step": 1264 + }, + { + "epoch": 2.69, + "learning_rate": 5.614406186839661e-07, + "loss": 0.3098, + "step": 1265 + }, + { + "epoch": 2.69, + "learning_rate": 5.537953510364613e-07, + "loss": 0.3247, + "step": 1266 + }, + { + "epoch": 2.7, + "learning_rate": 5.462010119384665e-07, + "loss": 0.3128, + "step": 1267 + }, + { + "epoch": 2.7, + "learning_rate": 5.386576423343293e-07, + "loss": 0.3109, + "step": 1268 + }, + { + "epoch": 2.7, + "learning_rate": 5.311652828935943e-07, + "loss": 0.3231, + "step": 1269 + }, + { + "epoch": 2.7, + "learning_rate": 5.237239740107947e-07, + "loss": 0.3395, + "step": 1270 + }, + { + "epoch": 2.7, + "learning_rate": 5.163337558052239e-07, + "loss": 0.3393, + "step": 1271 + }, + { + "epoch": 2.71, + "learning_rate": 5.089946681207247e-07, + "loss": 0.3141, + "step": 1272 + }, + { + "epoch": 2.71, + "learning_rate": 5.017067505254735e-07, + "loss": 0.309, + "step": 1273 + }, + { + "epoch": 2.71, + "learning_rate": 4.944700423117677e-07, + "loss": 0.3223, + "step": 1274 + }, + { + "epoch": 2.71, + "learning_rate": 4.872845824958106e-07, + "loss": 0.3479, + "step": 1275 + }, + { + "epoch": 2.71, + "learning_rate": 4.801504098175047e-07, + "loss": 0.2875, + "step": 1276 + }, + { + "epoch": 2.72, + "learning_rate": 4.7306756274024145e-07, + "loss": 0.3098, + "step": 1277 + }, + { + "epoch": 2.72, + "learning_rate": 4.660360794506946e-07, + "loss": 0.3169, + "step": 1278 + }, + { + "epoch": 2.72, + "learning_rate": 4.5905599785861175e-07, + "loss": 0.2873, + "step": 1279 + }, + { + "epoch": 2.72, + "learning_rate": 4.521273555966155e-07, + "loss": 0.3054, + "step": 1280 + }, + { + "epoch": 2.73, + "learning_rate": 4.4525019001999013e-07, + "loss": 0.3251, + "step": 1281 + }, + { + "epoch": 2.73, + "learning_rate": 4.3842453820649443e-07, + "loss": 0.3217, + "step": 1282 + }, + { + "epoch": 2.73, + "learning_rate": 4.316504369561492e-07, + "loss": 0.325, + "step": 1283 + }, + { + "epoch": 2.73, + "learning_rate": 4.2492792279104853e-07, + "loss": 0.2866, + "step": 1284 + }, + { + "epoch": 2.73, + "learning_rate": 4.1825703195515376e-07, + "loss": 0.3152, + "step": 1285 + }, + { + "epoch": 2.74, + "learning_rate": 4.1163780041410526e-07, + "loss": 0.2914, + "step": 1286 + }, + { + "epoch": 2.74, + "learning_rate": 4.0507026385502747e-07, + "loss": 0.3274, + "step": 1287 + }, + { + "epoch": 2.74, + "learning_rate": 3.985544576863287e-07, + "loss": 0.3329, + "step": 1288 + }, + { + "epoch": 2.74, + "learning_rate": 3.920904170375239e-07, + "loss": 0.3149, + "step": 1289 + }, + { + "epoch": 2.74, + "learning_rate": 3.856781767590334e-07, + "loss": 0.3478, + "step": 1290 + }, + { + "epoch": 2.75, + "learning_rate": 3.7931777142199975e-07, + "loss": 0.3348, + "step": 1291 + }, + { + "epoch": 2.75, + "learning_rate": 3.730092353181047e-07, + "loss": 0.3417, + "step": 1292 + }, + { + "epoch": 2.75, + "learning_rate": 3.667526024593759e-07, + "loss": 0.3576, + "step": 1293 + }, + { + "epoch": 2.75, + "learning_rate": 3.6054790657801153e-07, + "loss": 0.3734, + "step": 1294 + }, + { + "epoch": 2.76, + "learning_rate": 3.5439518112619254e-07, + "loss": 0.3561, + "step": 1295 + }, + { + "epoch": 2.76, + "learning_rate": 3.482944592759085e-07, + "loss": 0.4734, + "step": 1296 + }, + { + "epoch": 2.76, + "learning_rate": 3.4224577391877124e-07, + "loss": 0.2974, + "step": 1297 + }, + { + "epoch": 2.76, + "learning_rate": 3.362491576658444e-07, + "loss": 0.3839, + "step": 1298 + }, + { + "epoch": 2.76, + "learning_rate": 3.3030464284746434e-07, + "loss": 0.3448, + "step": 1299 + }, + { + "epoch": 2.77, + "learning_rate": 3.2441226151306403e-07, + "loss": 0.3223, + "step": 1300 + }, + { + "epoch": 2.77, + "learning_rate": 3.1857204543100486e-07, + "loss": 0.3551, + "step": 1301 + }, + { + "epoch": 2.77, + "learning_rate": 3.127840260884019e-07, + "loss": 0.3652, + "step": 1302 + }, + { + "epoch": 2.77, + "learning_rate": 3.0704823469095536e-07, + "loss": 0.3183, + "step": 1303 + }, + { + "epoch": 2.77, + "learning_rate": 3.013647021627808e-07, + "loss": 0.3244, + "step": 1304 + }, + { + "epoch": 2.78, + "learning_rate": 2.95733459146248e-07, + "loss": 0.2974, + "step": 1305 + }, + { + "epoch": 2.78, + "learning_rate": 2.9015453600180654e-07, + "loss": 0.3154, + "step": 1306 + }, + { + "epoch": 2.78, + "learning_rate": 2.8462796280782855e-07, + "loss": 0.3215, + "step": 1307 + }, + { + "epoch": 2.78, + "learning_rate": 2.7915376936044626e-07, + "loss": 0.3268, + "step": 1308 + }, + { + "epoch": 2.79, + "learning_rate": 2.737319851733888e-07, + "loss": 0.2688, + "step": 1309 + }, + { + "epoch": 2.79, + "learning_rate": 2.6836263947782605e-07, + "loss": 0.3231, + "step": 1310 + }, + { + "epoch": 2.79, + "learning_rate": 2.6304576122221035e-07, + "loss": 0.2889, + "step": 1311 + }, + { + "epoch": 2.79, + "learning_rate": 2.57781379072114e-07, + "loss": 0.3666, + "step": 1312 + }, + { + "epoch": 2.79, + "learning_rate": 2.525695214100865e-07, + "loss": 0.3108, + "step": 1313 + }, + { + "epoch": 2.8, + "learning_rate": 2.474102163354908e-07, + "loss": 0.3485, + "step": 1314 + }, + { + "epoch": 2.8, + "learning_rate": 2.423034916643607e-07, + "loss": 0.3263, + "step": 1315 + }, + { + "epoch": 2.8, + "learning_rate": 2.3724937492924148e-07, + "loss": 0.3506, + "step": 1316 + }, + { + "epoch": 2.8, + "learning_rate": 2.3224789337905062e-07, + "loss": 0.3214, + "step": 1317 + }, + { + "epoch": 2.8, + "learning_rate": 2.2729907397892493e-07, + "loss": 0.3255, + "step": 1318 + }, + { + "epoch": 2.81, + "learning_rate": 2.2240294341007495e-07, + "loss": 0.3268, + "step": 1319 + }, + { + "epoch": 2.81, + "learning_rate": 2.175595280696463e-07, + "loss": 0.2999, + "step": 1320 + }, + { + "epoch": 2.81, + "learning_rate": 2.1276885407057411e-07, + "loss": 0.3115, + "step": 1321 + }, + { + "epoch": 2.81, + "learning_rate": 2.0803094724143879e-07, + "loss": 0.3807, + "step": 1322 + }, + { + "epoch": 2.81, + "learning_rate": 2.0334583312633383e-07, + "loss": 0.3427, + "step": 1323 + }, + { + "epoch": 2.82, + "learning_rate": 1.9871353698472374e-07, + "loss": 0.2857, + "step": 1324 + }, + { + "epoch": 2.82, + "learning_rate": 1.941340837913075e-07, + "loss": 0.3384, + "step": 1325 + }, + { + "epoch": 2.82, + "learning_rate": 1.896074982358853e-07, + "loss": 0.3124, + "step": 1326 + }, + { + "epoch": 2.82, + "learning_rate": 1.851338047232254e-07, + "loss": 0.2895, + "step": 1327 + }, + { + "epoch": 2.83, + "learning_rate": 1.8071302737293294e-07, + "loss": 0.3149, + "step": 1328 + }, + { + "epoch": 2.83, + "learning_rate": 1.7634519001931916e-07, + "loss": 0.3217, + "step": 1329 + }, + { + "epoch": 2.83, + "learning_rate": 1.7203031621127132e-07, + "loss": 0.3369, + "step": 1330 + }, + { + "epoch": 2.83, + "learning_rate": 1.6776842921213177e-07, + "loss": 0.2999, + "step": 1331 + }, + { + "epoch": 2.83, + "learning_rate": 1.6355955199956143e-07, + "loss": 0.3794, + "step": 1332 + }, + { + "epoch": 2.84, + "learning_rate": 1.5940370726542864e-07, + "loss": 0.2897, + "step": 1333 + }, + { + "epoch": 2.84, + "learning_rate": 1.553009174156783e-07, + "loss": 0.3281, + "step": 1334 + }, + { + "epoch": 2.84, + "learning_rate": 1.5125120457021303e-07, + "loss": 0.3232, + "step": 1335 + }, + { + "epoch": 2.84, + "learning_rate": 1.4725459056277647e-07, + "loss": 0.3466, + "step": 1336 + }, + { + "epoch": 2.84, + "learning_rate": 1.4331109694083357e-07, + "loss": 0.3293, + "step": 1337 + }, + { + "epoch": 2.85, + "learning_rate": 1.3942074496545166e-07, + "loss": 0.3342, + "step": 1338 + }, + { + "epoch": 2.85, + "learning_rate": 1.3558355561119063e-07, + "loss": 0.314, + "step": 1339 + }, + { + "epoch": 2.85, + "learning_rate": 1.317995495659885e-07, + "loss": 0.3504, + "step": 1340 + }, + { + "epoch": 2.85, + "learning_rate": 1.2806874723104824e-07, + "loss": 0.2979, + "step": 1341 + }, + { + "epoch": 2.86, + "learning_rate": 1.2439116872072775e-07, + "loss": 0.3856, + "step": 1342 + }, + { + "epoch": 2.86, + "learning_rate": 1.207668338624346e-07, + "loss": 0.4362, + "step": 1343 + }, + { + "epoch": 2.86, + "learning_rate": 1.1719576219651585e-07, + "loss": 0.3177, + "step": 1344 + }, + { + "epoch": 2.86, + "learning_rate": 1.136779729761528e-07, + "loss": 0.3072, + "step": 1345 + }, + { + "epoch": 2.86, + "learning_rate": 1.1021348516725983e-07, + "loss": 0.3293, + "step": 1346 + }, + { + "epoch": 2.87, + "learning_rate": 1.0680231744837899e-07, + "loss": 0.2838, + "step": 1347 + }, + { + "epoch": 2.87, + "learning_rate": 1.034444882105834e-07, + "loss": 0.331, + "step": 1348 + }, + { + "epoch": 2.87, + "learning_rate": 1.0014001555737285e-07, + "loss": 0.3608, + "step": 1349 + }, + { + "epoch": 2.87, + "learning_rate": 9.688891730458061e-08, + "loss": 0.3105, + "step": 1350 + }, + { + "epoch": 2.87, + "learning_rate": 9.369121098027345e-08, + "loss": 0.2981, + "step": 1351 + }, + { + "epoch": 2.88, + "learning_rate": 9.054691382466175e-08, + "loss": 0.3519, + "step": 1352 + }, + { + "epoch": 2.88, + "learning_rate": 8.745604279000175e-08, + "loss": 0.3241, + "step": 1353 + }, + { + "epoch": 2.88, + "learning_rate": 8.441861454050792e-08, + "loss": 0.359, + "step": 1354 + }, + { + "epoch": 2.88, + "learning_rate": 8.143464545226298e-08, + "loss": 0.324, + "step": 1355 + }, + { + "epoch": 2.89, + "learning_rate": 7.850415161312463e-08, + "loss": 0.3382, + "step": 1356 + }, + { + "epoch": 2.89, + "learning_rate": 7.562714882264787e-08, + "loss": 0.3078, + "step": 1357 + }, + { + "epoch": 2.89, + "learning_rate": 7.28036525919884e-08, + "loss": 0.3454, + "step": 1358 + }, + { + "epoch": 2.89, + "learning_rate": 7.003367814382933e-08, + "loss": 0.2991, + "step": 1359 + }, + { + "epoch": 2.89, + "learning_rate": 6.731724041229349e-08, + "loss": 0.3432, + "step": 1360 + }, + { + "epoch": 2.9, + "learning_rate": 6.465435404286347e-08, + "loss": 0.3001, + "step": 1361 + }, + { + "epoch": 2.9, + "learning_rate": 6.204503339230505e-08, + "loss": 0.3096, + "step": 1362 + }, + { + "epoch": 2.9, + "learning_rate": 5.9489292528588374e-08, + "loss": 0.3308, + "step": 1363 + }, + { + "epoch": 2.9, + "learning_rate": 5.698714523080906e-08, + "loss": 0.332, + "step": 1364 + }, + { + "epoch": 2.9, + "learning_rate": 5.453860498911945e-08, + "loss": 0.3425, + "step": 1365 + }, + { + "epoch": 2.91, + "learning_rate": 5.214368500465305e-08, + "loss": 0.3308, + "step": 1366 + }, + { + "epoch": 2.91, + "learning_rate": 4.980239818945132e-08, + "loss": 0.2952, + "step": 1367 + }, + { + "epoch": 2.91, + "learning_rate": 4.75147571664003e-08, + "loss": 0.3038, + "step": 1368 + }, + { + "epoch": 2.91, + "learning_rate": 4.528077426915412e-08, + "loss": 0.3702, + "step": 1369 + }, + { + "epoch": 2.91, + "learning_rate": 4.310046154207492e-08, + "loss": 0.3052, + "step": 1370 + }, + { + "epoch": 2.92, + "learning_rate": 4.0973830740166366e-08, + "loss": 0.3295, + "step": 1371 + }, + { + "epoch": 2.92, + "learning_rate": 3.890089332900915e-08, + "loss": 0.3197, + "step": 1372 + }, + { + "epoch": 2.92, + "learning_rate": 3.688166048469999e-08, + "loss": 0.2977, + "step": 1373 + }, + { + "epoch": 2.92, + "learning_rate": 3.4916143093790544e-08, + "loss": 0.2862, + "step": 1374 + }, + { + "epoch": 2.93, + "learning_rate": 3.300435175322969e-08, + "loss": 0.3353, + "step": 1375 + }, + { + "epoch": 2.93, + "learning_rate": 3.114629677030689e-08, + "loss": 0.3151, + "step": 1376 + }, + { + "epoch": 2.93, + "learning_rate": 2.9341988162595593e-08, + "loss": 0.3559, + "step": 1377 + }, + { + "epoch": 2.93, + "learning_rate": 2.7591435657897682e-08, + "loss": 0.3653, + "step": 1378 + }, + { + "epoch": 2.93, + "learning_rate": 2.589464869419578e-08, + "loss": 0.3027, + "step": 1379 + }, + { + "epoch": 2.94, + "learning_rate": 2.42516364195966e-08, + "loss": 0.3416, + "step": 1380 + }, + { + "epoch": 2.94, + "learning_rate": 2.266240769228545e-08, + "loss": 0.3433, + "step": 1381 + }, + { + "epoch": 2.94, + "learning_rate": 2.1126971080478452e-08, + "loss": 0.3188, + "step": 1382 + }, + { + "epoch": 2.94, + "learning_rate": 1.9645334862373745e-08, + "loss": 0.3562, + "step": 1383 + }, + { + "epoch": 2.94, + "learning_rate": 1.8217507026109248e-08, + "loss": 0.3392, + "step": 1384 + }, + { + "epoch": 2.95, + "learning_rate": 1.6843495269718292e-08, + "loss": 0.3489, + "step": 1385 + }, + { + "epoch": 2.95, + "learning_rate": 1.5523307001088505e-08, + "loss": 0.3129, + "step": 1386 + }, + { + "epoch": 2.95, + "learning_rate": 1.4256949337922987e-08, + "loss": 0.3118, + "step": 1387 + }, + { + "epoch": 2.95, + "learning_rate": 1.3044429107700319e-08, + "loss": 0.388, + "step": 1388 + }, + { + "epoch": 2.96, + "learning_rate": 1.1885752847637932e-08, + "loss": 0.3071, + "step": 1389 + }, + { + "epoch": 2.96, + "learning_rate": 1.0780926804657699e-08, + "loss": 0.3096, + "step": 1390 + }, + { + "epoch": 2.96, + "learning_rate": 9.729956935350394e-09, + "loss": 0.2774, + "step": 1391 + }, + { + "epoch": 2.96, + "learning_rate": 8.732848905947944e-09, + "loss": 0.2829, + "step": 1392 + }, + { + "epoch": 2.96, + "learning_rate": 7.789608092287904e-09, + "loss": 0.2984, + "step": 1393 + }, + { + "epoch": 2.97, + "learning_rate": 6.900239579787915e-09, + "loss": 0.3761, + "step": 1394 + }, + { + "epoch": 2.97, + "learning_rate": 6.0647481634135145e-09, + "loss": 0.3381, + "step": 1395 + }, + { + "epoch": 2.97, + "learning_rate": 5.283138347660366e-09, + "loss": 0.3043, + "step": 1396 + }, + { + "epoch": 2.97, + "learning_rate": 4.555414346520959e-09, + "loss": 0.3282, + "step": 1397 + }, + { + "epoch": 2.97, + "learning_rate": 3.881580083469061e-09, + "loss": 0.2731, + "step": 1398 + }, + { + "epoch": 2.98, + "learning_rate": 3.2616391914364056e-09, + "loss": 0.2712, + "step": 1399 + }, + { + "epoch": 2.98, + "learning_rate": 2.6955950127882658e-09, + "loss": 0.3199, + "step": 1400 + }, + { + "epoch": 2.98, + "learning_rate": 2.183450599313464e-09, + "loss": 0.2871, + "step": 1401 + }, + { + "epoch": 2.98, + "learning_rate": 1.7252087122021644e-09, + "loss": 0.3109, + "step": 1402 + }, + { + "epoch": 2.99, + "learning_rate": 1.3208718220336647e-09, + "loss": 0.3462, + "step": 1403 + }, + { + "epoch": 2.99, + "learning_rate": 9.704421087619597e-10, + "loss": 0.3581, + "step": 1404 + }, + { + "epoch": 2.99, + "learning_rate": 6.739214617035306e-10, + "loss": 0.3068, + "step": 1405 + }, + { + "epoch": 2.99, + "learning_rate": 4.313114795295725e-10, + "loss": 0.3527, + "step": 1406 + }, + { + "epoch": 2.99, + "learning_rate": 2.4261347025489323e-10, + "loss": 0.3005, + "step": 1407 + }, + { + "epoch": 3.0, + "learning_rate": 1.0782845123125108e-10, + "loss": 0.3546, + "step": 1408 + }, + { + "epoch": 3.0, + "learning_rate": 2.695714914180414e-11, + "loss": 0.3277, + "step": 1409 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 0.2921, + "step": 1410 + }, + { + "epoch": 3.0, + "step": 1410, + "total_flos": 3.663339650505769e+18, + "train_loss": 0.4558141660183034, + "train_runtime": 72003.4946, + "train_samples_per_second": 1.88, + "train_steps_per_second": 0.02 + } + ], + "logging_steps": 1.0, + "max_steps": 1410, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1200, + "total_flos": 3.663339650505769e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..9a58fbd --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c239765ffa270674695dca03c4cae8852e51d50d03f52f2a915693edf85b2da8 +size 4920